CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Software/BinManager.h
Views: 1401
// Copyright (c) 2022- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#include <atomic>20#include <unordered_map>21#include "GPU/Software/Rasterizer.h"2223struct BinWaitable;24class DrawBinItemsTask;2526enum class BinItemType : uint8_t {27TRIANGLE,28CLEAR_RECT,29RECT,30SPRITE,31LINE,32POINT,33};3435struct BinCoords {36int x1;37int y1;38int x2;39int y2;4041bool Invalid() const {42return x2 < x1 || y2 < y1;43}4445BinCoords Intersect(const BinCoords &range) const;46};4748struct BinItem {49BinItemType type;50uint16_t stateIndex;51BinCoords range;52VertexData v0;53VertexData v1;54VertexData v2;55};5657template <typename T, size_t N>58struct BinQueue {59BinQueue() {60Reset();61}62~BinQueue() {63FreeAlignedMemory(items_);64}6566void Setup() {67items_ = (T *)AllocateAlignedMemory(sizeof_, 16);68}6970void Reset() {71head_ = 0;72tail_ = 0;73size_ = 0;74}7576size_t Push(const T &item) {77size_t i = tail_++;78if (i + 1 == N)79tail_ -= N;80items_[i] = item;81size_++;82return i;83}8485T Pop() {86size_t i = head_++;87if (i + 1 == N)88head_ -= N;89T item = items_[i];90size_--;91return item;92}9394// Only safe if you're the only one reading.95T &PeekNext() {96return items_[head_];97}9899void SkipNext() {100size_t i = head_++;101if (i + 1 == N)102head_ -= N;103size_--;104}105106// Only safe if you're the only one reading.107const T &Peek(size_t offset) const {108size_t i = head_ + offset;109if (i >= N)110i -= N;111return items_[i];112}113114// Only safe if you're the only one writing.115T &PeekPush() {116return items_[tail_];117}118119size_t PushPeeked() {120size_t i = tail_++;121if (i + 1 == N)122tail_ -= N;123size_++;124return i;125}126127size_t Size() const {128return size_;129}130131bool Full() const {132return size_ == N - 1;133}134135bool NearFull() const {136return size_ >= N - 2;137}138139bool Empty() const {140return size_ == 0;141}142143T &operator[](size_t index) {144return items_[index];145}146147const T &operator[](size_t index) const {148return items_[index];149}150151T *items_ = nullptr;152std::atomic<size_t> head_;153std::atomic<size_t> tail_ ;154std::atomic<size_t> size_;155static constexpr size_t sizeof_ = sizeof(T) * N;156};157158union BinClut {159uint8_t readable[1024];160};161162struct BinTaskList {163// We shouldn't ever need more than two at once, since we use an atomic to run one at a time.164// A second could run due to overlap during teardown.165static constexpr int N = 2;166167DrawBinItemsTask *tasks[N]{};168int count = 0;169170DrawBinItemsTask *Next() {171return tasks[count % N];172}173};174175struct BinDirtyRange {176uint32_t base;177uint32_t strideBytes;178uint32_t widthBytes;179uint32_t height;180181void Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, const DrawingCoords &tl, const DrawingCoords &br);182};183184class BinManager {185public:186BinManager();187~BinManager();188189void UpdateState();190void UpdateClut(const void *src);191192const Rasterizer::RasterizerState &State() {193return states_[stateIndex_];194}195196void AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2);197void AddClearRect(const VertexData &v0, const VertexData &v1);198void AddRect(const VertexData &v0, const VertexData &v1);199void AddSprite(const VertexData &v0, const VertexData &v1);200void AddLine(const VertexData &v0, const VertexData &v1);201void AddPoint(const VertexData &v0);202203void Drain(bool flushing = false);204void Flush(const char *reason);205bool HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);206// Assumes you've also checked for a write (writes are partial so are automatically reads.)207bool HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);208209void GetStats(char *buffer, size_t bufsize);210void ResetStats();211212void SetDirty(SoftDirty flags) {213dirty_ |= flags;214}215void ClearDirty(SoftDirty flags) {216dirty_ &= ~flags;217}218SoftDirty GetDirty() {219return dirty_;220}221bool HasDirty(SoftDirty flags) {222return dirty_ & flags;223}224225protected:226#if PPSSPP_ARCH(32BIT)227// Use less memory and less address space. We're unlikely to have 32 cores on a 32-bit CPU.228static constexpr int MAX_POSSIBLE_TASKS = 16;229#else230static constexpr int MAX_POSSIBLE_TASKS = 64;231#endif232// This is about 1MB of state data.233static constexpr int QUEUED_STATES = 4096;234// These are 1KB each, so half an MB.235static constexpr int QUEUED_CLUTS = 512;236// About 360 KB, but we have usually 16 or less of them, so 5 MB - 22 MB.237static constexpr int QUEUED_PRIMS = 2048;238239typedef BinQueue<Rasterizer::RasterizerState, QUEUED_STATES> BinStateQueue;240typedef BinQueue<BinClut, QUEUED_CLUTS> BinClutQueue;241typedef BinQueue<BinItem, QUEUED_PRIMS> BinItemQueue;242243private:244BinStateQueue states_;245BinClutQueue cluts_;246uint16_t stateIndex_;247uint16_t clutIndex_;248BinCoords scissor_;249BinItemQueue queue_;250BinCoords queueRange_;251SoftDirty dirty_ = SoftDirty::NONE;252253int maxTasks_ = 1;254bool tasksSplit_ = false;255std::vector<BinCoords> taskRanges_;256BinItemQueue taskQueues_[MAX_POSSIBLE_TASKS];257BinTaskList taskLists_[MAX_POSSIBLE_TASKS];258std::atomic<bool> taskStatus_[MAX_POSSIBLE_TASKS];259BinWaitable *waitable_ = nullptr;260261BinDirtyRange pendingWrites_[2]{};262std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;263264bool pendingOverlap_ = false;265bool creatingState_ = false;266uint16_t pendingStateIndex_ = 0;267268std::unordered_map<const char *, double> flushReasonTimes_;269std::unordered_map<const char *, double> lastFlushReasonTimes_;270const char *slowestFlushReason_ = nullptr;271double slowestFlushTime_ = 0.0;272int lastFlipstats_ = 0;273int enqueues_ = 0;274int mostThreads_ = 0;275276void MarkPendingReads(const Rasterizer::RasterizerState &state);277void MarkPendingWrites(const Rasterizer::RasterizerState &state);278bool HasTextureWrite(const Rasterizer::RasterizerState &state);279static bool IsExactSelfRender(const Rasterizer::RasterizerState &state, const BinItem &item);280void OptimizePendingStates(uint16_t first, uint16_t last);281BinCoords Scissor(BinCoords range);282BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);283BinCoords Range(const VertexData &v0, const VertexData &v1);284BinCoords Range(const VertexData &v0);285void Expand(const BinCoords &range);286287friend class DrawBinItemsTask;288};289290291