CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/BinManager.h
Views: 1401
1
// Copyright (c) 2022- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#pragma once
19
20
#include <atomic>
21
#include <unordered_map>
22
#include "GPU/Software/Rasterizer.h"
23
24
struct BinWaitable;
25
class DrawBinItemsTask;
26
27
// Tag stored in BinItem::type identifying what kind of primitive a queued item is.
// The enumerators presumably line up with the BinManager::Add*() entry points
// (AddTriangle, AddClearRect, ...) - confirm against the implementation file.
// Values are spelled out explicitly; they match the implicit 0..5 numbering.
enum class BinItemType : uint8_t {
	TRIANGLE = 0,
	CLEAR_RECT = 1,
	RECT = 2,
	SPRITE = 3,
	LINE = 4,
	POINT = 5,
};
35
36
// Axis-aligned integer rectangle described by two corners, (x1, y1) and (x2, y2).
struct BinCoords {
	int x1;
	int y1;
	int x2;
	int y2;

	// True when the second corner lies before the first on either axis.
	// A rectangle whose corners coincide (x1 == x2, y1 == y2) is NOT invalid.
	bool Invalid() const {
		const bool xOrdered = x1 <= x2;
		const bool yOrdered = y1 <= y2;
		return !(xOrdered && yOrdered);
	}

	// Defined out of line; by its name, returns the overlap of this rectangle with range.
	BinCoords Intersect(const BinCoords &range) const;
};
48
49
struct BinItem {
50
BinItemType type;
51
uint16_t stateIndex;
52
BinCoords range;
53
VertexData v0;
54
VertexData v1;
55
VertexData v2;
56
};
57
58
template <typename T, size_t N>
59
struct BinQueue {
60
BinQueue() {
61
Reset();
62
}
63
~BinQueue() {
64
FreeAlignedMemory(items_);
65
}
66
67
void Setup() {
68
items_ = (T *)AllocateAlignedMemory(sizeof_, 16);
69
}
70
71
void Reset() {
72
head_ = 0;
73
tail_ = 0;
74
size_ = 0;
75
}
76
77
size_t Push(const T &item) {
78
size_t i = tail_++;
79
if (i + 1 == N)
80
tail_ -= N;
81
items_[i] = item;
82
size_++;
83
return i;
84
}
85
86
T Pop() {
87
size_t i = head_++;
88
if (i + 1 == N)
89
head_ -= N;
90
T item = items_[i];
91
size_--;
92
return item;
93
}
94
95
// Only safe if you're the only one reading.
96
T &PeekNext() {
97
return items_[head_];
98
}
99
100
void SkipNext() {
101
size_t i = head_++;
102
if (i + 1 == N)
103
head_ -= N;
104
size_--;
105
}
106
107
// Only safe if you're the only one reading.
108
const T &Peek(size_t offset) const {
109
size_t i = head_ + offset;
110
if (i >= N)
111
i -= N;
112
return items_[i];
113
}
114
115
// Only safe if you're the only one writing.
116
T &PeekPush() {
117
return items_[tail_];
118
}
119
120
size_t PushPeeked() {
121
size_t i = tail_++;
122
if (i + 1 == N)
123
tail_ -= N;
124
size_++;
125
return i;
126
}
127
128
size_t Size() const {
129
return size_;
130
}
131
132
bool Full() const {
133
return size_ == N - 1;
134
}
135
136
bool NearFull() const {
137
return size_ >= N - 2;
138
}
139
140
bool Empty() const {
141
return size_ == 0;
142
}
143
144
T &operator[](size_t index) {
145
return items_[index];
146
}
147
148
const T &operator[](size_t index) const {
149
return items_[index];
150
}
151
152
T *items_ = nullptr;
153
std::atomic<size_t> head_;
154
std::atomic<size_t> tail_ ;
155
std::atomic<size_t> size_;
156
static constexpr size_t sizeof_ = sizeof(T) * N;
157
};
158
159
// Storage for one queued CLUT snapshot: a flat 1024-byte buffer.
// Declared as a union with a single byte-array view.
union BinClut {
	uint8_t readable[1024];
};
162
163
struct BinTaskList {
164
// We shouldn't ever need more than two at once, since we use an atomic to run one at a time.
165
// A second could run due to overlap during teardown.
166
static constexpr int N = 2;
167
168
DrawBinItemsTask *tasks[N]{};
169
int count = 0;
170
171
DrawBinItemsTask *Next() {
172
return tasks[count % N];
173
}
174
};
175
176
struct BinDirtyRange {
177
uint32_t base;
178
uint32_t strideBytes;
179
uint32_t widthBytes;
180
uint32_t height;
181
182
void Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, const DrawingCoords &tl, const DrawingCoords &br);
183
};
184
185
class BinManager {
186
public:
187
BinManager();
188
~BinManager();
189
190
void UpdateState();
191
void UpdateClut(const void *src);
192
193
const Rasterizer::RasterizerState &State() {
194
return states_[stateIndex_];
195
}
196
197
void AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2);
198
void AddClearRect(const VertexData &v0, const VertexData &v1);
199
void AddRect(const VertexData &v0, const VertexData &v1);
200
void AddSprite(const VertexData &v0, const VertexData &v1);
201
void AddLine(const VertexData &v0, const VertexData &v1);
202
void AddPoint(const VertexData &v0);
203
204
void Drain(bool flushing = false);
205
void Flush(const char *reason);
206
bool HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
207
// Assumes you've also checked for a write (writes are partial so are automatically reads.)
208
bool HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h);
209
210
void GetStats(char *buffer, size_t bufsize);
211
void ResetStats();
212
213
void SetDirty(SoftDirty flags) {
214
dirty_ |= flags;
215
}
216
void ClearDirty(SoftDirty flags) {
217
dirty_ &= ~flags;
218
}
219
SoftDirty GetDirty() {
220
return dirty_;
221
}
222
bool HasDirty(SoftDirty flags) {
223
return dirty_ & flags;
224
}
225
226
protected:
227
#if PPSSPP_ARCH(32BIT)
228
// Use less memory and less address space. We're unlikely to have 32 cores on a 32-bit CPU.
229
static constexpr int MAX_POSSIBLE_TASKS = 16;
230
#else
231
static constexpr int MAX_POSSIBLE_TASKS = 64;
232
#endif
233
// This is about 1MB of state data.
234
static constexpr int QUEUED_STATES = 4096;
235
// These are 1KB each, so half an MB.
236
static constexpr int QUEUED_CLUTS = 512;
237
// About 360 KB, but we have usually 16 or less of them, so 5 MB - 22 MB.
238
static constexpr int QUEUED_PRIMS = 2048;
239
240
typedef BinQueue<Rasterizer::RasterizerState, QUEUED_STATES> BinStateQueue;
241
typedef BinQueue<BinClut, QUEUED_CLUTS> BinClutQueue;
242
typedef BinQueue<BinItem, QUEUED_PRIMS> BinItemQueue;
243
244
private:
245
BinStateQueue states_;
246
BinClutQueue cluts_;
247
uint16_t stateIndex_;
248
uint16_t clutIndex_;
249
BinCoords scissor_;
250
BinItemQueue queue_;
251
BinCoords queueRange_;
252
SoftDirty dirty_ = SoftDirty::NONE;
253
254
int maxTasks_ = 1;
255
bool tasksSplit_ = false;
256
std::vector<BinCoords> taskRanges_;
257
BinItemQueue taskQueues_[MAX_POSSIBLE_TASKS];
258
BinTaskList taskLists_[MAX_POSSIBLE_TASKS];
259
std::atomic<bool> taskStatus_[MAX_POSSIBLE_TASKS];
260
BinWaitable *waitable_ = nullptr;
261
262
BinDirtyRange pendingWrites_[2]{};
263
std::unordered_map<uint32_t, BinDirtyRange> pendingReads_;
264
265
bool pendingOverlap_ = false;
266
bool creatingState_ = false;
267
uint16_t pendingStateIndex_ = 0;
268
269
std::unordered_map<const char *, double> flushReasonTimes_;
270
std::unordered_map<const char *, double> lastFlushReasonTimes_;
271
const char *slowestFlushReason_ = nullptr;
272
double slowestFlushTime_ = 0.0;
273
int lastFlipstats_ = 0;
274
int enqueues_ = 0;
275
int mostThreads_ = 0;
276
277
void MarkPendingReads(const Rasterizer::RasterizerState &state);
278
void MarkPendingWrites(const Rasterizer::RasterizerState &state);
279
bool HasTextureWrite(const Rasterizer::RasterizerState &state);
280
static bool IsExactSelfRender(const Rasterizer::RasterizerState &state, const BinItem &item);
281
void OptimizePendingStates(uint16_t first, uint16_t last);
282
BinCoords Scissor(BinCoords range);
283
BinCoords Range(const VertexData &v0, const VertexData &v1, const VertexData &v2);
284
BinCoords Range(const VertexData &v0, const VertexData &v1);
285
BinCoords Range(const VertexData &v0);
286
void Expand(const BinCoords &range);
287
288
friend class DrawBinItemsTask;
289
};
290
291