GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/BinManager.cpp

// Copyright (c) 2022- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <atomic>
#include <condition_variable>
#include <mutex>
#include "Common/Profiler/Profiler.h"
#include "Common/Thread/ThreadManager.h"
#include "Common/TimeUtil.h"
#include "Core/System.h"
#include "GPU/Common/TextureDecoder.h"
#include "GPU/Software/BinManager.h"
#include "GPU/Software/Rasterizer.h"
#include "GPU/Software/RasterizerRectangle.h"

// Sometimes useful for debugging.
static constexpr bool FORCE_SINGLE_THREAD = false;

using namespace Rasterizer;

struct BinWaitable : public Waitable {
public:
	BinWaitable() {
		count_ = 0;
	}

	void Fill() {
		count_++;
	}

	bool Empty() {
		return count_ == 0;
	}

	void Drain() {
		int result = --count_;
		if (result == 0) {
			// We were the last one to finish; wake any waiters.
			std::unique_lock<std::mutex> lock(mutex_);
			cond_.notify_all();
		}
	}

	void Wait() override {
		std::unique_lock<std::mutex> lock(mutex_);
		while (count_ != 0) {
			cond_.wait(lock);
		}
	}

	std::atomic<int> count_;
	std::mutex mutex_;
	std::condition_variable cond_;
};
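
// Rough lifecycle sketch (illustrative, matching how this file uses the class):
// the producer calls Fill() once per worker task it enqueues, each worker calls
// Drain() when its queue is exhausted, and Flush() blocks in Wait() until the
// count returns to zero:
//
//   waitable_->Fill();                // before enqueueing a DrawBinItemsTask
//   g_threadManager.EnqueueTaskOnThread(i, task);
//   ...
//   waitable_->Wait();                // blocks until every task has Drain()ed
//
// Wait() re-checks count_ in a loop, so spurious wakeups are harmless.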

static inline void DrawBinItem(const BinItem &item, const RasterizerState &state) {
	switch (item.type) {
	case BinItemType::TRIANGLE:
		DrawTriangle(item.v0, item.v1, item.v2, item.range, state);
		break;

	case BinItemType::CLEAR_RECT:
		ClearRectangle(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::RECT:
		DrawRectangle(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::SPRITE:
		DrawSprite(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::LINE:
		DrawLine(item.v0, item.v1, item.range, state);
		break;

	case BinItemType::POINT:
		DrawPoint(item.v0, item.range, state);
		break;
	}
}

class DrawBinItemsTask : public Task {
public:
	DrawBinItemsTask(BinWaitable *notify, BinManager::BinItemQueue &items, std::atomic<bool> &status, const BinManager::BinStateQueue &states)
		: notify_(notify), items_(items), status_(status), states_(states) {
	}

	TaskType Type() const override {
		return TaskType::CPU_COMPUTE;
	}

	TaskPriority Priority() const override {
		// Let higher-priority emulation tasks win over this one.
		return TaskPriority::NORMAL;
	}

	void Run() override {
		ProcessItems();
		status_ = false;
		// Items may have raced in after the queue looked empty but before
		// status_ was cleared above, so do another pass to catch them.
		ProcessItems();
		notify_->Drain();
	}

	void Release() override {
		// Don't delete, this is statically allocated.
	}

private:
	void ProcessItems() {
		while (!items_.Empty()) {
			const BinItem &item = items_.PeekNext();
			DrawBinItem(item, states_[item.stateIndex]);
			items_.SkipNext();
		}
	}

	BinWaitable *notify_;
	BinManager::BinItemQueue &items_;
	std::atomic<bool> &status_;
	const BinManager::BinStateQueue &states_;
};
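
// Why Run() processes items twice: the producer (BinManager::Drain) only
// enqueues a new task when taskStatus_[i] is still false. A sketch of the
// interleaving this guards against (hypothetical timeline):
//
//   worker: sees items_ empty          producer: pushes item, sees status true
//   worker: status_ = false            producer: skips enqueueing a new task
//
// Without the second ProcessItems() pass, that pushed item would sit in the
// queue with no task scheduled to consume it before the next Flush() waits.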

constexpr int BinManager::MAX_POSSIBLE_TASKS;

BinManager::BinManager() {
	queueRange_.x1 = 0x7FFFFFFF;
	queueRange_.y1 = 0x7FFFFFFF;
	queueRange_.x2 = 0;
	queueRange_.y2 = 0;

	waitable_ = new BinWaitable();
	for (auto &s : taskStatus_)
		s = false;

	int maxInitTasks = std::min(g_threadManager.GetNumLooperThreads(), MAX_POSSIBLE_TASKS);
	for (int i = 0; i < maxInitTasks; ++i) {
		taskQueues_[i].Setup();
		for (DrawBinItemsTask *&task : taskLists_[i].tasks)
			task = new DrawBinItemsTask(waitable_, taskQueues_[i], taskStatus_[i], states_);
	}
	states_.Setup();
	cluts_.Setup();
	queue_.Setup();
}

BinManager::~BinManager() {
	delete waitable_;

	for (int i = 0; i < MAX_POSSIBLE_TASKS; ++i) {
		for (DrawBinItemsTask *task : taskLists_[i].tasks)
			delete task;
	}
}

void BinManager::UpdateState() {
	PROFILE_THIS_SCOPE("bin_state");
	if (HasDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL)) {
		if (states_.Full())
			Flush("states");
		creatingState_ = true;
		stateIndex_ = (uint16_t)states_.Push(RasterizerState());
		// When new funcs are compiled, we need to flush if WX exclusive.
		ComputeRasterizerState(&states_[stateIndex_], this);
		states_[stateIndex_].samplerID.cached.clut = cluts_[clutIndex_].readable;
		creatingState_ = false;

		ClearDirty(SoftDirty::PIXEL_ALL | SoftDirty::SAMPLER_ALL | SoftDirty::RAST_ALL);
	}

	if (lastFlipstats_ != gpuStats.numFlips) {
		lastFlipstats_ = gpuStats.numFlips;
		ResetStats();
	}

	const auto &state = State();
	const bool hadDepth = pendingWrites_[1].base != 0;

	if (HasDirty(SoftDirty::BINNER_RANGE)) {
		DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
		DrawingCoords scissorBR(std::min(gstate.getScissorX2(), gstate.getRegionX2()), std::min(gstate.getScissorY2(), gstate.getRegionY2()));
		ScreenCoords screenScissorTL = TransformUnit::DrawingToScreen(scissorTL, 0);
		ScreenCoords screenScissorBR = TransformUnit::DrawingToScreen(scissorBR, 0);

		scissor_.x1 = screenScissorTL.x;
		scissor_.y1 = screenScissorTL.y;
		scissor_.x2 = screenScissorBR.x + SCREEN_SCALE_FACTOR - 1;
		scissor_.y2 = screenScissorBR.y + SCREEN_SCALE_FACTOR - 1;

		// If we're about to texture from something still pending (i.e. depth), flush.
		if (HasTextureWrite(state))
			Flush("tex");

		// Okay, now update what's pending.
		MarkPendingWrites(state);

		ClearDirty(SoftDirty::BINNER_RANGE);
	} else if (pendingOverlap_) {
		if (HasTextureWrite(state)) {
			Flush("tex");

			// We need the pending writes set, which flushing cleared. Set them again.
			MarkPendingWrites(state);
		}
	}

	if (HasDirty(SoftDirty::BINNER_OVERLAP)) {
		// This is a good place to record any dependencies for block transfer overlap.
		MarkPendingReads(state);

		// Disallow threads when rendering to the target, even offset.
		bool selfRender = HasTextureWrite(state);
		int newMaxTasks = selfRender || FORCE_SINGLE_THREAD ? 1 : g_threadManager.GetNumLooperThreads();
		if (newMaxTasks > MAX_POSSIBLE_TASKS)
			newMaxTasks = MAX_POSSIBLE_TASKS;
		// We don't want to overlap wrong, so flush any pending.
		if (maxTasks_ != newMaxTasks) {
			maxTasks_ = newMaxTasks;
			Flush("selfrender");
		}
		pendingOverlap_ = pendingOverlap_ || selfRender;

		// Lastly, we have to check if we're newly writing depth we were texturing before.
		// This happens in Call of Duty (depth clear after depth texture), for example.
		if (!hadDepth && state.pixelID.depthWrite) {
			for (size_t i = 0; i < states_.Size(); ++i) {
				if (HasTextureWrite(states_.Peek(i))) {
					Flush("selfdepth");
				}
			}
		}
		ClearDirty(SoftDirty::BINNER_OVERLAP);
	}
}

bool BinManager::HasTextureWrite(const RasterizerState &state) {
	if (!state.enableTextures)
		return false;

	const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];
	for (int i = 0; i <= state.maxTexLevel; ++i) {
		int byteStride = (state.texbufw[i] * textureBits) / 8;
		int byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;
		int h = state.samplerID.cached.sizes[i].h;
		if (HasPendingWrite(state.texaddr[i], byteStride, byteWidth, h))
			return true;
	}

	return false;
}

bool BinManager::IsExactSelfRender(const Rasterizer::RasterizerState &state, const BinItem &item) {
	if (item.type != BinItemType::SPRITE && item.type != BinItemType::RECT)
		return false;
	if (state.textureProj || state.maxTexLevel > 0)
		return false;

	// Only possible if the texture is 1:1.
	if ((state.texaddr[0] & 0x0F1FFFFF) != (gstate.getFrameBufAddress() & 0x0F1FFFFF))
		return false;
	int bufferPixelWidth = BufferFormatBytesPerPixel(state.pixelID.FBFormat());
	int texturePixelWidth = textureBitsPerPixel[state.samplerID.texfmt] / 8;
	if (bufferPixelWidth != texturePixelWidth)
		return false;

	Vec4f tc = Vec4f(item.v0.texturecoords.x, item.v0.texturecoords.y, item.v1.texturecoords.x, item.v1.texturecoords.y);
	if (state.throughMode) {
		// Already at texels, convert to screen.
		tc = tc * SCREEN_SCALE_FACTOR;
	} else {
		// Need to also multiply by width/height in transform mode.
		int w = state.samplerID.cached.sizes[0].w * SCREEN_SCALE_FACTOR;
		int h = state.samplerID.cached.sizes[0].h * SCREEN_SCALE_FACTOR;
		tc = tc * Vec4f(w, h, w, h);
	}

	Vec4<int> tci = tc.Cast<int>();
	if (tci.x != item.v0.screenpos.x || tci.y != item.v0.screenpos.y)
		return false;
	if (tci.z != item.v1.screenpos.x || tci.w != item.v1.screenpos.y)
		return false;

	return true;
}
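
// Worked example (assuming SCREEN_SCALE_FACTOR == 16, the softgpu subpixel
// scale): a through-mode sprite with texel coords (0,0)-(64,64) and screenpos
// (0,0)-(1024,1024) scales the texcoords by 16, giving exactly (0,0)-(1024,1024).
// That matches the screen positions, so each destination pixel reads only the
// texel at the same address it writes, and the draw can still be safely split
// across threads even though it textures from its own render target.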

void BinManager::MarkPendingReads(const Rasterizer::RasterizerState &state) {
	if (!state.enableTextures)
		return;

	const uint8_t textureBits = textureBitsPerPixel[state.samplerID.texfmt];
	for (int i = 0; i <= state.maxTexLevel; ++i) {
		uint32_t byteStride = (state.texbufw[i] * textureBits) / 8;
		uint32_t byteWidth = (state.samplerID.cached.sizes[i].w * textureBits) / 8;
		uint32_t h = state.samplerID.cached.sizes[i].h;
		auto it = pendingReads_.find(state.texaddr[i]);
		if (it != pendingReads_.end()) {
			uint32_t total = byteStride * (h - 1) + byteWidth;
			uint32_t existing = it->second.strideBytes * (it->second.height - 1) + it->second.widthBytes;
			if (existing < total) {
				it->second.strideBytes = std::max(it->second.strideBytes, byteStride);
				it->second.widthBytes = std::max(it->second.widthBytes, byteWidth);
				it->second.height = std::max(it->second.height, h);
			}
		} else {
			auto &range = pendingReads_[state.texaddr[i]];
			range.base = state.texaddr[i];
			range.strideBytes = byteStride;
			range.widthBytes = byteWidth;
			range.height = h;
		}
	}
}

void BinManager::MarkPendingWrites(const Rasterizer::RasterizerState &state) {
	DrawingCoords scissorTL(gstate.getScissorX1(), gstate.getScissorY1());
	DrawingCoords scissorBR(std::min(gstate.getScissorX2(), gstate.getRegionX2()), std::min(gstate.getScissorY2(), gstate.getRegionY2()));

	constexpr uint32_t mirrorMask = 0x041FFFFF;
	const uint32_t bpp = state.pixelID.FBFormat() == GE_FORMAT_8888 ? 4 : 2;
	pendingWrites_[0].Expand(gstate.getFrameBufAddress() & mirrorMask, bpp, gstate.FrameBufStride(), scissorTL, scissorBR);
	if (state.pixelID.depthWrite)
		pendingWrites_[1].Expand(gstate.getDepthBufAddress() & mirrorMask, 2, gstate.DepthBufStride(), scissorTL, scissorBR);
}

inline void BinDirtyRange::Expand(uint32_t newBase, uint32_t bpp, uint32_t stride, const DrawingCoords &tl, const DrawingCoords &br) {
	const uint32_t w = br.x - tl.x + 1;
	const uint32_t h = br.y - tl.y + 1;

	newBase += tl.y * stride * bpp + tl.x * bpp;
	if (base == 0) {
		base = newBase;
		strideBytes = stride * bpp;
		widthBytes = w * bpp;
		height = h;
		return;
	}

	height = std::max(height, h);
	if (base == newBase && strideBytes == stride * bpp) {
		widthBytes = std::max(widthBytes, w * bpp);
		return;
	}

	if (stride != 0)
		height += ((int)base - (int)newBase) / (stride * bpp);
	base = std::min(base, newBase);
	strideBytes = std::max(strideBytes, stride * bpp);
	widthBytes = strideBytes;
}
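
// Worked example of the merge path above (addresses hypothetical): an existing
// range at base 0x04088000 with strideBytes 1024, expanded by a draw at
// newBase 0x04087800 (two rows above), takes the lower base 0x04087800 and
// grows height by (0x04088000 - 0x04087800) / 1024 = 2 rows. Since the two
// regions may no longer share a left edge, widthBytes is conservatively
// widened to the full stride.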

void BinManager::UpdateClut(const void *src) {
	PROFILE_THIS_SCOPE("bin_clut");
	if (cluts_.Full())
		Flush("cluts");
	BinClut &clut = cluts_.PeekPush();
	memcpy(clut.readable, src, sizeof(BinClut));
	clutIndex_ = (uint16_t)cluts_.PushPeeked();
}
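
// PeekPush()/PushPeeked() is a two-step push: PeekPush() returns a reference
// to the next free slot so the CLUT bytes can be copied into it in place, and
// PushPeeked() then publishes that slot and returns its index. This avoids
// copying a full BinClut through a temporary.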

void BinManager::AddTriangle(const VertexData &v0, const VertexData &v1, const VertexData &v2) {
	Vec2<int> d01((int)v0.screenpos.x - (int)v1.screenpos.x, (int)v0.screenpos.y - (int)v1.screenpos.y);
	Vec2<int> d02((int)v0.screenpos.x - (int)v2.screenpos.x, (int)v0.screenpos.y - (int)v2.screenpos.y);
	Vec2<int> d12((int)v1.screenpos.x - (int)v2.screenpos.x, (int)v1.screenpos.y - (int)v2.screenpos.y);

	// Drop primitives which are not in CCW order by checking the cross product.
	static_assert(SCREEN_SCALE_FACTOR <= 16, "Fails if scale factor is too high");
	if (d01.x * d02.y - d01.y * d02.x < 0)
		return;
	// If all points share the same x or the same y, the triangle is degenerate:
	// we'd have 0 weights and not skip pixels properly, so skip it here.
	if ((d01.x == 0 && d02.x == 0) || (d01.y == 0 && d02.y == 0))
		return;

	// Was it fully outside the scissor?
	const BinCoords range = Range(v0, v1, v2);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::TRIANGLE, stateIndex_, range, v0, v1, v2 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, v2);
	Expand(range);
}
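
// Example of the CCW test with small numbers: for v0=(0,0), v1=(16,0),
// v2=(0,16), d01=(-16,0) and d02=(0,-16), so the cross product is
// (-16)*(-16) - 0*0 = 256 > 0 and the triangle is kept. Swapping v1 and v2
// flips the sign to -256, and the triangle is culled as back-facing.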

void BinManager::AddClearRect(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::CLEAR_RECT, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
	Expand(range);
}

void BinManager::AddRect(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::RECT, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
	Expand(range);
}

void BinManager::AddSprite(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::SPRITE, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, true);
	Expand(range);
}

void BinManager::AddLine(const VertexData &v0, const VertexData &v1) {
	const BinCoords range = Range(v0, v1);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::LINE, stateIndex_, range, v0, v1 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0, v1, false);
	Expand(range);
}

void BinManager::AddPoint(const VertexData &v0) {
	const BinCoords range = Range(v0);
	if (range.Invalid())
		return;

	if (queue_.Full())
		Drain();
	queue_.Push(BinItem{ BinItemType::POINT, stateIndex_, range, v0 });
	CalculateRasterStateFlags(&states_[stateIndex_], v0);
	Expand(range);
}

void BinManager::Drain(bool flushing) {
	PROFILE_THIS_SCOPE("bin_drain");

	// If the waitable has fully drained, we can update our binning decisions.
	if (!tasksSplit_ || waitable_->Empty()) {
		int w2 = (queueRange_.x2 - queueRange_.x1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2);
		int h2 = (queueRange_.y2 - queueRange_.y1 + (SCREEN_SCALE_FACTOR * 2 - 1)) / (SCREEN_SCALE_FACTOR * 2);

		// Always bin the entire possible range, but focus on the drawn area.
		ScreenCoords tl(0, 0, 0);
		ScreenCoords br(1024 * SCREEN_SCALE_FACTOR, 1024 * SCREEN_SCALE_FACTOR, 0);

		if (pendingOverlap_ && maxTasks_ == 1 && flushing && queue_.Size() == 1 && !FORCE_SINGLE_THREAD) {
			// If the drawing is 1:1, we can potentially use threads. It's worth checking.
			const auto &item = queue_.PeekNext();
			const auto &state = states_[item.stateIndex];
			if (IsExactSelfRender(state, item))
				maxTasks_ = std::min(g_threadManager.GetNumLooperThreads(), MAX_POSSIBLE_TASKS);
		}

		taskRanges_.clear();
		if (h2 >= 18 && w2 >= h2 * 4) {
			int bin_w = std::max(4, (w2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2;
			taskRanges_.push_back(BinCoords{ tl.x, tl.y, queueRange_.x1 + bin_w - 1, br.y - 1 });
			for (int x = queueRange_.x1 + bin_w; x <= queueRange_.x2; x += bin_w) {
				int x2 = x + bin_w > queueRange_.x2 ? br.x : x + bin_w;
				taskRanges_.push_back(BinCoords{ x, tl.y, x2 - 1, br.y - 1 });
			}
		} else if (h2 >= 18 && w2 >= 18) {
			int bin_h = std::max(4, (h2 + maxTasks_ - 1) / maxTasks_) * SCREEN_SCALE_FACTOR * 2;
			taskRanges_.push_back(BinCoords{ tl.x, tl.y, br.x - 1, queueRange_.y1 + bin_h - 1 });
			for (int y = queueRange_.y1 + bin_h; y <= queueRange_.y2; y += bin_h) {
				int y2 = y + bin_h > queueRange_.y2 ? br.y : y + bin_h;
				taskRanges_.push_back(BinCoords{ tl.x, y, br.x - 1, y2 - 1 });
			}
		}

		tasksSplit_ = true;
	}

	// Let's try to optimize states, if we can.
	OptimizePendingStates(pendingStateIndex_, stateIndex_);
	pendingStateIndex_ = stateIndex_;

	if (taskRanges_.size() <= 1) {
		PROFILE_THIS_SCOPE("bin_drain_single");
		while (!queue_.Empty()) {
			const BinItem &item = queue_.PeekNext();
			DrawBinItem(item, states_[item.stateIndex]);
			queue_.SkipNext();
		}
	} else {
		int max = flushing ? QUEUED_PRIMS : QUEUED_PRIMS / 2;
		while (!queue_.Empty()) {
			const BinItem &item = queue_.PeekNext();
			for (int i = 0; i < (int)taskRanges_.size(); ++i) {
				const BinCoords range = taskRanges_[i].Intersect(item.range);
				if (range.Invalid())
					continue;

				if (taskQueues_[i].NearFull()) {
					// This shouldn't often happen, but if it does, wait for space.
					if (taskQueues_[i].Full())
						waitable_->Wait();
					// If we're not flushing and not near full, let's just continue later.
					// Near full means we'd drain on next prim, so better to finish it now.
					else if (!flushing && !queue_.NearFull())
						max = 0;
				}

				BinItem &taskItem = taskQueues_[i].PeekPush();
				taskItem = item;
				taskItem.range = range;
				taskQueues_[i].PushPeeked();
			}
			queue_.SkipNext();
			if (--max <= 0)
				break;
		}

		int threads = 0;
		for (int i = 0; i < (int)taskRanges_.size(); ++i) {
			if (taskQueues_[i].Empty())
				continue;
			threads++;
			if (taskStatus_[i])
				continue;

			waitable_->Fill();
			taskStatus_[i] = true;
			g_threadManager.EnqueueTaskOnThread(i, taskLists_[i].Next());
			enqueues_++;
		}

		mostThreads_ = std::max(mostThreads_, threads);
	}
}
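
// Splitting example (assuming SCREEN_SCALE_FACTOR == 16 and maxTasks_ == 4):
// a 480x272-pixel queue range gives w2 = 480*16/32 = 240 and h2 = 272*16/32 = 136
// in two-pixel units. Since h2 >= 18 but w2 < h2 * 4, the second branch splits
// along y with bin_h = ((136 + 3) / 4) * 32 = 1088 screen units, i.e. 68 pixels
// of height per band, and each task rasterizes only the prims intersecting
// its band.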

void BinManager::Flush(const char *reason) {
	if (queueRange_.x1 == 0x7FFFFFFF)
		return;

	double st = 0.0;
	if (coreCollectDebugStats)
		st = time_now_d();
	Drain(true);
	waitable_->Wait();
	taskRanges_.clear();
	tasksSplit_ = false;

	queue_.Reset();
	while (states_.Size() > 1)
		states_.SkipNext();
	while (cluts_.Size() > 1)
		cluts_.SkipNext();

	Rasterizer::FlushJit();
	Sampler::FlushJit();

	queueRange_.x1 = 0x7FFFFFFF;
	queueRange_.y1 = 0x7FFFFFFF;
	queueRange_.x2 = 0;
	queueRange_.y2 = 0;

	for (auto &pending : pendingWrites_)
		pending.base = 0;
	pendingOverlap_ = false;
	pendingReads_.clear();

	// We'll need to set the pending writes and reads again, since we just flushed them.
	dirty_ |= SoftDirty::BINNER_RANGE | SoftDirty::BINNER_OVERLAP;

	if (coreCollectDebugStats) {
		double et = time_now_d();
		flushReasonTimes_[reason] += et - st;
		if (et - st > slowestFlushTime_) {
			slowestFlushTime_ = et - st;
			slowestFlushReason_ = reason;
		}
	}
}

void BinManager::OptimizePendingStates(uint16_t first, uint16_t last) {
	// We can sometimes hit this when compiling new funcs while creating a state.
	// At that point, the state isn't fully loaded yet, so don't touch it.
	if (creatingState_ && last == stateIndex_) {
		if (first == last)
			return;
		last--;
	}

	int count = (QUEUED_STATES + last - first) % QUEUED_STATES + 1;
	for (int i = 0; i < count; ++i) {
		size_t pos = (first + i) % QUEUED_STATES;
		OptimizeRasterState(&states_[pos]);
	}
}
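
// The modular count handles wraparound of the state ring buffer. For example
// (taking a hypothetical QUEUED_STATES of 256): first = 254, last = 1 gives
// (256 + 1 - 254) % 256 + 1 = 4, so states 254, 255, 0, and 1 are optimized.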

bool BinManager::HasPendingWrite(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
	// We can only write to VRAM.
	if (!Memory::IsVRAMAddress(start))
		return false;
	// Ignore mirrors for overlap detection.
	start &= 0x041FFFFF;

	uint32_t size = stride * (h - 1) + w;
	for (const auto &range : pendingWrites_) {
		if (range.base == 0 || range.strideBytes == 0)
			continue;
		if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)
			continue;

		// Let's simply go through each line; the queried region might sit in the stride gap.
		uint32_t row = start;
		for (uint32_t y = 0; y < h; ++y) {
			int32_t offset = row - range.base;
			int32_t rangeY = offset / (int32_t)range.strideBytes;
			uint32_t rangeX = offset % (int32_t)range.strideBytes;
			if (rangeY >= 0 && (uint32_t)rangeY < range.height) {
				// If this row is either within width, or extends beyond stride, overlap.
				if (rangeX < range.widthBytes || rangeX + w >= range.strideBytes)
					return true;
			}

			row += stride;
		}
	}

	return false;
}
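
// Stride-gap example (values hypothetical): a pending write with base
// 0x04000000, strideBytes 2048, widthBytes 1024 leaves bytes 1024..2047 of
// each row untouched. A read starting at 0x04000200 falls inside the written
// width (rangeX = 0x200 < 1024) and overlaps; a read starting at 0x04000500
// with a matching 2048-byte stride and w < 0x300 lands in the gap on every
// row it touches and does not.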

bool BinManager::HasPendingRead(uint32_t start, uint32_t stride, uint32_t w, uint32_t h) {
	if (Memory::IsVRAMAddress(start)) {
		// Ignore VRAM mirrors.
		start &= 0x041FFFFF;
	} else {
		// Ignore only regular RAM mirrors.
		start &= 0x3FFFFFFF;
	}

	uint32_t size = stride * (h - 1) + w;
	for (const auto &pair : pendingReads_) {
		const auto &range = pair.second;
		if (start >= range.base + range.height * range.strideBytes || start + size <= range.base)
			continue;

		// Stride gaps are uncommon with reads, so don't bother.
		return true;
	}

	return false;
}

void BinManager::GetStats(char *buffer, size_t bufsize) {
	double allTotal = 0.0;
	double slowestTotalTime = 0.0;
	const char *slowestTotalReason = nullptr;
	for (auto &it : flushReasonTimes_) {
		if (it.second > slowestTotalTime) {
			slowestTotalTime = it.second;
			slowestTotalReason = it.first;
		}
		allTotal += it.second;
	}

	// Many games are 30 FPS, so check last frame too for better stats.
	double recentTotal = allTotal;
	double slowestRecentTime = slowestTotalTime;
	const char *slowestRecentReason = slowestTotalReason;
	for (auto &it : lastFlushReasonTimes_) {
		if (it.second > slowestRecentTime) {
			slowestRecentTime = it.second;
			slowestRecentReason = it.first;
		}
		recentTotal += it.second;
	}

	snprintf(buffer, bufsize,
		"Slowest individual flush: %s (%0.4f)\n"
		"Slowest frame flush: %s (%0.4f)\n"
		"Slowest recent flush: %s (%0.4f)\n"
		"Total flush time: %0.4f (%05.2f%%, last 2: %05.2f%%)\n"
		"Thread enqueues: %d, count %d",
		slowestFlushReason_, slowestFlushTime_,
		slowestTotalReason, slowestTotalTime,
		slowestRecentReason, slowestRecentTime,
		allTotal, allTotal * (6000.0 / 1.001), recentTotal * (3000.0 / 1.001),
		enqueues_, mostThreads_);
}

void BinManager::ResetStats() {
	lastFlushReasonTimes_ = std::move(flushReasonTimes_);
	flushReasonTimes_.clear();
	slowestFlushReason_ = nullptr;
	slowestFlushTime_ = 0.0;
	enqueues_ = 0;
	mostThreads_ = 0;
}

inline BinCoords BinCoords::Intersect(const BinCoords &range) const {
	BinCoords sub;
	sub.x1 = std::max(x1, range.x1);
	sub.y1 = std::max(y1, range.y1);
	sub.x2 = std::min(x2, range.x2);
	sub.y2 = std::min(y2, range.y2);
	return sub;
}

BinCoords BinManager::Scissor(BinCoords range) {
	return range.Intersect(scissor_);
}

BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1, const VertexData &v2) {
	BinCoords range;
	range.x1 = std::min(std::min(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1);
	range.y1 = std::min(std::min(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1);
	range.x2 = std::max(std::max(v0.screenpos.x, v1.screenpos.x), v2.screenpos.x) | (SCREEN_SCALE_FACTOR - 1);
	range.y2 = std::max(std::max(v0.screenpos.y, v1.screenpos.y), v2.screenpos.y) | (SCREEN_SCALE_FACTOR - 1);
	return Scissor(range);
}

BinCoords BinManager::Range(const VertexData &v0, const VertexData &v1) {
	BinCoords range;
	range.x1 = std::min(v0.screenpos.x, v1.screenpos.x) & ~(SCREEN_SCALE_FACTOR - 1);
	range.y1 = std::min(v0.screenpos.y, v1.screenpos.y) & ~(SCREEN_SCALE_FACTOR - 1);
	range.x2 = std::max(v0.screenpos.x, v1.screenpos.x) | (SCREEN_SCALE_FACTOR - 1);
	range.y2 = std::max(v0.screenpos.y, v1.screenpos.y) | (SCREEN_SCALE_FACTOR - 1);
	return Scissor(range);
}

BinCoords BinManager::Range(const VertexData &v0) {
	BinCoords range;
	range.x1 = v0.screenpos.x & ~(SCREEN_SCALE_FACTOR - 1);
	range.y1 = v0.screenpos.y & ~(SCREEN_SCALE_FACTOR - 1);
	range.x2 = v0.screenpos.x | (SCREEN_SCALE_FACTOR - 1);
	range.y2 = v0.screenpos.y | (SCREEN_SCALE_FACTOR - 1);
	return Scissor(range);
}
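
// The mask math aligns the range outward to whole pixels (assuming
// SCREEN_SCALE_FACTOR is a power of two, 16 in softgpu): x & ~15 rounds down
// to the first subpixel of a pixel, and x | 15 rounds up to its last. For
// example, screenpos x = 37 yields x1 = 32 and x2 = 47, covering the full
// pixel that subpixel 37 falls in.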

void BinManager::Expand(const BinCoords &range) {
	queueRange_.x1 = std::min(queueRange_.x1, range.x1);
	queueRange_.y1 = std::min(queueRange_.y1, range.y1);
	queueRange_.x2 = std::max(queueRange_.x2, range.x2);
	queueRange_.y2 = std::max(queueRange_.y2, range.y2);

	if (maxTasks_ == 1 || (queueRange_.y2 - queueRange_.y1 >= 224 * SCREEN_SCALE_FACTOR && enqueues_ < 36 * maxTasks_)) {
		if (pendingOverlap_)
			Flush("expand");
		else
			Drain();
	}
}
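
// A note on the heuristic above: with a single task, it's cheapest to just
// render each prim as it arrives. Otherwise, once the queued range is at
// least 224 pixels tall (most of the PSP's 272-line screen), little binning
// precision remains to be gained, so work starts early; the
// enqueues_ < 36 * maxTasks_ guard stops this once plenty of tasks have
// already been enqueued this frame.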