CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Sign Up · Sign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUCommon.cpp
Views: 1401
1
#include "ppsspp_config.h"
2
3
#if defined(_M_SSE)
4
#include <emmintrin.h>
5
#endif
6
#if PPSSPP_ARCH(ARM_NEON)
7
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
8
#include <arm64_neon.h>
9
#else
10
#include <arm_neon.h>
11
#endif
12
#endif
13
14
#include <algorithm>
15
16
#include "Common/Profiler/Profiler.h"
17
18
#include "Common/GraphicsContext.h"
19
#include "Common/LogReporting.h"
20
#include "Common/Serialize/Serializer.h"
21
#include "Common/Serialize/SerializeFuncs.h"
22
#include "Common/Serialize/SerializeList.h"
23
#include "Common/TimeUtil.h"
24
#include "GPU/GeDisasm.h"
25
#include "GPU/GPU.h"
26
#include "GPU/GPUCommon.h"
27
#include "GPU/GPUState.h"
28
#include "Core/Config.h"
29
#include "Core/CoreTiming.h"
30
#include "Core/Debugger/MemBlockInfo.h"
31
#include "Core/MemMap.h"
32
#include "Core/Reporting.h"
33
#include "Core/HLE/HLE.h"
34
#include "Core/HLE/sceKernelMemory.h"
35
#include "Core/HLE/sceKernelInterrupt.h"
36
#include "Core/HLE/sceKernelThread.h"
37
#include "Core/HLE/sceGe.h"
38
#include "Core/HW/Display.h"
39
#include "Core/Util/PPGeDraw.h"
40
#include "Core/MemMapHelpers.h"
41
#include "GPU/Common/DrawEngineCommon.h"
42
#include "GPU/Common/FramebufferManagerCommon.h"
43
#include "GPU/Common/TextureCacheCommon.h"
44
#include "GPU/Debugger/Debugger.h"
45
#include "GPU/Debugger/Record.h"
46
47
// Flushes any batched-up draw work in the draw engine to the graphics backend.
void GPUCommon::Flush() {
	drawEngineCommon_->DispatchFlush();
}
50
51
// Same as Flush() - forwards directly to the draw engine's flush.
void GPUCommon::DispatchFlush() {
	drawEngineCommon_->DispatchFlush();
}
54
55
// Constructor: stores the graphics context and draw context, then resets all
// display-list bookkeeping and GPU state to power-on defaults.
GPUCommon::GPUCommon(GraphicsContext *gfxCtx, Draw::DrawContext *draw) :
	gfxCtx_(gfxCtx),
	draw_(draw)
{
	// This assert failed on GCC x86 32-bit (but not MSVC 32-bit!) before adding the
	// "padding" field at the end. This is important for save state compatibility.
	// The compiler was not rounding the struct size up to an 8 byte boundary, which
	// you'd expect due to the int64 field, but the Linux ABI apparently does not require that.
	static_assert(sizeof(DisplayList) == 456, "Bad DisplayList size");

	Reinitialize();
	gstate.Reset();
	gstate_c.Reset();
	gpuStats.Reset();

	// PPGe (in-emulator UI drawing) needs the draw context as well.
	PPGeSetDrawContext(draw);
	// Publish the (just reset) matrix state to the debugger-visible copies.
	ResetMatrices();
}
73
74
// Called by the host at the start of each frame: reapplies GPU state and reacts
// to any config, display-size or render-size changes since the last frame.
void GPUCommon::BeginHostFrame() {
	ReapplyGfxState();

	// TODO: Assume config may have changed - maybe move to resize.
	gstate_c.Dirty(DIRTY_ALL);

	UpdateCmdInfo();

	UpdateMSAALevel(draw_);
	CheckConfigChanged();
	CheckDisplayResized();
	CheckRenderResized();
}
87
88
void GPUCommon::EndHostFrame() {
89
// Probably not necessary.
90
if (draw_) {
91
draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
92
}
93
}
94
95
// Resets all display-list state: clears every list slot, the queue bookkeeping,
// timing counters, and (when attached) the texture cache and framebuffers.
void GPUCommon::Reinitialize() {
	memset(dls, 0, sizeof(dls));
	for (int i = 0; i < DisplayListMaxCount; ++i) {
		dls[i].state = PSP_GE_DL_STATE_NONE;
		dls[i].waitTicks = 0;
	}

	nextListID = 0;
	currentList = nullptr;
	isbreak = false;
	drawCompleteTicks = 0;
	busyTicks = 0;
	timeSpentStepping_ = 0.0;
	interruptsEnabled_ = true;

	// These may be null if the backend hasn't attached them yet.
	if (textureCache_)
		textureCache_->Clear(true);
	if (framebufferManager_)
		framebufferManager_->DestroyAllFBOs();
}
115
116
int GPUCommon::EstimatePerVertexCost() {
117
// TODO: This is transform cost, also account for rasterization cost somehow... although it probably
118
// runs in parallel with transform.
119
120
// Also, this is all pure guesswork. If we can find a way to do measurements, that would be great.
121
122
// GTA wants a low value to run smooth, GoW wants a high value (otherwise it thinks things
123
// went too fast and starts doing all the work over again).
124
125
int cost = 20;
126
if (gstate.isLightingEnabled()) {
127
cost += 10;
128
129
for (int i = 0; i < 4; i++) {
130
if (gstate.isLightChanEnabled(i))
131
cost += 7;
132
}
133
}
134
135
if (gstate.getUVGenMode() != GE_TEXMAP_TEXTURE_COORDS) {
136
cost += 20;
137
}
138
int morphCount = gstate.getNumMorphWeights();
139
if (morphCount > 1) {
140
cost += 5 * morphCount;
141
}
142
return cost;
143
}
144
145
void GPUCommon::PopDLQueue() {
146
if(!dlQueue.empty()) {
147
dlQueue.pop_front();
148
if(!dlQueue.empty()) {
149
bool running = currentList->state == PSP_GE_DL_STATE_RUNNING;
150
currentList = &dls[dlQueue.front()];
151
if (running)
152
currentList->state = PSP_GE_DL_STATE_RUNNING;
153
} else {
154
currentList = nullptr;
155
}
156
}
157
}
158
159
bool GPUCommon::BusyDrawing() {
160
u32 state = DrawSync(1);
161
if (state == PSP_GE_LIST_DRAWING || state == PSP_GE_LIST_STALLING) {
162
if (currentList && currentList->state != PSP_GE_DL_STATE_PAUSED) {
163
return true;
164
}
165
}
166
return false;
167
}
168
169
// Flags that graphics settings changed; acted on in BeginHostFrame via CheckConfigChanged.
void GPUCommon::NotifyConfigChanged() {
	configChanged_ = true;
}
172
173
// Flags that the render resolution changed; acted on in BeginHostFrame via CheckRenderResized.
void GPUCommon::NotifyRenderResized() {
	renderResized_ = true;
}
176
177
// Flags that the display (window) size changed; acted on in BeginHostFrame via CheckDisplayResized.
void GPUCommon::NotifyDisplayResized() {
	displayResized_ = true;
}
180
181
// Requests a one-shot debug dump of the next PSP frame (see PSPFrame).
void GPUCommon::DumpNextFrame() {
	dumpNextFrame_ = true;
}
184
185
// Implements sceGeDrawSync. mode 0 waits until all drawing completes (recycling
// finished lists if already done); mode 1 polls and returns the current status.
u32 GPUCommon::DrawSync(int mode) {
	gpuStats.numDrawSyncs++;

	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	if (mode == 0) {
		// Wait mode - illegal with dispatch disabled or from inside an interrupt.
		if (!__KernelIsDispatchEnabled()) {
			return SCE_KERNEL_ERROR_CAN_NOT_WAIT;
		}
		if (__IsInInterrupt()) {
			return SCE_KERNEL_ERROR_ILLEGAL_CONTEXT;
		}

		if (drawCompleteTicks > CoreTiming::GetTicks()) {
			__GeWaitCurrentThread(GPU_SYNC_DRAW, 1, "GeDrawSync");
		} else {
			// Already idle - recycle any completed list slots.
			for (int i = 0; i < DisplayListMaxCount; ++i) {
				if (dls[i].state == PSP_GE_DL_STATE_COMPLETED) {
					dls[i].state = PSP_GE_DL_STATE_NONE;
				}
			}
		}
		return 0;
	}

	// Poll mode. If there's no current list, it must be complete.
	DisplayList *top = NULL;
	for (int i : dlQueue) {
		if (dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
			top = &dls[i];
			break;
		}
	}
	if (!top || top->state == PSP_GE_DL_STATE_COMPLETED)
		return PSP_GE_LIST_COMPLETED;

	// NOTE(review): currentList is dereferenced without a null check here; this
	// presumably holds whenever the queue has an uncompleted list - verify.
	if (currentList->pc == currentList->stall)
		return PSP_GE_LIST_STALLING;

	return PSP_GE_LIST_DRAWING;
}
227
228
void GPUCommon::CheckDrawSync() {
229
if (dlQueue.empty()) {
230
for (int i = 0; i < DisplayListMaxCount; ++i)
231
dls[i].state = PSP_GE_DL_STATE_NONE;
232
}
233
}
234
235
// Implements sceGeListSync. mode 1 polls the list's status without blocking;
// mode 0 waits for the list to finish.
int GPUCommon::ListSync(int listid, int mode) {
	gpuStats.numListSyncs++;

	if (listid < 0 || listid >= DisplayListMaxCount)
		return SCE_KERNEL_ERROR_INVALID_ID;

	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	DisplayList& dl = dls[listid];
	if (mode == 1) {
		// Poll: map internal DL state to sceGe list status codes.
		switch (dl.state) {
		case PSP_GE_DL_STATE_QUEUED:
			if (dl.interrupted)
				return PSP_GE_LIST_PAUSED;
			return PSP_GE_LIST_QUEUED;

		case PSP_GE_DL_STATE_RUNNING:
			if (dl.pc == dl.stall)
				return PSP_GE_LIST_STALLING;
			return PSP_GE_LIST_DRAWING;

		case PSP_GE_DL_STATE_COMPLETED:
			return PSP_GE_LIST_COMPLETED;

		case PSP_GE_DL_STATE_PAUSED:
			return PSP_GE_LIST_PAUSED;

		default:
			return SCE_KERNEL_ERROR_INVALID_ID;
		}
	}

	// Wait mode - illegal with dispatch disabled or from inside an interrupt.
	if (!__KernelIsDispatchEnabled()) {
		return SCE_KERNEL_ERROR_CAN_NOT_WAIT;
	}
	if (__IsInInterrupt()) {
		return SCE_KERNEL_ERROR_ILLEGAL_CONTEXT;
	}

	// Only block if the list hasn't reached its wait deadline yet.
	if (dl.waitTicks > CoreTiming::GetTicks()) {
		__GeWaitCurrentThread(GPU_SYNC_LIST, listid, "GeListSync");
	}
	return PSP_GE_LIST_COMPLETED;
}
280
281
int GPUCommon::GetStack(int index, u32 stackPtr) {
282
if (!currentList) {
283
// Seems like it doesn't return an error code?
284
return 0;
285
}
286
287
if (currentList->stackptr <= index) {
288
return SCE_KERNEL_ERROR_INVALID_INDEX;
289
}
290
291
if (index >= 0) {
292
auto stack = PSPPointer<u32_le>::Create(stackPtr);
293
if (stack.IsValid()) {
294
auto entry = currentList->stack[index];
295
// Not really sure what most of these values are.
296
stack[0] = 0;
297
stack[1] = entry.pc + 4;
298
stack[2] = entry.offsetAddr;
299
stack[7] = entry.baseAddr;
300
}
301
}
302
303
return currentList->stackptr;
304
}
305
306
// Converts `count` floats to the GE float24 format, OR-ing in the command bits
// for each output word.
static void CopyMatrix24(u32_le *result, const float *mtx, u32 count, u32 cmdbits) {
	// Screams out for simple SIMD, but probably not called often enough to be worth it.
	const float *src = mtx;
	const float *srcEnd = mtx + count;
	u32_le *dst = result;
	while (src != srcEnd) {
		*dst++ = toFloat24(*src++) | cmdbits;
	}
}
312
313
// Copies the requested GE matrix into `result` as float24 words OR'd with
// `cmdbits`. Bone/texgen/world/view matrices are 12 floats, projection is 16.
// Returns false for an unrecognized matrix type.
bool GPUCommon::GetMatrix24(GEMatrixType type, u32_le *result, u32 cmdbits) {
	switch (type) {
	case GE_MTX_BONE0:
	case GE_MTX_BONE1:
	case GE_MTX_BONE2:
	case GE_MTX_BONE3:
	case GE_MTX_BONE4:
	case GE_MTX_BONE5:
	case GE_MTX_BONE6:
	case GE_MTX_BONE7:
		// The eight bone matrices live consecutively (12 floats each) in boneMatrix.
		CopyMatrix24(result, gstate.boneMatrix + (type - GE_MTX_BONE0) * 12, 12, cmdbits);
		break;
	case GE_MTX_TEXGEN:
		CopyMatrix24(result, gstate.tgenMatrix, 12, cmdbits);
		break;
	case GE_MTX_WORLD:
		CopyMatrix24(result, gstate.worldMatrix, 12, cmdbits);
		break;
	case GE_MTX_VIEW:
		CopyMatrix24(result, gstate.viewMatrix, 12, cmdbits);
		break;
	case GE_MTX_PROJECTION:
		CopyMatrix24(result, gstate.projMatrix, 16, cmdbits);
		break;
	default:
		return false;
	}
	return true;
}
342
343
// Rebuilds the debugger-visible float24 copies of every matrix from gstate and
// dirties all matrix-related state.
void GPUCommon::ResetMatrices() {
	// This means we restored a context, so update the visible matrix data.
	for (size_t i = 0; i < ARRAY_SIZE(gstate.boneMatrix); ++i)
		matrixVisible.bone[i] = toFloat24(gstate.boneMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.worldMatrix); ++i)
		matrixVisible.world[i] = toFloat24(gstate.worldMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.viewMatrix); ++i)
		matrixVisible.view[i] = toFloat24(gstate.viewMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.projMatrix); ++i)
		matrixVisible.proj[i] = toFloat24(gstate.projMatrix[i]);
	for (size_t i = 0; i < ARRAY_SIZE(gstate.tgenMatrix); ++i)
		matrixVisible.tgen[i] = toFloat24(gstate.tgenMatrix[i]);

	// Assume all the matrices changed, so dirty things related to them.
	gstate_c.Dirty(DIRTY_WORLDMATRIX | DIRTY_VIEWMATRIX | DIRTY_PROJMATRIX | DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BONE_UNIFORMS);
}
359
360
// Implements sceGeListEnQueue(Head): validates the list/stall addresses, finds a
// free DisplayList slot, initializes it, and inserts it at the head or tail of
// the queue (starting processing immediately if nothing else is running).
// Returns the new list ID, or an SCE error code on failure.
u32 GPUCommon::EnqueueList(u32 listpc, u32 stall, int subIntrBase, PSPPointer<PspGeListArgs> args, bool head) {
	// TODO Check the stack values in missing arg and ajust the stack depth

	// Check alignment
	// TODO Check the context and stack alignement too
	if (((listpc | stall) & 3) != 0 || !Memory::IsValidAddress(listpc)) {
		ERROR_LOG_REPORT(Log::G3D, "sceGeListEnqueue: invalid address %08x", listpc);
		return SCE_KERNEL_ERROR_INVALID_POINTER;
	}

	// If args->size is below 16, it's the old struct without stack info.
	if (args.IsValid() && args->size >= 16 && args->numStacks >= 256) {
		return hleLogError(Log::G3D, SCE_KERNEL_ERROR_INVALID_SIZE, "invalid stack depth %d", args->numStacks);
	}

	int id = -1;
	u64 currentTicks = CoreTiming::GetTicks();
	u32 stackAddr = args.IsValid() && args->size >= 16 ? (u32)args->stackAddr : 0;
	// Check compatibility
	if (sceKernelGetCompiledSdkVersion() > 0x01FFFFFF) {
		//numStacks = 0;
		//stack = NULL;
		for (int i = 0; i < DisplayListMaxCount; ++i) {
			if (dls[i].state != PSP_GE_DL_STATE_NONE && dls[i].state != PSP_GE_DL_STATE_COMPLETED) {
				// Logically, if the CPU has not interrupted yet, it hasn't seen the latest pc either.
				// Exit enqueues right after an END, which fails without ignoring pendingInterrupt lists.
				if (dls[i].pc == listpc && !dls[i].pendingInterrupt) {
					ERROR_LOG(Log::G3D, "sceGeListEnqueue: can't enqueue, list address %08X already used", listpc);
					return 0x80000021;
				} else if (stackAddr != 0 && dls[i].stackAddr == stackAddr && !dls[i].pendingInterrupt) {
					ERROR_LOG(Log::G3D, "sceGeListEnqueue: can't enqueue, stack address %08X already used", stackAddr);
					return 0x80000021;
				}
			}
		}
	}
	// TODO Check if list stack dls[i].stack already used then return 0x80000021 as above

	// Find a free slot, starting right after the last allocated ID so IDs rotate.
	for (int i = 0; i < DisplayListMaxCount; ++i) {
		int possibleID = (i + nextListID) % DisplayListMaxCount;
		// Reference, not a copy - DisplayList is a large (456-byte) struct and the
		// previous by-value `auto` copied it on every iteration.
		const DisplayList &possibleList = dls[possibleID];
		if (possibleList.pendingInterrupt) {
			continue;
		}

		if (possibleList.state == PSP_GE_DL_STATE_NONE) {
			id = possibleID;
			break;
		}
		// A completed list whose waiters are done can be reused, but keep looking
		// for a NONE slot first.
		if (possibleList.state == PSP_GE_DL_STATE_COMPLETED && possibleList.waitTicks < currentTicks) {
			id = possibleID;
		}
	}
	if (id < 0) {
		ERROR_LOG_REPORT(Log::G3D, "No DL ID available to enqueue");
		for (int i : dlQueue) {
			DisplayList &dl = dls[i];
			DEBUG_LOG(Log::G3D, "DisplayList %d status %d pc %08x stall %08x", i, dl.state, dl.pc, dl.stall);
		}
		return SCE_KERNEL_ERROR_OUT_OF_MEMORY;
	}
	nextListID = id + 1;

	// Initialize the slot. Addresses are masked to the 256MB physical space.
	DisplayList &dl = dls[id];
	dl.id = id;
	dl.startpc = listpc & 0x0FFFFFFF;
	dl.pc = listpc & 0x0FFFFFFF;
	dl.stall = stall & 0x0FFFFFFF;
	dl.subIntrBase = std::max(subIntrBase, -1);
	dl.stackptr = 0;
	dl.signal = PSP_GE_SIGNAL_NONE;
	dl.interrupted = false;
	dl.waitTicks = (u64)-1;
	dl.interruptsEnabled = interruptsEnabled_;
	dl.started = false;
	dl.offsetAddr = 0;
	dl.bboxResult = false;
	dl.stackAddr = stackAddr;

	if (args.IsValid() && args->context.IsValid())
		dl.context = args->context;
	else
		dl.context = 0;

	if (head) {
		// Head insertion requires the current list (if any) to be paused.
		if (currentList) {
			if (currentList->state != PSP_GE_DL_STATE_PAUSED)
				return SCE_KERNEL_ERROR_INVALID_VALUE;
			currentList->state = PSP_GE_DL_STATE_QUEUED;
			// Make sure we clear the signal so we don't try to pause it again.
			currentList->signal = PSP_GE_SIGNAL_NONE;
		}

		dl.state = PSP_GE_DL_STATE_PAUSED;

		currentList = &dl;
		dlQueue.push_front(id);
	} else if (currentList) {
		// Something is already running - queue behind it.
		dl.state = PSP_GE_DL_STATE_QUEUED;
		dlQueue.push_back(id);
	} else {
		// Nothing running - start this list immediately.
		dl.state = PSP_GE_DL_STATE_RUNNING;
		currentList = &dl;
		dlQueue.push_front(id);

		drawCompleteTicks = (u64)-1;

		// TODO save context when starting the list if param is set
		ProcessDLQueue();
	}

	return id;
}
473
474
// Implements sceGeListDeQueue: removes a not-yet-started list from the queue
// and wakes anything waiting on it.
u32 GPUCommon::DequeueList(int listid) {
	const bool badID = listid < 0 || listid >= DisplayListMaxCount;
	if (badID || dls[listid].state == PSP_GE_DL_STATE_NONE)
		return SCE_KERNEL_ERROR_INVALID_ID;

	DisplayList &dl = dls[listid];
	// A list that has begun executing can no longer be dequeued.
	if (dl.started)
		return SCE_KERNEL_ERROR_BUSY;

	dl.state = PSP_GE_DL_STATE_NONE;

	if (dlQueue.front() == listid)
		PopDLQueue();
	else
		dlQueue.remove(listid);

	// Release any waiters on this list.
	dl.waitTicks = 0;
	__GeTriggerWait(GPU_SYNC_LIST, listid);

	CheckDrawSync();

	return 0;
}
496
497
// Implements sceGeListUpdateStallAddr: moves a list's stall address and kicks
// the queue so a previously stalled list can resume.
u32 GPUCommon::UpdateStall(int listid, u32 newstall) {
	if (listid < 0 || listid >= DisplayListMaxCount)
		return SCE_KERNEL_ERROR_INVALID_ID;
	DisplayList &dl = dls[listid];
	if (dl.state == PSP_GE_DL_STATE_NONE)
		return SCE_KERNEL_ERROR_INVALID_ID;
	if (dl.state == PSP_GE_DL_STATE_COMPLETED)
		return SCE_KERNEL_ERROR_ALREADY;

	// Mask to the 256MB physical address space.
	dl.stall = newstall & 0x0FFFFFFF;

	ProcessDLQueue();

	return 0;
}
510
511
// Implements sceGeContinue: resumes a paused current list (or re-queues it when
// paused via sceGeBreak). Error codes differ by compiled SDK version.
u32 GPUCommon::Continue() {
	if (!currentList)
		return 0;

	if (currentList->state == PSP_GE_DL_STATE_PAUSED)
	{
		if (!isbreak) {
			// TODO: Supposedly this returns SCE_KERNEL_ERROR_BUSY in some case, previously it had
			// currentList->signal == PSP_GE_SIGNAL_HANDLER_PAUSE, but it doesn't reproduce.

			currentList->state = PSP_GE_DL_STATE_RUNNING;
			currentList->signal = PSP_GE_SIGNAL_NONE;

			// TODO Restore context of DL is necessary
			// TODO Restore BASE

			// We have a list now, so it's not complete.
			drawCompleteTicks = (u64)-1;
		} else {
			// Was paused by sceGeBreak - just queue it again, don't run yet.
			currentList->state = PSP_GE_DL_STATE_QUEUED;
			currentList->signal = PSP_GE_SIGNAL_NONE;
		}
	}
	else if (currentList->state == PSP_GE_DL_STATE_RUNNING)
	{
		// Already running - newer SDKs get a specific error code.
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000020;
		return -1;
	}
	else
	{
		// Not paused and not running - invalid to continue.
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000004;
		return -1;
	}

	ProcessDLQueue();
	return 0;
}
550
551
// Implements sceGeBreak. mode 1 resets the whole GE queue; mode 0 pauses the
// currently running list (setting isbreak so Continue knows how to resume).
u32 GPUCommon::Break(int mode) {
	if (mode < 0 || mode > 1)
		return SCE_KERNEL_ERROR_INVALID_MODE;

	if (!currentList)
		return SCE_KERNEL_ERROR_ALREADY;

	if (mode == 1)
	{
		// Clear the queue
		dlQueue.clear();
		for (int i = 0; i < DisplayListMaxCount; ++i)
		{
			dls[i].state = PSP_GE_DL_STATE_NONE;
			dls[i].signal = PSP_GE_SIGNAL_NONE;
		}

		nextListID = 0;
		currentList = NULL;
		return 0;
	}

	if (currentList->state == PSP_GE_DL_STATE_NONE || currentList->state == PSP_GE_DL_STATE_COMPLETED)
	{
		// Nothing to break - error code depends on the compiled SDK version.
		if (sceKernelGetCompiledSdkVersion() >= 0x02000000)
			return 0x80000004;
		return -1;
	}

	if (currentList->state == PSP_GE_DL_STATE_PAUSED)
	{
		if (sceKernelGetCompiledSdkVersion() > 0x02000010)
		{
			if (currentList->signal == PSP_GE_SIGNAL_HANDLER_PAUSE)
			{
				ERROR_LOG_REPORT(Log::G3D, "sceGeBreak: can't break signal-pausing list");
			}
			else
				return SCE_KERNEL_ERROR_ALREADY;
		}
		return SCE_KERNEL_ERROR_BUSY;
	}

	if (currentList->state == PSP_GE_DL_STATE_QUEUED)
	{
		currentList->state = PSP_GE_DL_STATE_PAUSED;
		return currentList->id;
	}

	// TODO Save BASE
	// TODO Adjust pc to be just before SIGNAL/END

	// TODO: Is this right?
	if (currentList->signal == PSP_GE_SIGNAL_SYNC)
		currentList->pc += 8;

	currentList->interrupted = true;
	currentList->state = PSP_GE_DL_STATE_PAUSED;
	currentList->signal = PSP_GE_SIGNAL_HANDLER_SUSPEND;
	isbreak = true;

	return currentList->id;
}
614
615
// Records when debugger stepping began, so that stepping time can be excluded
// from display-list processing stats (see NotifySteppingExit / InterpretList).
void GPUCommon::NotifySteppingEnter() {
	if (coreCollectDebugStats) {
		timeSteppingStarted_ = time_now_d();
	}
}
620
void GPUCommon::NotifySteppingExit() {
621
if (coreCollectDebugStats) {
622
if (timeSteppingStarted_ <= 0.0) {
623
ERROR_LOG(Log::G3D, "Mismatched stepping enter/exit.");
624
}
625
double total = time_now_d() - timeSteppingStarted_;
626
_dbg_assert_msg_(total >= 0.0, "Time spent stepping became negative");
627
timeSpentStepping_ += total;
628
timeSteppingStarted_ = 0.0;
629
}
630
}
631
632
// Runs the given display list until it stalls, errors out, or finishes.
// Returns true when the list is done (or errored) and can leave the queue;
// false when it's paused/stalled and should stay at the front.
bool GPUCommon::InterpretList(DisplayList &list) {
	// Initialized to avoid a race condition with bShowDebugStats changing.
	double start = 0.0;
	if (coreCollectDebugStats) {
		start = time_now_d();
	}

	if (list.state == PSP_GE_DL_STATE_PAUSED)
		return false;
	currentList = &list;

	// First time this list runs: save the GE context if the caller supplied one.
	if (!list.started && list.context.IsValid()) {
		gstate.Save(list.context);
	}
	list.started = true;

	gstate_c.offsetAddr = list.offsetAddr;

	if (!Memory::IsValidAddress(list.pc)) {
		ERROR_LOG_REPORT(Log::G3D, "DL PC = %08x WTF!!!!", list.pc);
		return true;
	}

	cycleLastPC = list.pc;
	// Fixed startup cost charged per list.
	cyclesExecuted += 60;
	// downcount = number of ops until the stall address (or "unlimited" if stall == 0).
	downcount = list.stall == 0 ? 0x0FFFFFFF : (list.stall - list.pc) / 4;
	list.state = PSP_GE_DL_STATE_RUNNING;
	list.interrupted = false;

	gpuState = list.pc == list.stall ? GPUSTATE_STALL : GPUSTATE_RUNNING;

	// To enable breakpoints, we don't do fast matrix loads while debugger active.
	debugRecording_ = GPUDebug::IsActive() || GPURecord::IsActive();
	const bool useFastRunLoop = !dumpThisFrame_ && !debugRecording_;
	while (gpuState == GPUSTATE_RUNNING) {
		{
			if (list.pc == list.stall) {
				gpuState = GPUSTATE_STALL;
				downcount = 0;
			}
		}

		if (useFastRunLoop) {
			FastRunLoop(list);
		} else {
			SlowRunLoop(list);
		}

		{
			// Recompute the downcount - the stall address may have moved meanwhile.
			downcount = list.stall == 0 ? 0x0FFFFFFF : (list.stall - list.pc) / 4;

			if (gpuState == GPUSTATE_STALL && list.stall != list.pc) {
				// Unstalled.
				gpuState = GPUSTATE_RUNNING;
			}
		}
	}

	FinishDeferred();
	if (debugRecording_)
		GPURecord::NotifyCPU();

	// We haven't run the op at list.pc, so it shouldn't count.
	if (cycleLastPC != list.pc) {
		UpdatePC(list.pc - 4, list.pc);
	}

	list.offsetAddr = gstate_c.offsetAddr;

	if (coreCollectDebugStats) {
		// Exclude time the debugger spent stepping from list-processing time.
		double total = time_now_d() - start - timeSpentStepping_;
		_dbg_assert_msg_(total >= 0.0, "Time spent DL processing became negative");
		hleSetSteppingTime(timeSpentStepping_);
		DisplayNotifySleep(timeSpentStepping_);
		timeSpentStepping_ = 0.0;
		gpuStats.msProcessingDisplayLists += total;
	}
	return gpuState == GPUSTATE_DONE || gpuState == GPUSTATE_ERROR;
}
711
712
void GPUCommon::PSPFrame() {
713
immCount_ = 0;
714
if (dumpNextFrame_) {
715
NOTICE_LOG(Log::G3D, "DUMPING THIS FRAME");
716
dumpThisFrame_ = true;
717
dumpNextFrame_ = false;
718
} else if (dumpThisFrame_) {
719
dumpThisFrame_ = false;
720
}
721
GPUDebug::NotifyBeginFrame();
722
GPURecord::NotifyBeginFrame();
723
}
724
725
bool GPUCommon::PresentedThisFrame() const {
726
return framebufferManager_ ? framebufferManager_->PresentedThisFrame() : true;
727
}
728
729
// Debugger/recording-aware interpreter loop: notifies the debugger and recorder
// per command, and optionally disassembles each op when dumping the frame.
void GPUCommon::SlowRunLoop(DisplayList &list) {
	const bool dumpThisFrame = dumpThisFrame_;
	while (downcount > 0) {
		// The debugger may veto execution of this command (breakpoints/stepping).
		bool process = GPUDebug::NotifyCommand(list.pc);
		if (process) {
			GPURecord::NotifyCommand(list.pc);
			u32 op = Memory::ReadUnchecked_U32(list.pc);
			u32 cmd = op >> 24;

			// diff = bits that changed versus the last value of this command.
			u32 diff = op ^ gstate.cmdmem[cmd];
			PreExecuteOp(op, diff);
			if (dumpThisFrame) {
				char temp[256];
				u32 prev;
				if (Memory::IsValidAddress(list.pc - 4)) {
					prev = Memory::ReadUnchecked_U32(list.pc - 4);
				} else {
					prev = 0;
				}
				GeDisassembleOp(list.pc, op, prev, temp, 256);
				NOTICE_LOG(Log::G3D, "%08x: %s", op, temp);
			}
			// Commit the new command word before executing it.
			gstate.cmdmem[cmd] = op;

			ExecuteOp(op, diff);
		}

		list.pc += 4;
		--downcount;
	}
}
760
761
// The newPC parameter is used for jumps, we don't count cycles between.
762
void GPUCommon::UpdatePC(u32 currentPC, u32 newPC) {
763
// Rough estimate, 2 CPU ticks (it's double the clock rate) per GPU instruction.
764
u32 executed = (currentPC - cycleLastPC) / 4;
765
cyclesExecuted += 2 * executed;
766
cycleLastPC = newPC;
767
768
// Exit the runloop and recalculate things. This happens a lot in some games.
769
if (currentList)
770
downcount = currentList->stall == 0 ? 0x0FFFFFFF : (currentList->stall - newPC) / 4;
771
else
772
downcount = 0;
773
}
774
775
// Rebuilds backend state by re-executing most of the command memory with a
// full diff, skipping commands with side effects (origin/offset, matrix
// counters, CLUT loads, texture syncs/flushes).
void GPUCommon::ReapplyGfxState() {
	// The commands are embedded in the command memory so we can just reexecute the words. Convenient.
	// To be safe we pass 0xFFFFFFFF as the diff.

	// TODO: Consider whether any of this should really be done. We might be able to get all the way
	// by simplying dirtying the appropriate gstate_c dirty flags.

	for (int i = GE_CMD_VERTEXTYPE; i < GE_CMD_BONEMATRIXNUMBER; i++) {
		if (i != GE_CMD_ORIGIN && i != GE_CMD_OFFSETADDR) {
			ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
		}
	}

	// Can't write to bonematrixnumber here

	for (int i = GE_CMD_MORPHWEIGHT0; i <= GE_CMD_PATCHFACING; i++) {
		ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
	}

	// There are a few here in the middle that we shouldn't execute...

	// 0x42 to 0xEA
	for (int i = GE_CMD_VIEWPORTXSCALE; i < GE_CMD_TRANSFERSTART; i++) {
		switch (i) {
		case GE_CMD_LOADCLUT:
		case GE_CMD_TEXSYNC:
		case GE_CMD_TEXFLUSH:
			break;
		default:
			ExecuteOp(gstate.cmdmem[i], 0xFFFFFFFF);
			break;
		}
	}

	// Let's just skip the transfer size stuff, it's just values.
}
811
812
uint32_t GPUCommon::SetAddrTranslation(uint32_t value) {
813
std::swap(edramTranslation_, value);
814
return value;
815
}
816
817
// Returns the current eDRAM address translation value (set via SetAddrTranslation).
uint32_t GPUCommon::GetAddrTranslation() {
	return edramTranslation_;
}
820
821
// Sets the GPU run state; any non-RUNNING state zeroes the downcount, which
// forces the interpreter loop to exit promptly.
inline void GPUCommon::UpdateState(GPURunState state) {
	gpuState = state;
	if (state != GPUSTATE_RUNNING)
		downcount = 0;
}
826
827
int GPUCommon::GetNextListIndex() {
828
auto iter = dlQueue.begin();
829
if (iter != dlQueue.end()) {
830
return *iter;
831
} else {
832
return -1;
833
}
834
}
835
836
// Drains the display-list queue, interpreting each list until one stalls or
// pauses. On full drain, updates draw-completion timing and triggers GE waits.
void GPUCommon::ProcessDLQueue() {
	startingTicks = CoreTiming::GetTicks();
	cyclesExecuted = 0;

	// Seems to be correct behaviour to process the list anyway?
	if (startingTicks < busyTicks) {
		DEBUG_LOG(Log::G3D, "Can't execute a list yet, still busy for %lld ticks", busyTicks - startingTicks);
		//return;
	}

	for (int listIndex = GetNextListIndex(); listIndex != -1; listIndex = GetNextListIndex()) {
		DisplayList &l = dls[listIndex];
		DEBUG_LOG(Log::G3D, "Starting DL execution at %08x - stall = %08x", l.pc, l.stall);
		if (!InterpretList(l)) {
			// Stalled or paused - leave it at the head of the queue for later.
			return;
		} else {
			// Some other list could've taken the spot while we dilly-dallied around.
			if (l.state != PSP_GE_DL_STATE_QUEUED) {
				// At the end, we can remove it from the queue and continue.
				dlQueue.erase(std::remove(dlQueue.begin(), dlQueue.end(), listIndex), dlQueue.end());
			}
		}
	}

	currentList = nullptr;

	if (coreCollectDebugStats) {
		gpuStats.otherGPUCycles += cyclesExecuted;
	}

	drawCompleteTicks = startingTicks + cyclesExecuted;
	busyTicks = std::max(busyTicks, drawCompleteTicks);
	__GeTriggerSync(GPU_SYNC_DRAW, 1, drawCompleteTicks);
	// Since the event is in CoreTiming, we're in sync. Just set 0 now.
}
871
872
// GE_CMD_OFFSETADDR: sets the base used for relative addressing (data << 8).
void GPUCommon::Execute_OffsetAddr(u32 op, u32 diff) {
	gstate_c.offsetAddr = op << 8;
}
875
876
// GE_CMD_VADDR: latches the vertex data address, resolved relative to the current offset/base.
void GPUCommon::Execute_Vaddr(u32 op, u32 diff) {
	gstate_c.vertexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}
879
880
// GE_CMD_IADDR: latches the index data address, resolved relative to the current offset/base.
void GPUCommon::Execute_Iaddr(u32 op, u32 diff) {
	gstate_c.indexAddr = gstate_c.getRelativeAddress(op & 0x00FFFFFF);
}
883
884
// GE_CMD_ORIGIN: sets the offset address to this instruction's own pc, so
// subsequent relative jumps/calls resolve relative to here.
void GPUCommon::Execute_Origin(u32 op, u32 diff) {
	if (currentList)
		gstate_c.offsetAddr = currentList->pc;
}
888
889
// GE_CMD_JUMP: unconditional jump within the display list. An invalid target
// puts the GPU in the error state.
void GPUCommon::Execute_Jump(u32 op, u32 diff) {
	const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG(Log::G3D, "JUMP to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
		UpdateState(GPUSTATE_ERROR);
		return;
	}
	UpdatePC(currentList->pc, target - 4);
	currentList->pc = target - 4; // pc will be increased after we return, counteract that
}
899
900
// GE_CMD_BJUMP: conditional jump, taken only when the preceding bounding-box
// test left bboxResult false.
void GPUCommon::Execute_BJump(u32 op, u32 diff) {
	if (!currentList->bboxResult) {
		// bounding box jump.
		const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
		gpuStats.numBBOXJumps++;
		if (Memory::IsValidAddress(target)) {
			UpdatePC(currentList->pc, target - 4);
			currentList->pc = target - 4; // pc will be increased after we return, counteract that
		} else {
			ERROR_LOG(Log::G3D, "BJUMP to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
			UpdateState(GPUSTATE_ERROR);
		}
	}
}
914
915
// GE_CMD_CALL: call into a display-list subroutine. Invalid targets are either
// ignored (with bIgnoreBadMemAccess set) or error out the GPU.
void GPUCommon::Execute_Call(u32 op, u32 diff) {
	PROFILE_THIS_SCOPE("gpu_call");

	const u32 target = gstate_c.getRelativeAddress(op & 0x00FFFFFC);
	if (!Memory::IsValidAddress(target)) {
		ERROR_LOG(Log::G3D, "CALL to illegal address %08x - ignoring! data=%06x", target, op & 0x00FFFFFF);
		if (g_Config.bIgnoreBadMemAccess) {
			return;
		}
		UpdateState(GPUSTATE_ERROR);
		return;
	}
	DoExecuteCall(target);
}
929
930
// Performs the actual CALL: fast-loads a bone matrix when the target is the
// common 12x BONEMATRIXDATA + RET pattern, otherwise pushes a stack entry and
// redirects the pc.
void GPUCommon::DoExecuteCall(u32 target) {
	// Bone matrix optimization - many games will CALL a bone matrix (!).
	// We don't optimize during recording - so the matrix data gets recorded.
	if (!debugRecording_ && Memory::IsValidRange(target, 13 * 4) && (Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA) {
		// Check for the end
		if ((Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
			(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
			(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
			// Yep, pretty sure this is a bone matrix call. Double check stall first.
			if (target > currentList->stall || target + 12 * 4 < currentList->stall) {
				FastLoadBoneMatrix(target);
				return;
			}
		}
	}

	if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
		ERROR_LOG(Log::G3D, "CALL: Stack full!");
		// TODO: UpdateState(GPUSTATE_ERROR) ?
	} else {
		// Push the return pc and the current offset address.
		auto &stackEntry = currentList->stack[currentList->stackptr++];
		stackEntry.pc = currentList->pc + 4;
		stackEntry.offsetAddr = gstate_c.offsetAddr;
		// The base address is NOT saved/restored for a regular call.
		UpdatePC(currentList->pc, target - 4);
		currentList->pc = target - 4; // pc will be increased after we return, counteract that
	}
}
958
959
// GE_CMD_RET: pops a stack entry, restoring the saved offset address and
// returning to the saved pc. An empty stack is logged and ignored.
void GPUCommon::Execute_Ret(u32 op, u32 diff) {
	if (currentList->stackptr == 0) {
		DEBUG_LOG(Log::G3D, "RET: Stack empty!");
	} else {
		auto &stackEntry = currentList->stack[--currentList->stackptr];
		gstate_c.offsetAddr = stackEntry.offsetAddr;
		// We always clear the top (uncached/etc.) bits
		const u32 target = stackEntry.pc & 0x0FFFFFFF;
		UpdatePC(currentList->pc, target - 4);
		currentList->pc = target - 4;
#ifdef _DEBUG
		if (!Memory::IsValidAddress(currentList->pc)) {
			ERROR_LOG_REPORT(Log::G3D, "Invalid DL PC %08x on return", currentList->pc);
			UpdateState(GPUSTATE_ERROR);
		}
#endif
	}
}
977
978
void GPUCommon::Execute_End(u32 op, u32 diff) {
979
if (flushOnParams_)
980
Flush();
981
982
const u32 prev = Memory::ReadUnchecked_U32(currentList->pc - 4);
983
UpdatePC(currentList->pc, currentList->pc);
984
// Count in a few extra cycles on END.
985
cyclesExecuted += 60;
986
987
switch (prev >> 24) {
988
case GE_CMD_SIGNAL:
989
{
990
// TODO: see http://code.google.com/p/jpcsp/source/detail?r=2935#
991
SignalBehavior behaviour = static_cast<SignalBehavior>((prev >> 16) & 0xFF);
992
const int signal = prev & 0xFFFF;
993
const int enddata = op & 0xFFFF;
994
bool trigger = true;
995
currentList->subIntrToken = signal;
996
997
switch (behaviour) {
998
case PSP_GE_SIGNAL_HANDLER_SUSPEND:
999
// Suspend the list, and call the signal handler. When it's done, resume.
1000
// Before sdkver 0x02000010, listsync should return paused.
1001
if (sceKernelGetCompiledSdkVersion() <= 0x02000010)
1002
currentList->state = PSP_GE_DL_STATE_PAUSED;
1003
currentList->signal = behaviour;
1004
DEBUG_LOG(Log::G3D, "Signal with wait. signal/end: %04x %04x", signal, enddata);
1005
break;
1006
case PSP_GE_SIGNAL_HANDLER_CONTINUE:
1007
// Resume the list right away, then call the handler.
1008
currentList->signal = behaviour;
1009
DEBUG_LOG(Log::G3D, "Signal without wait. signal/end: %04x %04x", signal, enddata);
1010
break;
1011
case PSP_GE_SIGNAL_HANDLER_PAUSE:
1012
// Pause the list instead of ending at the next FINISH.
1013
// Call the handler with the PAUSE signal value at that FINISH.
1014
// Technically, this ought to trigger an interrupt, but it won't do anything.
1015
// But right now, signal is always reset by interrupts, so that causes pause to not work.
1016
trigger = false;
1017
currentList->signal = behaviour;
1018
DEBUG_LOG(Log::G3D, "Signal with Pause. signal/end: %04x %04x", signal, enddata);
1019
break;
1020
case PSP_GE_SIGNAL_SYNC:
1021
// Acts as a memory barrier, never calls any user code.
1022
// Technically, this ought to trigger an interrupt, but it won't do anything.
1023
// Triggering here can cause incorrect rescheduling, which breaks 3rd Birthday.
1024
// However, this is likely a bug in how GE signal interrupts are handled.
1025
trigger = false;
1026
currentList->signal = behaviour;
1027
DEBUG_LOG(Log::G3D, "Signal with Sync. signal/end: %04x %04x", signal, enddata);
1028
break;
1029
case PSP_GE_SIGNAL_JUMP:
1030
case PSP_GE_SIGNAL_RJUMP:
1031
case PSP_GE_SIGNAL_OJUMP:
1032
{
1033
trigger = false;
1034
currentList->signal = behaviour;
1035
// pc will be increased after we return, counteract that.
1036
u32 target = (((signal << 16) | enddata) & 0xFFFFFFFC) - 4;
1037
const char *targetType = "absolute";
1038
if (behaviour == PSP_GE_SIGNAL_RJUMP) {
1039
target += currentList->pc - 4;
1040
targetType = "relative";
1041
} else if (behaviour == PSP_GE_SIGNAL_OJUMP) {
1042
target = gstate_c.getRelativeAddress(target);
1043
targetType = "origin";
1044
}
1045
1046
if (!Memory::IsValidAddress(target)) {
1047
ERROR_LOG_REPORT(Log::G3D, "Signal with Jump (%s): bad address. signal/end: %04x %04x", targetType, signal, enddata);
1048
UpdateState(GPUSTATE_ERROR);
1049
} else {
1050
UpdatePC(currentList->pc, target);
1051
currentList->pc = target;
1052
DEBUG_LOG(Log::G3D, "Signal with Jump (%s). signal/end: %04x %04x", targetType, signal, enddata);
1053
}
1054
}
1055
break;
1056
case PSP_GE_SIGNAL_CALL:
1057
case PSP_GE_SIGNAL_RCALL:
1058
case PSP_GE_SIGNAL_OCALL:
1059
{
1060
trigger = false;
1061
currentList->signal = behaviour;
1062
// pc will be increased after we return, counteract that.
1063
u32 target = (((signal << 16) | enddata) & 0xFFFFFFFC) - 4;
1064
const char *targetType = "absolute";
1065
if (behaviour == PSP_GE_SIGNAL_RCALL) {
1066
target += currentList->pc - 4;
1067
targetType = "relative";
1068
} else if (behaviour == PSP_GE_SIGNAL_OCALL) {
1069
target = gstate_c.getRelativeAddress(target);
1070
targetType = "origin";
1071
}
1072
1073
if (currentList->stackptr == ARRAY_SIZE(currentList->stack)) {
1074
ERROR_LOG_REPORT(Log::G3D, "Signal with Call (%s): stack full. signal/end: %04x %04x", targetType, signal, enddata);
1075
} else if (!Memory::IsValidAddress(target)) {
1076
ERROR_LOG_REPORT(Log::G3D, "Signal with Call (%s): bad address. signal/end: %04x %04x", targetType, signal, enddata);
1077
UpdateState(GPUSTATE_ERROR);
1078
} else {
1079
// TODO: This might save/restore other state...
1080
auto &stackEntry = currentList->stack[currentList->stackptr++];
1081
stackEntry.pc = currentList->pc;
1082
stackEntry.offsetAddr = gstate_c.offsetAddr;
1083
stackEntry.baseAddr = gstate.base;
1084
UpdatePC(currentList->pc, target);
1085
currentList->pc = target;
1086
DEBUG_LOG(Log::G3D, "Signal with Call (%s). signal/end: %04x %04x", targetType, signal, enddata);
1087
}
1088
}
1089
break;
1090
case PSP_GE_SIGNAL_RET:
1091
{
1092
trigger = false;
1093
currentList->signal = behaviour;
1094
if (currentList->stackptr == 0) {
1095
ERROR_LOG_REPORT(Log::G3D, "Signal with Return: stack empty. signal/end: %04x %04x", signal, enddata);
1096
} else {
1097
// TODO: This might save/restore other state...
1098
auto &stackEntry = currentList->stack[--currentList->stackptr];
1099
gstate_c.offsetAddr = stackEntry.offsetAddr;
1100
gstate.base = stackEntry.baseAddr;
1101
UpdatePC(currentList->pc, stackEntry.pc);
1102
currentList->pc = stackEntry.pc;
1103
DEBUG_LOG(Log::G3D, "Signal with Return. signal/end: %04x %04x", signal, enddata);
1104
}
1105
}
1106
break;
1107
default:
1108
ERROR_LOG_REPORT(Log::G3D, "UNKNOWN Signal UNIMPLEMENTED %i ! signal/end: %04x %04x", behaviour, signal, enddata);
1109
break;
1110
}
1111
// TODO: Technically, jump/call/ret should generate an interrupt, but before the pc change maybe?
1112
if (currentList->interruptsEnabled && trigger) {
1113
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
1114
currentList->pendingInterrupt = true;
1115
UpdateState(GPUSTATE_INTERRUPT);
1116
}
1117
}
1118
}
1119
break;
1120
case GE_CMD_FINISH:
1121
switch (currentList->signal) {
1122
case PSP_GE_SIGNAL_HANDLER_PAUSE:
1123
currentList->state = PSP_GE_DL_STATE_PAUSED;
1124
if (currentList->interruptsEnabled) {
1125
if (__GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
1126
currentList->pendingInterrupt = true;
1127
UpdateState(GPUSTATE_INTERRUPT);
1128
}
1129
}
1130
break;
1131
1132
case PSP_GE_SIGNAL_SYNC:
1133
currentList->signal = PSP_GE_SIGNAL_NONE;
1134
// TODO: Technically this should still cause an interrupt. Probably for memory sync.
1135
break;
1136
1137
default:
1138
FlushImm();
1139
currentList->subIntrToken = prev & 0xFFFF;
1140
UpdateState(GPUSTATE_DONE);
1141
// Since we marked done, we have to restore the context now before the next list runs.
1142
if (currentList->started && currentList->context.IsValid()) {
1143
gstate.Restore(currentList->context);
1144
ReapplyGfxState();
1145
// Don't restore the context again.
1146
currentList->started = false;
1147
}
1148
1149
if (currentList->interruptsEnabled && __GeTriggerInterrupt(currentList->id, currentList->pc, startingTicks + cyclesExecuted)) {
1150
currentList->pendingInterrupt = true;
1151
} else {
1152
currentList->state = PSP_GE_DL_STATE_COMPLETED;
1153
currentList->waitTicks = startingTicks + cyclesExecuted;
1154
busyTicks = std::max(busyTicks, currentList->waitTicks);
1155
__GeTriggerSync(GPU_SYNC_LIST, currentList->id, currentList->waitTicks);
1156
}
1157
break;
1158
}
1159
break;
1160
default:
1161
DEBUG_LOG(Log::G3D,"Ah, not finished: %06x", prev & 0xFFFFFF);
1162
break;
1163
}
1164
}
1165
1166
// GE_CMD_BOUNDINGBOX: runs a bounding-box test on `count` vertices (low 16 bits
// of op) against the drawing region and stores the verdict in
// currentList->bboxResult. On invalid data we assume "visible" so games don't
// wrongly skip draws.
void GPUCommon::Execute_BoundingBox(u32 op, u32 diff) {
	// Just resetting, nothing to check bounds for.
	const u32 count = op & 0xFFFF;
	if (count == 0) {
		currentList->bboxResult = false;
		return;
	}

	// Approximate based on timings of several counts on a PSP.
	cyclesExecuted += count * 22;

	const bool useInds = (gstate.vertType & GE_VTYPE_IDX_MASK) != 0;
	VertexDecoder *dec = drawEngineCommon_->GetVertexDecoder(gstate.vertType);
	// NOTE(review): when indexed, this assumes 1 byte consumed per index for the
	// vertex-address advance, even though the index format can be 16/32-bit
	// (see indexShift below) — confirm this matches hardware accounting.
	int bytesRead = (useInds ? 1 : dec->VertexSize()) * count;

	if (Memory::IsValidRange(gstate_c.vertexAddr, bytesRead)) {
		const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
		if (!control_points) {
			ERROR_LOG_REPORT_ONCE(boundingbox, Log::G3D, "Invalid verts in bounding box check");
			currentList->bboxResult = true;
			return;
		}

		const void *inds = nullptr;
		if (useInds) {
			// indexShift: 0 for 8-bit, 1 for 16-bit, 2 for 32-bit indices.
			int indexShift = ((gstate.vertType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
			inds = Memory::GetPointerUnchecked(gstate_c.indexAddr);
			if (!inds || !Memory::IsValidRange(gstate_c.indexAddr, count << indexShift)) {
				ERROR_LOG_REPORT_ONCE(boundingboxInds, Log::G3D, "Invalid inds in bounding box check");
				currentList->bboxResult = true;
				return;
			}
		}

		// Test if the bounding box is within the drawing region.
		// The PSP only seems to vary the result based on a single range of 0x100.
		if (count > 0x200) {
			// The second to last set of 0x100 is checked (even for odd counts.)
			size_t skipSize = (count - 0x200) * dec->VertexSize();
			currentList->bboxResult = drawEngineCommon_->TestBoundingBox((const uint8_t *)control_points + skipSize, inds, 0x100, gstate.vertType);
		} else if (count > 0x100) {
			int checkSize = count - 0x100;
			currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, checkSize, gstate.vertType);
		} else {
			currentList->bboxResult = drawEngineCommon_->TestBoundingBox(control_points, inds, count, gstate.vertType);
		}
		// Advance vertex/index addresses just like a draw would.
		AdvanceVerts(gstate.vertType, count, bytesRead);
	} else {
		ERROR_LOG_REPORT_ONCE(boundingbox, Log::G3D, "Bad bounding box data: %06x", count);
		// Data seems invalid. Let's assume the box test passed.
		currentList->bboxResult = true;
	}
}
1219
1220
// GE_CMD_MORPHWEIGHT0..7: the command id selects which morph weight to set,
// the payload is a 24-bit float.
void GPUCommon::Execute_MorphWeight(u32 op, u32 diff) {
	const u32 weightIndex = (op >> 24) - GE_CMD_MORPHWEIGHT0;
	const float weight = getFloat24(op);
	gstate_c.morphWeights[weightIndex] = weight;
}
1223
1224
// Handles the immediate-mode vertex command: builds one TransformedVertex from
// the imm_* registers into immBuffer_, flushing accumulated vertices when the
// primitive type changes or a primitive is complete. See FlushImm() for the
// actual submission.
void GPUCommon::Execute_ImmVertexAlphaPrim(u32 op, u32 diff) {
	// Safety check.
	if (immCount_ >= MAX_IMMBUFFER_SIZE) {
		// Only print once for each overrun.
		if (immCount_ == MAX_IMMBUFFER_SIZE) {
			ERROR_LOG_REPORT_ONCE(exceed_imm_buffer, Log::G3D, "Exceeded immediate draw buffer size. gstate.imm_ap=%06x , prim=%d", gstate.imm_ap & 0xFFFFFF, (int)immPrim_);
		}
		// Keep counting so the "== MAX" report above fires only once per overrun.
		if (immCount_ < 0x7fffffff)  // Paranoia :)
			immCount_++;
		return;
	}

	int prim = (op >> 8) & 0x7;
	if (prim != GE_PRIM_KEEP_PREVIOUS) {
		// Flush before changing the prim type. Only continue can be used to continue a prim.
		FlushImm();
	}

	TransformedVertex &v = immBuffer_[immCount_++];

	// ThrillVille does a clear with this, additional parameters found via tests.
	// The current vtype affects how the coordinate is processed.
	if (gstate.isModeThrough()) {
		v.x = ((int)(gstate.imm_vscx & 0xFFFF) - 0x8000) / 16.0f;
		v.y = ((int)(gstate.imm_vscy & 0xFFFF) - 0x8000) / 16.0f;
	} else {
		int offsetX = gstate.getOffsetX16();
		int offsetY = gstate.getOffsetY16();
		v.x = ((int)(gstate.imm_vscx & 0xFFFF) - offsetX) / 16.0f;
		v.y = ((int)(gstate.imm_vscy & 0xFFFF) - offsetY) / 16.0f;
	}
	v.z = gstate.imm_vscz & 0xFFFF;
	v.pos_w = 1.0f;
	v.u = getFloat24(gstate.imm_vtcs);
	v.v = getFloat24(gstate.imm_vtct);
	v.uv_w = getFloat24(gstate.imm_vtcq);
	// Color = 24-bit imm color + imm alpha in the top byte.
	v.color0_32 = (gstate.imm_cv & 0xFFFFFF) | (gstate.imm_ap << 24);
	// TODO: When !gstate.isModeThrough(), direct fog coefficient (0 = entirely fog), ignore fog flag (also GE_IMM_FOG.)
	v.fog = (gstate.imm_fc & 0xFF) / 255.0f;
	// TODO: Apply if gstate.isUsingSecondaryColor() && !gstate.isModeThrough(), ignore lighting flag.
	v.color1_32 = gstate.imm_scv & 0xFFFFFF;
	if (prim != GE_PRIM_KEEP_PREVIOUS) {
		immPrim_ = (GEPrimitiveType)prim;
		// Flags seem to only be respected from the first prim.
		immFlags_ = op & 0x00FFF800;
		immFirstSent_ = false;
	} else if (prim == GE_PRIM_KEEP_PREVIOUS && immPrim_ != GE_PRIM_INVALID) {
		// Vertices per primitive, indexed by prim type (0 = unknown/fan-like, flushed elsewhere).
		static constexpr int flushPrimCount[] = { 1, 2, 0, 3, 0, 0, 2, 0 };
		// Instead of finding a proper point to flush, we just emit prims when we can.
		if (immCount_ == flushPrimCount[immPrim_ & 7])
			FlushImm();
	} else {
		ERROR_LOG_REPORT_ONCE(imm_draw_prim, Log::G3D, "Immediate draw: Unexpected primitive %d at count %d", prim, immCount_);
	}
}
1279
1280
// Submits the vertices accumulated by Execute_ImmVertexAlphaPrim. The imm
// flags carry their own render-state bits, so state words in gstate are
// temporarily overwritten for the submit and restored afterwards.
void GPUCommon::FlushImm() {
	if (immCount_ == 0 || immPrim_ == GE_PRIM_INVALID)
		return;

	SetDrawType(DRAW_PRIM, immPrim_);
	VirtualFramebuffer *vfb = nullptr;
	if (framebufferManager_)
		vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason);
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// No idea how many cycles to skip, heh.
		immCount_ = 0;
		return;
	}
	gstate_c.UpdateUVScaleOffset();
	if (vfb) {
		CheckDepthUsage(vfb);
	}

	// Snapshot current state alongside the imm-flag state so we can restore below.
	bool antialias = (immFlags_ & GE_IMM_ANTIALIAS) != 0;
	bool prevAntialias = gstate.isAntiAliasEnabled();
	bool shading = (immFlags_ & GE_IMM_SHADING) != 0;
	bool prevShading = gstate.getShadeMode() == GE_SHADE_GOURAUD;
	bool cullEnable = (immFlags_ & GE_IMM_CULLENABLE) != 0;
	bool prevCullEnable = gstate.isCullEnabled();
	int cullMode = (immFlags_ & GE_IMM_CULLFACE) != 0 ? 1 : 0;
	bool texturing = (immFlags_ & GE_IMM_TEXTURE) != 0;
	bool prevTexturing = gstate.isTextureMapEnabled();
	bool fog = (immFlags_ & GE_IMM_FOG) != 0;
	bool prevFog = gstate.isFogEnabled();
	bool dither = (immFlags_ & GE_IMM_DITHER) != 0;
	bool prevDither = gstate.isDitherEnabled();

	if ((immFlags_ & GE_IMM_CLIPMASK) != 0) {
		WARN_LOG_REPORT_ONCE(geimmclipvalue, Log::G3D, "Imm vertex used clip value, flags=%06x", immFlags_);
	}

	// NOTE(review): antialias is intentionally (?) not part of `changed`, so a
	// differing antialias flag alone won't trigger the override/restore path —
	// confirm whether that's deliberate.
	bool changed = texturing != prevTexturing || cullEnable != prevCullEnable || dither != prevDither;
	changed = changed || prevShading != shading || prevFog != fog;
	if (changed) {
		// Flush pending draws under the old state, then override the state words.
		DispatchFlush();
		gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)antialias;
		gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)shading;
		gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)cullEnable;
		gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)texturing;
		gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)fog;
		gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)dither;
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
	}

	drawEngineCommon_->DispatchSubmitImm(immPrim_, immBuffer_, immCount_, cullMode, immFirstSent_);
	immCount_ = 0;
	immFirstSent_ = true;

	if (changed) {
		// Flush the imm draw, then restore the state words we overrode above.
		DispatchFlush();
		gstate.antiAliasEnable = (GE_CMD_ANTIALIASENABLE << 24) | (int)prevAntialias;
		gstate.shademodel = (GE_CMD_SHADEMODE << 24) | (int)prevShading;
		gstate.cullfaceEnable = (GE_CMD_CULLFACEENABLE << 24) | (int)prevCullEnable;
		gstate.textureMapEnable = (GE_CMD_TEXTUREMAPENABLE << 24) | (int)prevTexturing;
		gstate.fogEnable = (GE_CMD_FOGENABLE << 24) | (int)prevFog;
		gstate.ditherEnable = (GE_CMD_DITHERENABLE << 24) | (int)prevDither;
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_UVSCALEOFFSET | DIRTY_CULLRANGE);
	}
}
1344
1345
// Fallback handler for unrecognized GE commands. Zero payloads are treated as
// harmless filler; only a nonzero payload is worth reporting (once).
void GPUCommon::Execute_Unknown(u32 op, u32 diff) {
	const u32 payload = op & 0xFFFFFF;
	if (payload == 0)
		return;
	WARN_LOG_REPORT_ONCE(unknowncmd, Log::G3D, "Unknown GE command : %08x ", op);
}
1349
1350
// Fast path for loading a bone matrix (12 floats) directly from memory at
// `target`, marking the right bone-matrix uniforms dirty (deferred when
// software skinning, since the CPU reads them at decode time instead).
void GPUCommon::FastLoadBoneMatrix(u32 target) {
	const u32 num = gstate.boneMatrixNumber & 0x7F;
	_dbg_assert_msg_(num + 12 <= 96, "FastLoadBoneMatrix would corrupt memory");
	// Which of the 8 bone matrices the current write position falls in.
	const u32 mtxNum = num / 12;
	u32 uniformsToDirty = DIRTY_BONEMATRIX0 << mtxNum;
	if (num != 12 * mtxNum) {
		// Write position straddles a matrix boundary; the next matrix is touched too.
		uniformsToDirty |= DIRTY_BONEMATRIX0 << ((mtxNum + 1) & 7);
	}

	if (!g_Config.bSoftwareSkinning) {
		if (flushOnParams_)
			Flush();
		gstate_c.Dirty(uniformsToDirty);
	} else {
		gstate_c.deferredVertTypeDirty |= uniformsToDirty;
	}
	gstate.FastLoadBoneMatrix(target);

	cyclesExecuted += 2 * 14; // one to reset the counter, 12 to load the matrix, and a return.

	if (coreCollectDebugStats) {
		gpuStats.otherGPUCycles += 2 * 14;
	}
}
1374
1375
// Legacy savestate layout of DisplayList (GPUCommon section version 1).
// Used only by GPUCommon::DoState to read old savestates — field order,
// types, and sizes must not change. `contextPtr` held a host pointer at the
// time, so the saved value is not meaningful across runs (see DoState).
struct DisplayList_v1 {
	int id;
	u32 startpc;
	u32 pc;
	u32 stall;
	DisplayListState state;
	SignalBehavior signal;
	int subIntrBase;
	u16 subIntrToken;
	DisplayListStackEntry stack[32];
	int stackptr;
	bool interrupted;
	u64 waitTicks;
	bool interruptsEnabled;
	bool pendingInterrupt;
	bool started;
	size_t contextPtr;
	u32 offsetAddr;
	bool bboxResult;
};
1395
1396
// Legacy savestate layout of DisplayList (GPUCommon section version 2):
// same as v1 but `context` became a guest-memory PSPPointer. Current
// DisplayList adds stackAddr beyond this (zeroed on upconvert in DoState).
// Field order/types must not change.
struct DisplayList_v2 {
	int id;
	u32 startpc;
	u32 pc;
	u32 stall;
	DisplayListState state;
	SignalBehavior signal;
	int subIntrBase;
	u16 subIntrToken;
	DisplayListStackEntry stack[32];
	int stackptr;
	bool interrupted;
	u64 waitTicks;
	bool interruptsEnabled;
	bool pendingInterrupt;
	bool started;
	PSPPointer<u32_le> context;
	u32 offsetAddr;
	bool bboxResult;
};
1416
1417
// Savestate (de)serialization for the common GPU state: the display-list
// queue, all list slots (with upconversion from older layouts), the current
// list, and misc run state. Section versions 1-6 are supported for reading.
void GPUCommon::DoState(PointerWrap &p) {
	auto s = p.Section("GPUCommon", 1, 6);
	if (!s)
		return;

	Do<int>(p, dlQueue);
	if (s >= 4) {
		// Current layout, saved/loaded directly.
		DoArray(p, dls, ARRAY_SIZE(dls));
	} else if (s >= 3) {
		// This may have been saved with or without padding, depending on platform.
		// We need to upconvert it to our consistently-padded struct.
		static const size_t DisplayList_v3_size = 452;
		static const size_t DisplayList_v4_size = 456;
		static_assert(DisplayList_v4_size == sizeof(DisplayList), "Make sure to change here when updating DisplayList");

		p.DoVoid(&dls[0], DisplayList_v3_size);
		dls[0].padding = 0;

		const u8 *savedPtr = *p.GetPPtr();
		const u32 *savedPtr32 = (const u32 *)savedPtr;
		// Here's the trick: the first member (id) is always the same as the index.
		// The second member (startpc) is always an address, or 0, never 1. So we can see the padding.
		const bool hasPadding = savedPtr32[1] == 1;
		if (hasPadding) {
			u32 padding;
			Do(p, padding);
		}

		for (size_t i = 1; i < ARRAY_SIZE(dls); ++i) {
			p.DoVoid(&dls[i], DisplayList_v3_size);
			dls[i].padding = 0;
			if (hasPadding) {
				u32 padding;
				Do(p, padding);
			}
		}
	} else if (s >= 2) {
		for (size_t i = 0; i < ARRAY_SIZE(dls); ++i) {
			DisplayList_v2 oldDL;
			Do(p, oldDL);
			// Copy over everything except the last, new member (stackAddr.)
			memcpy(&dls[i], &oldDL, sizeof(DisplayList_v2));
			dls[i].stackAddr = 0;
		}
	} else {
		// Can only be in read mode here.
		for (size_t i = 0; i < ARRAY_SIZE(dls); ++i) {
			DisplayList_v1 oldDL;
			Do(p, oldDL);
			// On 32-bit, they're the same, on 64-bit oldDL is bigger.
			memcpy(&dls[i], &oldDL, sizeof(DisplayList_v1));
			// Fix the other fields. Let's hope context wasn't important, it was a pointer.
			dls[i].context = 0;
			dls[i].offsetAddr = oldDL.offsetAddr;
			dls[i].bboxResult = oldDL.bboxResult;
			dls[i].stackAddr = 0;
		}
	}
	// Serialize the current list as an index into dls; 0 doubles as "none".
	int currentID = 0;
	if (currentList != nullptr) {
		currentID = (int)(currentList - &dls[0]);
	}
	Do(p, currentID);
	if (currentID == 0) {
		currentList = nullptr;
	} else {
		currentList = &dls[currentID];
	}
	Do(p, interruptRunning);
	Do(p, gpuState);
	Do(p, isbreak);
	Do(p, drawCompleteTicks);
	Do(p, busyTicks);

	if (s >= 5) {
		Do(p, matrixVisible.all);
	}
	if (s >= 6) {
		Do(p, edramTranslation_);
	}
}
1498
1499
// Marks that a GE interrupt for the given list is being serviced.
// Cleared again in InterruptEnd().
void GPUCommon::InterruptStart(int listid) {
	interruptRunning = true;
}
1502
// Called when the GE interrupt for `listid` has finished. Completes the list's
// teardown (context restore, sync wakeup, dequeue) and resumes processing.
void GPUCommon::InterruptEnd(int listid) {
	interruptRunning = false;
	isbreak = false;

	DisplayList &dl = dls[listid];
	dl.pendingInterrupt = false;
	// TODO: Unless the signal handler could change it?
	if (dl.state == PSP_GE_DL_STATE_COMPLETED || dl.state == PSP_GE_DL_STATE_NONE) {
		// Restore the saved GE context before the next list runs, if one was saved.
		if (dl.started && dl.context.IsValid()) {
			gstate.Restore(dl.context);
			ReapplyGfxState();
		}
		dl.waitTicks = 0;
		// Wake any threads waiting on this list.
		__GeTriggerWait(GPU_SYNC_LIST, listid);

		// Make sure the list isn't still queued since it's now completed.
		if (!dlQueue.empty()) {
			if (listid == dlQueue.front())
				PopDLQueue();
			else
				dlQueue.remove(listid);
		}
	}

	ProcessDLQueue();
}
1528
1529
// TODO: Maybe cleaner to keep this in GE and trigger the clear directly?
1530
// After a sync completes: if it was a draw sync that actually woke threads,
// recycle all completed display-list slots so they can be reused.
void GPUCommon::SyncEnd(GPUSyncType waitType, int listid, bool wokeThreads) {
	if (waitType != GPU_SYNC_DRAW || !wokeThreads)
		return;

	for (int i = 0; i < DisplayListMaxCount; ++i) {
		DisplayList &dl = dls[i];
		if (dl.state == PSP_GE_DL_STATE_COMPLETED)
			dl.state = PSP_GE_DL_STATE_NONE;
	}
}
1540
1541
// Copies a snapshot of the currently executing display list into `list`.
// Returns false when no list is running.
bool GPUCommon::GetCurrentDisplayList(DisplayList &list) {
	if (currentList == nullptr)
		return false;
	list = *currentList;
	return true;
}
1548
1549
std::vector<DisplayList> GPUCommon::ActiveDisplayLists() {
1550
std::vector<DisplayList> result;
1551
1552
for (int it : dlQueue) {
1553
result.push_back(dls[it]);
1554
}
1555
1556
return result;
1557
}
1558
1559
void GPUCommon::ResetListPC(int listID, u32 pc) {
1560
if (listID < 0 || listID >= DisplayListMaxCount) {
1561
_dbg_assert_msg_(false, "listID out of range: %d", listID);
1562
return;
1563
}
1564
1565
Reporting::NotifyDebugger();
1566
dls[listID].pc = pc;
1567
downcount = 0;
1568
}
1569
1570
void GPUCommon::ResetListStall(int listID, u32 stall) {
1571
if (listID < 0 || listID >= DisplayListMaxCount) {
1572
_dbg_assert_msg_(false, "listID out of range: %d", listID);
1573
return;
1574
}
1575
1576
Reporting::NotifyDebugger();
1577
dls[listID].stall = stall;
1578
downcount = 0;
1579
}
1580
1581
void GPUCommon::ResetListState(int listID, DisplayListState state) {
1582
if (listID < 0 || listID >= DisplayListMaxCount) {
1583
_dbg_assert_msg_(false, "listID out of range: %d", listID);
1584
return;
1585
}
1586
1587
Reporting::NotifyDebugger();
1588
dls[listID].state = state;
1589
downcount = 0;
1590
}
1591
1592
// Disassembles a single GE op at `pc` for the debugger. The previous op is
// fetched (when readable) because some ops disassemble relative to it.
GPUDebugOp GPUCommon::DissassembleOp(u32 pc, u32 op) {
	u32 prev = 0;
	if (Memory::IsValidAddress(pc - 4))
		prev = Memory::ReadUnchecked_U32(pc - 4);

	char desc[1024];
	GeDisassembleOp(pc, op, prev, desc, sizeof(desc));

	GPUDebugOp info;
	info.pc = pc;
	info.cmd = op >> 24;
	info.op = op;
	info.desc = desc;
	return info;
}
1604
1605
// Disassembles the GE ops in [startpc, endpc) for the debugger, feeding each
// op's predecessor to the disassembler for context.
std::vector<GPUDebugOp> GPUCommon::DissassembleOpRange(u32 startpc, u32 endpc) {
	std::vector<GPUDebugOp> result;
	result.reserve((endpc - startpc) / 4);

	// Don't trigger a pause.
	u32 prev = Memory::IsValidAddress(startpc - 4) ? Memory::Read_U32(startpc - 4) : 0;
	char desc[1024];
	for (u32 pc = startpc; pc < endpc; pc += 4) {
		const u32 op = Memory::IsValidAddress(pc) ? Memory::Read_U32(pc) : 0;
		GeDisassembleOp(pc, op, prev, desc, sizeof(desc));
		prev = op;

		GPUDebugOp info;
		info.pc = pc;
		info.cmd = op >> 24;
		info.op = op;
		info.desc = desc;
		result.push_back(info);
	}
	return result;
}
1626
1627
// Debugger accessor: resolve `data` through the current relative-addressing state.
u32 GPUCommon::GetRelativeAddress(u32 data) {
	return gstate_c.getRelativeAddress(data);
}
1630
1631
// Debugger accessor: current vertex data address.
u32 GPUCommon::GetVertexAddress() {
	return gstate_c.vertexAddr;
}
1634
1635
// Debugger accessor: current index data address.
u32 GPUCommon::GetIndexAddress() {
	return gstate_c.indexAddr;
}
1638
1639
// Debugger accessor: a copy of the full GE state block.
GPUgstate GPUCommon::GetGState() {
	return gstate;
}
1642
1643
// Overwrites a single GE command register and executes it, as if it had been
// encountered in a display list (used via the debugger notification path).
void GPUCommon::SetCmdValue(u32 op) {
	u32 cmd = op >> 24;
	// diff = bits that actually change vs. the current register value.
	u32 diff = op ^ gstate.cmdmem[cmd];

	Reporting::NotifyDebugger();
	PreExecuteOp(op, diff);
	gstate.cmdmem[cmd] = op;
	ExecuteOp(op, diff);
	// Zero the downcount so the run loop re-evaluates immediately.
	downcount = 0;
}
1653
1654
// Executes a GE block transfer (rectangular VRAM/RAM copy) described by the
// transfer registers. Handles the fast contiguous case, overlap, and VRAM
// mirror wrapping; notifies the framebuffer manager and texture cache so GPU-
// side copies stay coherent, and reports reads/writes to the mem-info tracker.
void GPUCommon::DoBlockTransfer(u32 skipDrawReason) {
	u32 srcBasePtr = gstate.getTransferSrcAddress();
	u32 srcStride = gstate.getTransferSrcStride();

	u32 dstBasePtr = gstate.getTransferDstAddress();
	u32 dstStride = gstate.getTransferDstStride();

	int srcX = gstate.getTransferSrcX();
	int srcY = gstate.getTransferSrcY();

	int dstX = gstate.getTransferDstX();
	int dstY = gstate.getTransferDstY();

	int width = gstate.getTransferWidth();
	int height = gstate.getTransferHeight();

	int bpp = gstate.getTransferBpp();

	DEBUG_LOG(Log::G3D, "Block transfer: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
	gpuStats.numBlockTransfers++;

	// For VRAM, we wrap around when outside valid memory (mirrors still work.)
	if ((srcBasePtr & 0x04800000) == 0x04800000)
		srcBasePtr &= ~0x00800000;
	if ((dstBasePtr & 0x04800000) == 0x04800000)
		dstBasePtr &= ~0x00800000;

	// Use height less one to account for width, which can be greater or less than stride, and then add it on for the last line.
	// NOTE: The sizes are only used for validity checks and memory info tracking.
	// NOTE(review): stride is treated as pixels in the address math below
	// ((... * srcStride + srcX) * bpp), but here only `width` is scaled by bpp —
	// confirm whether the stride term should be scaled too.
	const uint32_t src = srcBasePtr + (srcY * srcStride + srcX) * bpp;
	const uint32_t dst = dstBasePtr + (dstY * dstStride + dstX) * bpp;
	const uint32_t srcSize = ((height - 1) * srcStride) + width * bpp;
	const uint32_t dstSize = ((height - 1) * dstStride) + width * bpp;

	bool srcDstOverlap = src + srcSize > dst && dst + dstSize > src;
	bool srcValid = Memory::IsValidRange(src, srcSize);
	bool dstValid = Memory::IsValidRange(dst, dstSize);
	// "Wraps" = VRAM address whose range spills past the end of the mirror.
	bool srcWraps = Memory::IsVRAMAddress(srcBasePtr) && !srcValid;
	bool dstWraps = Memory::IsVRAMAddress(dstBasePtr) && !dstValid;

	char tag[128];
	size_t tagSize;

	// Tell the framebuffer manager to take action if possible. If it does the entire thing, let's just return.
	if (!framebufferManager_ || !framebufferManager_->NotifyBlockTransferBefore(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason)) {
		// Do the copy! (Hm, if we detect a drawn video frame (see below) then we could maybe skip this?)
		// Can use GetPointerUnchecked because we checked the addresses above. We could also avoid them
		// entirely by walking a couple of pointers...

		// Simple case: just a straight copy, no overlap or wrapping.
		if (srcStride == dstStride && (u32)width == srcStride && !srcDstOverlap && srcValid && dstValid) {
			u32 srcLineStartAddr = srcBasePtr + (srcY * srcStride + srcX) * bpp;
			u32 dstLineStartAddr = dstBasePtr + (dstY * dstStride + dstX) * bpp;
			u32 bytesToCopy = width * height * bpp;

			const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
			u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
			memcpy(dstp, srcp, bytesToCopy);

			if (MemBlockInfoDetailed(bytesToCopy)) {
				NotifyMemInfoCopy(dst, src, bytesToCopy, "GPUBlockTransfer/");
			}
		} else if ((srcDstOverlap || srcWraps || dstWraps) && (srcValid || srcWraps) && (dstValid || dstWraps)) {
			// This path means we have either src/dst overlap, OR one or both of src and dst wrap.
			// This should be uncommon so it's the slowest path.
			u32 bytesToCopy = width * bpp;
			bool notifyDetail = MemBlockInfoDetailed(srcWraps || dstWraps ? 64 : bytesToCopy);
			bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
			if (notifyDetail || notifyAll) {
				tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, srcSize);
			}

			// memmove (not memcpy) since src and dst may overlap; optionally logs
			// each chunk to the mem-info tracker.
			auto notifyingMemmove = [&](u32 d, u32 s, u32 sz) {
				const u8 *srcp = Memory::GetPointer(s);
				u8 *dstp = Memory::GetPointerWrite(d);
				memmove(dstp, srcp, sz);

				if (notifyDetail) {
					NotifyMemInfo(MemBlockFlags::READ, s, sz, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::WRITE, d, sz, tag, tagSize);
				}
			};

			for (int y = 0; y < height; y++) {
				u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
				u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;
				// If we already passed a wrap, we can use the quicker path.
				if ((srcLineStartAddr & 0x04800000) == 0x04800000)
					srcLineStartAddr &= ~0x00800000;
				if ((dstLineStartAddr & 0x04800000) == 0x04800000)
					dstLineStartAddr &= ~0x00800000;
				// These flags mean there's a wrap inside this line.
				bool srcLineWrap = !Memory::IsValidRange(srcLineStartAddr, bytesToCopy);
				bool dstLineWrap = !Memory::IsValidRange(dstLineStartAddr, bytesToCopy);

				if (!srcLineWrap && !dstLineWrap) {
					const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
					u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
					// Copy in 64-byte chunks to mimic overlapping-transfer granularity.
					for (u32 i = 0; i < bytesToCopy; i += 64) {
						u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
						memmove(dstp + i, srcp + i, chunk);
					}

					// If we're tracking detail, it's useful to have the gaps illustrated properly.
					if (notifyDetail) {
						NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag, tagSize);
						NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag, tagSize);
					}
				} else {
					// We can wrap at any point, so along with overlap this gets a bit complicated.
					// We're just going to do this the slow and easy way.
					u32 srcLinePos = srcLineStartAddr;
					u32 dstLinePos = dstLineStartAddr;
					for (u32 i = 0; i < bytesToCopy; i += 64) {
						u32 chunk = i + 64 > bytesToCopy ? bytesToCopy - i : 64;
						u32 srcValid = Memory::ValidSize(srcLinePos, chunk);
						u32 dstValid = Memory::ValidSize(dstLinePos, chunk);

						// First chunk, for which both are valid.
						u32 bothSize = std::min(srcValid, dstValid);
						if (bothSize != 0)
							notifyingMemmove(dstLinePos, srcLinePos, bothSize);

						// Now, whichever side has more valid (or the rest, if only one side must wrap.)
						u32 exclusiveSize = std::max(srcValid, dstValid) - bothSize;
						if (exclusiveSize != 0 && srcValid >= dstValid) {
							notifyingMemmove(PSP_GetVidMemBase(), srcLineStartAddr + bothSize, exclusiveSize);
						} else if (exclusiveSize != 0 && srcValid < dstValid) {
							notifyingMemmove(dstLineStartAddr + bothSize, PSP_GetVidMemBase(), exclusiveSize);
						}

						// Finally, if both src and dst wrapped, that portion.
						u32 wrappedSize = chunk - bothSize - exclusiveSize;
						if (wrappedSize != 0 && srcValid >= dstValid) {
							notifyingMemmove(PSP_GetVidMemBase() + exclusiveSize, PSP_GetVidMemBase(), wrappedSize);
						} else if (wrappedSize != 0 && srcValid < dstValid) {
							notifyingMemmove(PSP_GetVidMemBase(), PSP_GetVidMemBase() + exclusiveSize, wrappedSize);
						}

						srcLinePos += chunk;
						dstLinePos += chunk;
						if ((srcLinePos & 0x04800000) == 0x04800000)
							srcLinePos &= ~0x00800000;
						if ((dstLinePos & 0x04800000) == 0x04800000)
							dstLinePos &= ~0x00800000;
					}
				}
			}

			if (notifyAll) {
				if (srcWraps) {
					u32 validSize = Memory::ValidSize(src, srcSize);
					NotifyMemInfo(MemBlockFlags::READ, src, validSize, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::READ, PSP_GetVidMemBase(), srcSize - validSize, tag, tagSize);
				} else {
					NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag, tagSize);
				}
				if (dstWraps) {
					u32 validSize = Memory::ValidSize(dst, dstSize);
					NotifyMemInfo(MemBlockFlags::WRITE, dst, validSize, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::WRITE, PSP_GetVidMemBase(), dstSize - validSize, tag, tagSize);
				} else {
					NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag, tagSize);
				}
			}
		} else if (srcValid && dstValid) {
			// Line-by-line copy: strides differ or width < stride, but no overlap/wrap.
			u32 bytesToCopy = width * bpp;
			bool notifyDetail = MemBlockInfoDetailed(bytesToCopy);
			bool notifyAll = !notifyDetail && MemBlockInfoDetailed(srcSize, dstSize);
			if (notifyDetail || notifyAll) {
				tagSize = FormatMemWriteTagAt(tag, sizeof(tag), "GPUBlockTransfer/", src, srcSize);
			}

			for (int y = 0; y < height; y++) {
				u32 srcLineStartAddr = srcBasePtr + ((y + srcY) * srcStride + srcX) * bpp;
				u32 dstLineStartAddr = dstBasePtr + ((y + dstY) * dstStride + dstX) * bpp;

				const u8 *srcp = Memory::GetPointer(srcLineStartAddr);
				u8 *dstp = Memory::GetPointerWrite(dstLineStartAddr);
				memcpy(dstp, srcp, bytesToCopy);

				// If we're tracking detail, it's useful to have the gaps illustrated properly.
				if (notifyDetail) {
					NotifyMemInfo(MemBlockFlags::READ, srcLineStartAddr, bytesToCopy, tag, tagSize);
					NotifyMemInfo(MemBlockFlags::WRITE, dstLineStartAddr, bytesToCopy, tag, tagSize);
				}
			}

			if (notifyAll) {
				NotifyMemInfo(MemBlockFlags::READ, src, srcSize, tag, tagSize);
				NotifyMemInfo(MemBlockFlags::WRITE, dst, dstSize, tag, tagSize);
			}
		} else {
			// This seems to cause the GE to require a break/reset on a PSP.
			// TODO: Handle that and figure out which bytes are still copied?
			ERROR_LOG_REPORT_ONCE(invalidtransfer, Log::G3D, "Block transfer invalid: %08x/%x -> %08x/%x, %ix%ix%i (%i,%i)->(%i,%i)", srcBasePtr, srcStride, dstBasePtr, dstStride, width, height, bpp, srcX, srcY, dstX, dstY);
		}

		if (framebufferManager_) {
			// Fixes Gran Turismo's funky text issue, since it overwrites the current texture.
			textureCache_->Invalidate(dstBasePtr + (dstY * dstStride + dstX) * bpp, height * dstStride * bpp, GPU_INVALIDATE_HINT);
			framebufferManager_->NotifyBlockTransferAfter(dstBasePtr, dstStride, dstX, dstY, srcBasePtr, srcStride, srcX, srcY, width, height, bpp, skipDrawReason);
		}
	}

	// TODO: Correct timing appears to be 1.9, but erring a bit low since some of our other timing is inaccurate.
	cyclesExecuted += ((height * width * bpp) * 16) / 10;
}
1862
1863
// Handles a CPU-visible memory copy that the GPU needs to know about.
// Returns true if a framebuffer was involved (so the caller knows the copy was
// intercepted/handled on the GPU side), false for a plain RAM/texture copy.
//   dest/src - PSP addresses of the destination and source.
//   size     - byte count.
//   flags    - see GPUCopyFlag; DEBUG_NOTIFIED suppresses the duplicate record notification.
bool GPUCommon::PerformMemoryCopy(u32 dest, u32 src, int size, GPUCopyFlag flags) {
	// Track stray copies of a framebuffer in RAM. MotoGP does this.
	if (framebufferManager_->MayIntersectFramebufferColor(src) || framebufferManager_->MayIntersectFramebufferColor(dest)) {
		if (!framebufferManager_->NotifyFramebufferCopy(src, dest, size, flags, gstate_c.skipDrawReason)) {
			// We use matching values in PerformReadbackToMemory/PerformWriteColorFromMemory.
			// Since they're identical we don't need to copy.
			if (dest != src) {
				if (Memory::IsValidRange(dest, size) && Memory::IsValidRange(src, size)) {
					// memmove rather than memcpy: dest != src does not guarantee the
					// ranges are disjoint (they overlap whenever |dest - src| < size),
					// and memcpy on overlapping ranges is undefined behavior. memmove
					// behaves identically for non-overlapping copies.
					memmove(Memory::GetPointerWriteUnchecked(dest), Memory::GetPointerUnchecked(src), size);
				}
				if (MemBlockInfoDetailed(size)) {
					NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");
				}
			}
		}
		InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
		return true;
	}

	// No framebuffer involved - just track the copy and invalidate any cached textures.
	if (MemBlockInfoDetailed(size)) {
		NotifyMemInfoCopy(dest, src, size, "GPUMemcpy/");
	}
	InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	if (!(flags & GPUCopyFlag::DEBUG_NOTIFIED))
		GPURecord::NotifyMemcpy(dest, src, size);
	return false;
}
1890
1891
// Handles a CPU-side memset that the GPU may care about. Returns true if the
// target intersects a framebuffer (handled on the GPU side), false otherwise.
bool GPUCommon::PerformMemorySet(u32 dest, u8 v, int size) {
	// If the target doesn't touch a framebuffer, it's perhaps a texture - record
	// the write, invalidate the texture cache, and let the caller do the RAM work.
	if (!framebufferManager_->MayIntersectFramebufferColor(dest)) {
		NotifyMemInfo(MemBlockFlags::WRITE, dest, size, "GPUMemset");
		// Or perhaps a texture, let's invalidate.
		InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
		GPURecord::NotifyMemset(dest, v, size);
		return false;
	}

	// This may indicate a memset, usually to 0, of a framebuffer.
	Memory::Memset(dest, v, size, "GPUMemset");
	const bool handled = framebufferManager_->NotifyFramebufferCopy(dest, dest, size, GPUCopyFlag::MEMSET, gstate_c.skipDrawReason);
	if (!handled) {
		InvalidateCache(dest, size, GPU_INVALIDATE_HINT);
	}
	return true;
}
1907
1908
// Forces a readback of VRAM contents to emulated memory at dest.
// Returns true if dest is a VRAM address and the readback was dispatched.
bool GPUCommon::PerformReadbackToMemory(u32 dest, int size) {
	if (!Memory::IsVRAMAddress(dest))
		return false;
	// A self-copy with FORCE_DST_MATCH_MEM routes through PerformMemoryCopy's
	// framebuffer handling without actually moving any bytes.
	return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_DST_MATCH_MEM);
}
1914
1915
// Pushes color data already written to emulated VRAM into the GPU-side
// framebuffer. Returns true if dest is a VRAM address and the upload was dispatched.
bool GPUCommon::PerformWriteColorFromMemory(u32 dest, int size) {
	if (!Memory::IsVRAMAddress(dest))
		return false;
	GPURecord::NotifyUpload(dest, size);
	// Self-copy with FORCE_SRC_MATCH_MEM - no bytes move, but the framebuffer
	// manager gets to pick up the new memory contents. DEBUG_NOTIFIED avoids a
	// duplicate record notification since we just called NotifyUpload above.
	return PerformMemoryCopy(dest, dest, size, GPUCopyFlag::FORCE_SRC_MATCH_MEM | GPUCopyFlag::DEBUG_NOTIFIED);
}
1922
1923
// Notifies the GPU that formatted pixel data was written to memory at addr
// (frameWidth pixels per row, in the given GE buffer format).
void GPUCommon::PerformWriteFormattedFromMemory(u32 addr, int size, int frameWidth, GEBufferFormat format) {
	const bool isVRAM = Memory::IsVRAMAddress(addr);
	if (isVRAM) {
		// Only VRAM addresses can hit actual framebuffers.
		framebufferManager_->PerformWriteFormattedFromMemory(addr, size, frameWidth, format);
	}
	// The texture cache is told regardless - textures can live anywhere.
	textureCache_->NotifyWriteFormattedFromMemory(addr, size, frameWidth, format);
	InvalidateCache(addr, size, GPU_INVALIDATE_SAFE);
}
1930
1931
// Uploads stencil data from emulated memory into a GPU-side framebuffer, if
// dest intersects one. Returns true when the write was handled.
bool GPUCommon::PerformWriteStencilFromMemory(u32 dest, int size, WriteStencil flags) {
	if (!framebufferManager_->MayIntersectFramebufferColor(dest))
		return false;
	framebufferManager_->PerformWriteStencilFromMemory(dest, size, flags);
	return true;
}
1938
1939
bool GPUCommon::GetCurrentSimpleVertices(int count, std::vector<GPUDebugVertex> &vertices, std::vector<u16> &indices) {
1940
gstate_c.UpdateUVScaleOffset();
1941
return drawEngineCommon_->GetCurrentSimpleVertices(count, vertices, indices);
1942
}
1943
1944
bool GPUCommon::DescribeCodePtr(const u8 *ptr, std::string &name) {
1945
// The only part of GPU emulation (other than software) that jits is the vertex decoder, currently,
1946
// which is owned by the drawengine.
1947
return drawEngineCommon_->DescribeCodePtr(ptr, name);
1948
}
1949
1950