// Copyright (c) 2017- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include <algorithm>
#include <cstring>
#include <functional>
#include <limits>
#include <mutex>
#include <vector>
#include <snappy-c.h>
#include <zstd.h>
#include "Common/Profiler/Profiler.h"
#include "Common/CommonTypes.h"
#include "Common/Log.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/MemBlockInfo.h"
#include "Core/ELF/ParamSFO.h"
#include "Core/FileSystems/MetaFileSystem.h"
#include "Core/HLE/sceDisplay.h"
#include "Core/HLE/sceKernelMemory.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/System.h"
#include "GPU/GPUInterface.h"
#include "GPU/GPUState.h"
#include "GPU/ge_constants.h"
#include "GPU/Debugger/Playback.h"
#include "GPU/Debugger/Record.h"
#include "GPU/Debugger/RecordFormat.h"

namespace GPURecord {

static std::string lastExecFilename;
static uint32_t lastExecVersion;
static std::vector<Command> lastExecCommands;
static std::vector<u8> lastExecPushbuf;
static std::mutex executeLock;

// This class maps pushbuffer (dump data) sections to PSP memory.
// Dumps can be larger than available PSP memory, because they include generated data too.
//
// If possible, it maps to dynamically allocated "slabs" so nearby access is fast.
// Otherwise it uses "extra" allocations to manage sections that straddle two slabs.
// Slabs are managed with LRU, extra buffers are round-robin.
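//
// For example (hypothetical offsets, with 1 MB slabs): a 0x1000-byte access at
// bufpos 0x0F0000 fits entirely inside slab 0 (0x000000-0x0FFFFF) and is served
// by MapSlab, while the same-sized access at 0x0FFC00 crosses into slab 1 and
// must be copied into a contiguous "extra" allocation by MapExtra.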
class BufMapping {
public:
	BufMapping(const std::vector<u8> &pushbuf) : pushbuf_(pushbuf) {
	}

	// Returns a pointer to contiguous memory for this access, or else 0 (failure).
	u32 Map(u32 bufpos, u32 sz, const std::function<void()> &flush);

	// Clear and reset allocations made.
	void Reset() {
		slabGeneration_ = 0;
		extraOffset_ = 0;
		for (int i = 0; i < SLAB_COUNT; ++i) {
			slabs_[i].Free();
		}
		for (int i = 0; i < EXTRA_COUNT; ++i) {
			extra_[i].Free();
		}
	}

protected:
	u32 MapSlab(u32 bufpos, const std::function<void()> &flush);
	u32 MapExtra(u32 bufpos, u32 sz, const std::function<void()> &flush);

	enum {
		// These numbers are kept low because we only have 24 MB of user memory to map into.
		SLAB_SIZE = 1 * 1024 * 1024,
		// 10 is the number of texture units + verts + inds.
		// In the worst case, we could concurrently need 10 slabs/extras at the same time.
		SLAB_COUNT = 10,
		EXTRA_COUNT = 10,
	};

	// The current "generation". Static simply as a convenience for access.
	// This increments on every allocation, for a simple LRU.
	static int slabGeneration_;

	// An aligned large mapping of the pushbuffer in PSP RAM.
	struct SlabInfo {
		u32 psp_pointer_ = 0;
		u32 buf_pointer_ = 0;
		int last_used_ = 0;

		bool Matches(u32 bufpos) {
			// We check psp_pointer_ because bufpos = 0 is valid, and the initial value.
			return buf_pointer_ == bufpos && psp_pointer_ != 0;
		}

		// Automatically marks used for LRU purposes.
		u32 Ptr(u32 bufpos) {
			last_used_ = slabGeneration_;
			return psp_pointer_ + (bufpos - buf_pointer_);
		}

		int Age() const {
			// If not allocated, it's as expired as it's gonna get.
			if (psp_pointer_ == 0)
				return std::numeric_limits<int>::max();
			return slabGeneration_ - last_used_;
		}

		bool Alloc();
		void Free();
		bool Setup(u32 bufpos, const std::vector<u8> &pushbuf_);
	};

	// An ad hoc mapping of the pushbuffer (either larger than a slab or straddling slabs.)
	// Remember: texture data, verts, etc. must be contiguous.
	struct ExtraInfo {
		u32 psp_pointer_ = 0;
		u32 buf_pointer_ = 0;
		u32 size_ = 0;

		bool Matches(u32 bufpos, u32 sz) {
			// We check psp_pointer_ because bufpos = 0 is valid, and the initial value.
			return buf_pointer_ == bufpos && psp_pointer_ != 0 && size_ >= sz;
		}

		u32 Ptr() {
			return psp_pointer_;
		}

		bool Alloc(u32 bufpos, u32 sz, const std::vector<u8> &pushbuf_);
		void Free();
	};

	SlabInfo slabs_[SLAB_COUNT]{};
	u32 lastSlab_ = 0;
	u32 extraOffset_ = 0;
	ExtraInfo extra_[EXTRA_COUNT]{};

	const std::vector<u8> &pushbuf_;
};

u32 BufMapping::Map(u32 bufpos, u32 sz, const std::function<void()> &flush) {
	int slab1 = bufpos / SLAB_SIZE;
	int slab2 = (bufpos + sz - 1) / SLAB_SIZE;

	if (slab1 == slab2) {
		// Shortcut in case it's simply the most recent slab.
		if (slabs_[lastSlab_].Matches(slab1 * SLAB_SIZE))
			return slabs_[lastSlab_].Ptr(bufpos);
		// Doesn't straddle, so we can just map to a slab.
		return MapSlab(bufpos, flush);
	} else {
		// We need contiguous, so we'll just allocate separately.
		return MapExtra(bufpos, sz, flush);
	}
}

u32 BufMapping::MapSlab(u32 bufpos, const std::function<void()> &flush) {
	u32 slab_pos = (bufpos / SLAB_SIZE) * SLAB_SIZE;

	int best = 0;
	for (int i = 0; i < SLAB_COUNT; ++i) {
		if (slabs_[i].Matches(slab_pos)) {
			return slabs_[i].Ptr(bufpos);
		}

		if (slabs_[i].Age() > slabs_[best].Age()) {
			best = i;
		}
	}

	// Stall before mapping a new slab.
	flush();

	// Okay, we need to allocate.
	if (!slabs_[best].Setup(slab_pos, pushbuf_)) {
		return 0;
	}
	lastSlab_ = best;
	return slabs_[best].Ptr(bufpos);
}

u32 BufMapping::MapExtra(u32 bufpos, u32 sz, const std::function<void()> &flush) {
	for (int i = 0; i < EXTRA_COUNT; ++i) {
		// Might be likely to reuse larger buffers straddling slabs.
		if (extra_[i].Matches(bufpos, sz)) {
			return extra_[i].Ptr();
		}
	}

	// Stall first, so we don't stomp existing RAM.
	flush();

	int i = extraOffset_;
	extraOffset_ = (extraOffset_ + 1) % EXTRA_COUNT;

	if (!extra_[i].Alloc(bufpos, sz, pushbuf_)) {
		// Free them all and retry - hopefully none of these are still in use.
		for (int j = 0; j < EXTRA_COUNT; ++j) {
			extra_[j].Free();
		}
		if (!extra_[i].Alloc(bufpos, sz, pushbuf_)) {
			return 0;
		}
	}
	return extra_[i].Ptr();
}

bool BufMapping::SlabInfo::Alloc() {
	u32 sz = SLAB_SIZE;
	psp_pointer_ = userMemory.Alloc(sz, false, "Slab");
	if (psp_pointer_ == -1) {
		psp_pointer_ = 0;
	}
	return psp_pointer_ != 0;
}

void BufMapping::SlabInfo::Free() {
	if (psp_pointer_) {
		userMemory.Free(psp_pointer_);
		psp_pointer_ = 0;
		buf_pointer_ = 0;
		last_used_ = 0;
	}
}

bool BufMapping::ExtraInfo::Alloc(u32 bufpos, u32 sz, const std::vector<u8> &pushbuf_) {
	// Make sure we've freed any previous allocation first.
	Free();

	u32 allocSize = sz;
	psp_pointer_ = userMemory.Alloc(allocSize, false, "Straddle extra");
	if (psp_pointer_ == -1) {
		psp_pointer_ = 0;
	}
	if (psp_pointer_ == 0) {
		return false;
	}

	buf_pointer_ = bufpos;
	size_ = sz;
	Memory::MemcpyUnchecked(psp_pointer_, pushbuf_.data() + bufpos, sz);
	return true;
}

void BufMapping::ExtraInfo::Free() {
	if (psp_pointer_) {
		userMemory.Free(psp_pointer_);
		psp_pointer_ = 0;
		buf_pointer_ = 0;
	}
}

bool BufMapping::SlabInfo::Setup(u32 bufpos, const std::vector<u8> &pushbuf_) {
	// If it already has RAM, we're simply taking it over. Slabs come only in one size.
	if (psp_pointer_ == 0) {
		if (!Alloc()) {
			return false;
		}
	}

	buf_pointer_ = bufpos;
	u32 sz = std::min((u32)SLAB_SIZE, (u32)pushbuf_.size() - bufpos);
	Memory::MemcpyUnchecked(psp_pointer_, pushbuf_.data() + bufpos, sz);

	slabGeneration_++;
	last_used_ = slabGeneration_;
	return true;
}

int BufMapping::slabGeneration_ = 0;

class DumpExecute {
public:
	DumpExecute(const std::vector<u8> &pushbuf, const std::vector<Command> &commands, uint32_t version)
		: pushbuf_(pushbuf), commands_(commands), mapping_(pushbuf), version_(version) {
	}
	~DumpExecute();

	bool Run();

private:
	void SyncStall();
	bool SubmitCmds(const void *p, u32 sz);
	void SubmitListEnd();

	void Init(u32 ptr, u32 sz);
	void Registers(u32 ptr, u32 sz);
	void Vertices(u32 ptr, u32 sz);
	void Indices(u32 ptr, u32 sz);
	void ClutAddr(u32 ptr, u32 sz);
	void Clut(u32 ptr, u32 sz);
	void TransferSrc(u32 ptr, u32 sz);
	void Memset(u32 ptr, u32 sz);
	void MemcpyDest(u32 ptr, u32 sz);
	void Memcpy(u32 ptr, u32 sz);
	void Texture(int level, u32 ptr, u32 sz);
	void Framebuf(int level, u32 ptr, u32 sz);
	void Display(u32 ptr, u32 sz, bool allowFlip);
	void EdramTrans(u32 ptr, u32 sz);

	u32 execMemcpyDest = 0;
	u32 execClutAddr = 0;
	u32 execClutFlags = 0;
	u32 execListBuf = 0;
	u32 execListPos = 0;
	u32 execListID = 0;
	const int LIST_BUF_SIZE = 256 * 1024;
	std::vector<u32> execListQueue;
	u16 lastBufw_[8]{};
	u32 lastTex_[8]{};
	u32 lastBase_ = 0;

	const std::vector<u8> &pushbuf_;
	const std::vector<Command> &commands_;
	BufMapping mapping_;
	uint32_t version_ = 0;
};
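
// Typical lifecycle (as used by RunMountedReplay below): construct a
// DumpExecute with the dump's pushbuffer and command list, call Run() once,
// and let the destructor free the PSP-side list buffer and slab mappings.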

void DumpExecute::SyncStall() {
	if (execListBuf == 0) {
		return;
	}

	gpu->UpdateStall(execListID, execListPos);
	s64 listTicks = gpu->GetListTicks(execListID);
	if (listTicks != -1) {
		s64 nowTicks = CoreTiming::GetTicks();
		if (listTicks > nowTicks) {
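			// Charge the remaining list time to the CPU's downcount so
			// emulated time accounts for the list finishing in the future.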
			currentMIPS->downcount -= listTicks - nowTicks;
		}
	}

	// Make sure downcount doesn't overflow.
	CoreTiming::ForceCheck();
}

bool DumpExecute::SubmitCmds(const void *p, u32 sz) {
	if (execListBuf == 0) {
		u32 allocSize = LIST_BUF_SIZE;
		execListBuf = userMemory.Alloc(allocSize, true, "List buf");
		if (execListBuf == -1) {
			execListBuf = 0;
		}
		if (execListBuf == 0) {
			ERROR_LOG(Log::System, "Unable to allocate for display list");
			return false;
		}

		execListPos = execListBuf;
		Memory::Write_U32(GE_CMD_NOP << 24, execListPos);
		execListPos += 4;

		gpu->EnableInterrupts(false);
		auto optParam = PSPPointer<PspGeListArgs>::Create(0);
		execListID = gpu->EnqueueList(execListBuf, execListPos, -1, optParam, false);
		gpu->EnableInterrupts(true);
	}

	u32 pendingSize = (u32)execListQueue.size() * sizeof(u32);
	// Validate space for jump.
	u32 allocSize = pendingSize + sz + 8;
	if (execListPos + allocSize >= execListBuf + LIST_BUF_SIZE) {
		Memory::Write_U32((GE_CMD_BASE << 24) | ((execListBuf >> 8) & 0x00FF0000), execListPos);
		Memory::Write_U32((GE_CMD_JUMP << 24) | (execListBuf & 0x00FFFFFF), execListPos + 4);

		execListPos = execListBuf;
		lastBase_ = execListBuf & 0xFF000000;

		// Don't continue until we've stalled.
		SyncStall();
	}
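
	// To illustrate the wrap above with a hypothetical list buffer at
	// 0x08812340 (each GE word is opcode << 24 | 24-bit payload):
	//   BASE word = (GE_CMD_BASE << 24) | 0x00080000  // target bits 24-31, placed in bits 16-23
	//   JUMP word = (GE_CMD_JUMP << 24) | 0x00812340  // low 24 bits of the target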

	Memory::MemcpyUnchecked(execListPos, execListQueue.data(), pendingSize);
	execListPos += pendingSize;
	u32 writePos = execListPos;
	Memory::MemcpyUnchecked(execListPos, p, sz);
	execListPos += sz;

	// TODO: Unfortunate. Maybe Texture commands should contain the bufw instead.
	// The goal here is to realistically combine prims in dumps. Stalling for the bufw flushes.
	u32_le *ops = (u32_le *)Memory::GetPointerUnchecked(writePos);

	u32 lastTexHigh[8]{};
	for (int i = 0; i < 8; ++i)
		lastTexHigh[i] = ((lastTex_[i] & 0xFF000000) >> 8) | ((GE_CMD_TEXBUFWIDTH0 + i) << 24);

	for (u32 i = 0; i < sz / 4; ++i) {
		u32 cmd = ops[i] >> 24;
		if (cmd >= GE_CMD_TEXBUFWIDTH0 && cmd <= GE_CMD_TEXBUFWIDTH7) {
			int level = cmd - GE_CMD_TEXBUFWIDTH0;
			u16 bufw = ops[i] & 0xFFFF;

			// NOP the address part of the command to avoid a flush too.
			if (bufw == lastBufw_[level])
				ops[i] = GE_CMD_NOP << 24;
			else
				ops[i] = lastTexHigh[level] | bufw;
			lastBufw_[level] = bufw;
		}

		// Since we're here anyway, also NOP out texture addresses.
		// This makes Step Tex not hit phantom textures, but we rely on it for lastTex_[].
		if (cmd >= GE_CMD_TEXADDR0 && cmd <= GE_CMD_TEXADDR7) {
			ops[i] = GE_CMD_NOP << 24;
		}
		if (cmd == GE_CMD_SIGNAL || cmd == GE_CMD_BASE) {
			lastBase_ = 0xFFFFFFFF;
		}
	}
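
	// For example, a TEXBUFWIDTH2 word whose bufw matches lastBufw_[2] becomes
	// a NOP; otherwise it is rewritten to carry lastTex_[2]'s high address bits
	// together with the new bufw, so neither case forces a spurious flush.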

	execListQueue.clear();

	return true;
}

void DumpExecute::SubmitListEnd() {
	if (execListPos == 0) {
		return;
	}

	// There's always space for the end, same size as a jump.
	Memory::Write_U32(GE_CMD_FINISH << 24, execListPos);
	Memory::Write_U32(GE_CMD_END << 24, execListPos + 4);
	execListPos += 8;

	for (int i = 0; i < 8; ++i)
		lastTex_[i] = 0;
	lastBase_ = 0xFFFFFFFF;

	SyncStall();
	gpu->ListSync(execListID, 0);
}

void DumpExecute::Init(u32 ptr, u32 sz) {
	gstate.Restore((u32_le *)(pushbuf_.data() + ptr));
	gpu->ReapplyGfxState();

	for (int i = 0; i < 8; ++i) {
		lastBufw_[i] = 0;
		lastTex_[i] = 0;
	}
	lastBase_ = 0xFFFFFFFF;
}

void DumpExecute::Registers(u32 ptr, u32 sz) {
	SubmitCmds(pushbuf_.data() + ptr, sz);
}

void DumpExecute::Vertices(u32 ptr, u32 sz) {
	u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
	if (psp == 0) {
		ERROR_LOG(Log::System, "Unable to allocate for vertices");
		return;
	}

	if (lastBase_ != (psp & 0xFF000000)) {
		execListQueue.push_back((GE_CMD_BASE << 24) | ((psp >> 8) & 0x00FF0000));
		lastBase_ = psp & 0xFF000000;
	}
	execListQueue.push_back((GE_CMD_VADDR << 24) | (psp & 0x00FFFFFF));
}

void DumpExecute::Indices(u32 ptr, u32 sz) {
	u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
	if (psp == 0) {
		ERROR_LOG(Log::System, "Unable to allocate for indices");
		return;
	}

	if (lastBase_ != (psp & 0xFF000000)) {
		execListQueue.push_back((GE_CMD_BASE << 24) | ((psp >> 8) & 0x00FF0000));
		lastBase_ = psp & 0xFF000000;
	}
	execListQueue.push_back((GE_CMD_IADDR << 24) | (psp & 0x00FFFFFF));
}

void DumpExecute::ClutAddr(u32 ptr, u32 sz) {
	struct ClutAddrData {
		u32 addr;
		u32 flags;
	};
	const ClutAddrData *data = (const ClutAddrData *)(pushbuf_.data() + ptr);
	execClutAddr = data->addr;
	execClutFlags = data->flags;
}

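// CLUT data is handled two ways: when a preceding CLUTADDR command recorded
// the original address, the bytes are copied straight back to PSP memory;
// otherwise the data is mapped into scratch RAM and referenced via the
// CLUTADDRUPPER/CLUTADDR commands queued below.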
void DumpExecute::Clut(u32 ptr, u32 sz) {
	// This is always run when we have the actual address set.
	if (execClutAddr != 0) {
		const bool isTarget = (execClutFlags & 1) != 0;

		// Could potentially always skip if !isTarget, but playing it safe for offset texture behavior.
		if (Memory::IsValidRange(execClutAddr, sz) && (!isTarget || !g_Config.bSoftwareRendering)) {
			// Intentionally don't trigger an upload here.
			Memory::MemcpyUnchecked(execClutAddr, pushbuf_.data() + ptr, sz);
			NotifyMemInfo(MemBlockFlags::WRITE, execClutAddr, sz, "ReplayClut");
		}

		execClutAddr = 0;
	} else {
		u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
		if (psp == 0) {
			ERROR_LOG(Log::System, "Unable to allocate for clut");
			return;
		}

		execListQueue.push_back((GE_CMD_CLUTADDRUPPER << 24) | ((psp >> 8) & 0x00FF0000));
		execListQueue.push_back((GE_CMD_CLUTADDR << 24) | (psp & 0x00FFFFFF));
	}
}

void DumpExecute::TransferSrc(u32 ptr, u32 sz) {
	u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
	if (psp == 0) {
		ERROR_LOG(Log::System, "Unable to allocate for transfer");
		return;
	}

	// Need to sync in order to access gstate.transfersrcw.
	SyncStall();

	execListQueue.push_back((gstate.transfersrcw & 0xFF00FFFF) | ((psp >> 8) & 0x00FF0000));
	execListQueue.push_back(((GE_CMD_TRANSFERSRC) << 24) | (psp & 0x00FFFFFF));
}

void DumpExecute::Memset(u32 ptr, u32 sz) {
	PROFILE_THIS_SCOPE("ReplayMemset");
	struct MemsetCommand {
		u32 dest;
		int value;
		u32 sz;
	};

	const MemsetCommand *data = (const MemsetCommand *)(pushbuf_.data() + ptr);

	if (Memory::IsVRAMAddress(data->dest)) {
		SyncStall();
		gpu->PerformMemorySet(data->dest, (u8)data->value, data->sz);
	}
}

void DumpExecute::MemcpyDest(u32 ptr, u32 sz) {
	execMemcpyDest = *(const u32 *)(pushbuf_.data() + ptr);
}

void DumpExecute::Memcpy(u32 ptr, u32 sz) {
	PROFILE_THIS_SCOPE("ReplayMemcpy");
	if (Memory::IsVRAMAddress(execMemcpyDest)) {
		SyncStall();
		Memory::MemcpyUnchecked(execMemcpyDest, pushbuf_.data() + ptr, sz);
		NotifyMemInfo(MemBlockFlags::WRITE, execMemcpyDest, sz, "ReplayMemcpy");
		gpu->PerformWriteColorFromMemory(execMemcpyDest, sz);
	}
}

void DumpExecute::Texture(int level, u32 ptr, u32 sz) {
	u32 psp = mapping_.Map(ptr, sz, std::bind(&DumpExecute::SyncStall, this));
	if (psp == 0) {
		ERROR_LOG(Log::System, "Unable to allocate for texture");
		return;
	}

	if (lastTex_[level] != psp) {
		u32 bufwCmd = GE_CMD_TEXBUFWIDTH0 + level;
		u32 addrCmd = GE_CMD_TEXADDR0 + level;
		execListQueue.push_back((bufwCmd << 24) | ((psp >> 8) & 0x00FF0000) | lastBufw_[level]);
		execListQueue.push_back((addrCmd << 24) | (psp & 0x00FFFFFF));
		lastTex_[level] = psp;
	}
}

void DumpExecute::Framebuf(int level, u32 ptr, u32 sz) {
	PROFILE_THIS_SCOPE("ReplayFramebuf");
	struct FramebufData {
		u32 addr;
		int bufw;
		u32 flags;
		u32 pad;
	};

	FramebufData *framebuf = (FramebufData *)(pushbuf_.data() + ptr);

	if (lastTex_[level] != framebuf->addr || lastBufw_[level] != framebuf->bufw) {
		u32 bufwCmd = GE_CMD_TEXBUFWIDTH0 + level;
		u32 addrCmd = GE_CMD_TEXADDR0 + level;
		execListQueue.push_back((bufwCmd << 24) | ((framebuf->addr >> 8) & 0x00FF0000) | framebuf->bufw);
		execListQueue.push_back((addrCmd << 24) | (framebuf->addr & 0x00FFFFFF));
		lastTex_[level] = framebuf->addr;
		lastBufw_[level] = framebuf->bufw;
	}

	// And now also copy the data into VRAM (in case it wasn't actually rendered.)
	u32 headerSize = (u32)sizeof(FramebufData);
	u32 pspSize = sz - headerSize;
	const bool isTarget = (framebuf->flags & 1) != 0;
	const bool unchangedVRAM = version_ >= 6 && (framebuf->flags & 2) != 0;
	// TODO: Could use drawnVRAM flag, but it can be wrong.
	// Could potentially always skip if !isTarget, but playing it safe for offset texture behavior.
	if (Memory::IsValidRange(framebuf->addr, pspSize) && !unchangedVRAM && (!isTarget || !g_Config.bSoftwareRendering)) {
		// Intentionally don't trigger an upload here.
		Memory::MemcpyUnchecked(framebuf->addr, pushbuf_.data() + ptr + headerSize, pspSize);
		NotifyMemInfo(MemBlockFlags::WRITE, framebuf->addr, pspSize, "ReplayTex");
	}
}

void DumpExecute::Display(u32 ptr, u32 sz, bool allowFlip) {
	struct DisplayBufData {
		PSPPointer<u8> topaddr;
		int linesize, pixelFormat;
	};

	DisplayBufData *disp = (DisplayBufData *)(pushbuf_.data() + ptr);

	// Sync up drawing.
	SyncStall();

	__DisplaySetFramebuf(disp->topaddr.ptr, disp->linesize, disp->pixelFormat, 1);
	if (allowFlip) {
		__DisplaySetFramebuf(disp->topaddr.ptr, disp->linesize, disp->pixelFormat, 0);
	}
}

void DumpExecute::EdramTrans(u32 ptr, u32 sz) {
	uint32_t value;
	memcpy(&value, pushbuf_.data() + ptr, 4);

	// Sync up drawing.
	SyncStall();

	if (gpu)
		gpu->SetAddrTranslation(value);
}

DumpExecute::~DumpExecute() {
	execMemcpyDest = 0;
	if (execListBuf) {
		userMemory.Free(execListBuf);
		execListBuf = 0;
	}
	execListPos = 0;
	mapping_.Reset();
}

bool DumpExecute::Run() {
	// Start with the default value.
	if (gpu)
		gpu->SetAddrTranslation(0x400);

	for (size_t i = 0; i < commands_.size(); i++) {
		const Command &cmd = commands_[i];
		switch (cmd.type) {
		case CommandType::INIT:
			Init(cmd.ptr, cmd.sz);
			break;

		case CommandType::REGISTERS:
			Registers(cmd.ptr, cmd.sz);
			break;

		case CommandType::VERTICES:
			Vertices(cmd.ptr, cmd.sz);
			break;

		case CommandType::INDICES:
			Indices(cmd.ptr, cmd.sz);
			break;

		case CommandType::CLUTADDR:
			ClutAddr(cmd.ptr, cmd.sz);
			break;

		case CommandType::CLUT:
			Clut(cmd.ptr, cmd.sz);
			break;

		case CommandType::TRANSFERSRC:
			TransferSrc(cmd.ptr, cmd.sz);
			break;

		case CommandType::MEMSET:
			Memset(cmd.ptr, cmd.sz);
			break;

		case CommandType::MEMCPYDEST:
			MemcpyDest(cmd.ptr, cmd.sz);
			break;

		case CommandType::MEMCPYDATA:
			Memcpy(cmd.ptr, cmd.sz);
			break;

		case CommandType::EDRAMTRANS:
			EdramTrans(cmd.ptr, cmd.sz);
			break;

		case CommandType::TEXTURE0:
		case CommandType::TEXTURE1:
		case CommandType::TEXTURE2:
		case CommandType::TEXTURE3:
		case CommandType::TEXTURE4:
		case CommandType::TEXTURE5:
		case CommandType::TEXTURE6:
		case CommandType::TEXTURE7:
			Texture((int)cmd.type - (int)CommandType::TEXTURE0, cmd.ptr, cmd.sz);
			break;

		case CommandType::FRAMEBUF0:
		case CommandType::FRAMEBUF1:
		case CommandType::FRAMEBUF2:
		case CommandType::FRAMEBUF3:
		case CommandType::FRAMEBUF4:
		case CommandType::FRAMEBUF5:
		case CommandType::FRAMEBUF6:
		case CommandType::FRAMEBUF7:
			Framebuf((int)cmd.type - (int)CommandType::FRAMEBUF0, cmd.ptr, cmd.sz);
			break;

		case CommandType::DISPLAY:
			Display(cmd.ptr, cmd.sz, i == commands_.size() - 1);
			break;

		default:
			ERROR_LOG(Log::System, "Unsupported GE dump command: %d", (int)cmd.type);
			return false;
		}
	}

	SubmitListEnd();
	return true;
}

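// Each compressed block in the dump file is stored as a u32 compressed_size
// followed by that many bytes of payload: snappy-compressed for dumps with
// version < 5, zstd-compressed for version >= 5.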
static bool ReadCompressed(u32 fp, void *dest, size_t sz, uint32_t version) {
	u32 compressed_size = 0;
	if (pspFileSystem.ReadFile(fp, (u8 *)&compressed_size, sizeof(compressed_size)) != sizeof(compressed_size)) {
		return false;
	}

	u8 *compressed = new u8[compressed_size];
	if (pspFileSystem.ReadFile(fp, compressed, compressed_size) != compressed_size) {
		delete[] compressed;
		return false;
	}

	size_t real_size = sz;
	if (version < 5)
		snappy_uncompress((const char *)compressed, compressed_size, (char *)dest, &real_size);
	else
		real_size = ZSTD_decompress(dest, real_size, compressed, compressed_size);
	delete[] compressed;

	return real_size == sz;
}

static void ReplayStop() {
	// This can happen from a separate thread.
	std::lock_guard<std::mutex> guard(executeLock);
	lastExecFilename.clear();
	lastExecCommands.clear();
	lastExecPushbuf.clear();
	lastExecVersion = 0;
}

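// A GE dump starts with a fixed-size Header (whose magic, version, and gameID
// fields are checked below), then a u32 command count and a u32 pushbuffer
// size, followed by two compressed blocks (see ReadCompressed above): the
// Command array and the raw pushbuffer bytes.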
bool RunMountedReplay(const std::string &filename) {
	_assert_msg_(!GPURecord::IsActivePending(), "Cannot run replay while recording.");

	std::lock_guard<std::mutex> guard(executeLock);
	Core_ListenStopRequest(&ReplayStop);

	uint32_t version = lastExecVersion;
	if (lastExecFilename != filename) {
		PROFILE_THIS_SCOPE("ReplayLoad");
		u32 fp = pspFileSystem.OpenFile(filename, FILEACCESS_READ);
		Header header;
		pspFileSystem.ReadFile(fp, (u8 *)&header, sizeof(header));
		version = header.version;

		if (memcmp(header.magic, HEADER_MAGIC, sizeof(header.magic)) != 0 || header.version > VERSION || header.version < MIN_VERSION) {
			ERROR_LOG(Log::System, "Invalid GE dump or unsupported version");
			pspFileSystem.CloseFile(fp);
			return false;
		}
		if (header.version <= 3) {
			pspFileSystem.SeekFile(fp, 12, FILEMOVE_BEGIN);
			memset(header.gameID, 0, sizeof(header.gameID));
		}

		size_t gameIDLength = strnlen(header.gameID, sizeof(header.gameID));
		if (gameIDLength != 0) {
			g_paramSFO.SetValue("DISC_ID", std::string(header.gameID, gameIDLength), (int)sizeof(header.gameID));
		}

		u32 sz = 0;
		pspFileSystem.ReadFile(fp, (u8 *)&sz, sizeof(sz));
		u32 bufsz = 0;
		pspFileSystem.ReadFile(fp, (u8 *)&bufsz, sizeof(bufsz));

		lastExecCommands.resize(sz);
		lastExecPushbuf.resize(bufsz);

		bool truncated = false;
		truncated = truncated || !ReadCompressed(fp, lastExecCommands.data(), sizeof(Command) * sz, header.version);
		truncated = truncated || !ReadCompressed(fp, lastExecPushbuf.data(), bufsz, header.version);

		pspFileSystem.CloseFile(fp);

		if (truncated) {
			ERROR_LOG(Log::System, "Truncated GE dump");
			return false;
		}

		lastExecFilename = filename;
		lastExecVersion = version;
	}

	DumpExecute executor(lastExecPushbuf, lastExecCommands, version);
	return executor.Run();
}

};