GitHub Repository: stenzek/duckstation
Path: blob/master/src/core/cpu_code_cache.cpp
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>
// SPDX-License-Identifier: CC-BY-NC-ND-4.0

#include "bus.h"
#include "cpu_code_cache_private.h"
#include "cpu_core.h"
#include "cpu_core_private.h"
#include "cpu_disasm.h"
#include "host.h"
#include "settings.h"
#include "system.h"
#include "timing_event.h"

#include "util/page_fault_handler.h"

#include "common/align.h"
#include "common/assert.h"
#include "common/error.h"
#include "common/intrin.h"
#include "common/log.h"
#include "common/memmap.h"

LOG_CHANNEL(CodeCache);

// Enable dumping of recompiled block code size statistics.
// #define DUMP_CODE_SIZE_STATS 1

// Enable profiling of JIT blocks.
// #define ENABLE_RECOMPILER_PROFILING 1

#ifdef ENABLE_RECOMPILER
#include "cpu_recompiler.h"
#endif

#include <map>
#include <unordered_set>
#include <zlib.h>

namespace CPU::CodeCache {

using LUTRangeList = std::array<std::pair<VirtualMemoryAddress, VirtualMemoryAddress>, 9>;
using PageProtectionArray = std::array<PageProtectionInfo, Bus::RAM_8MB_CODE_PAGE_COUNT>;
using BlockInstructionInfoPair = std::pair<Instruction, InstructionInfo>;
using BlockInstructionList = std::vector<BlockInstructionInfoPair>;

// Switch to manual protection if we invalidate more than 4 times within 60 frames.
// Fall back to the interpreter for a block if we recompile it more than 3 times within 15 frames.
// The interpreter fallback is set before the manual protection switch, so that if it's just a single block
// which is constantly getting mutated, we won't hurt the performance of the rest in the page.
static constexpr u32 RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK = 3;
static constexpr u32 RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK = 15;
static constexpr u32 INVALIDATE_COUNT_FOR_MANUAL_PROTECTION = 4;
static constexpr u32 INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION = 60;

static void AllocateLUTs();
static void DeallocateLUTs();
static void ResetCodeLUT();
static void SetCodeLUT(u32 pc, const void* function);
static void InvalidateBlock(Block* block, BlockState new_state);
static void ClearBlocks();

static Block* LookupBlock(u32 pc);
static Block* CreateBlock(u32 pc, const BlockInstructionList& instructions, const BlockMetadata& metadata);
static bool HasBlockLUT(u32 pc);
static bool IsBlockCodeCurrent(const Block* block);
static bool RevalidateBlock(Block* block);
static PageProtectionMode GetProtectionModeForPC(u32 pc);
static PageProtectionMode GetProtectionModeForBlock(const Block* block);
static bool ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata);
static void FillBlockRegInfo(Block* block);
static void CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src);
static void SetRegAccess(InstructionInfo* inst, Reg reg, bool write);
static void AddBlockToPageList(Block* block);
static void RemoveBlockFromPageList(Block* block);

static Block* CreateCachedInterpreterBlock(u32 pc);
[[noreturn]] static void ExecuteCachedInterpreter();
template<PGXPMode pgxp_mode>
[[noreturn]] static void ExecuteCachedInterpreterImpl();

// Fast map provides lookup from PC to function
// Function pointers are offset so that you don't need to subtract
CodeLUTArray g_code_lut;
static BlockLUTArray s_block_lut;
static std::unique_ptr<const void*[]> s_lut_code_pointers;
static std::unique_ptr<Block*[]> s_lut_block_pointers;
static PageProtectionArray s_page_protection = {};
static std::vector<Block*> s_blocks;

// for compiling - reuse to avoid allocations
static BlockInstructionList s_block_instructions;

static void BacklinkBlocks(u32 pc, const void* dst);
static void UnlinkBlockExits(Block* block);
static void ResetCodeBuffer();

static void CompileASMFunctions();
static bool CompileBlock(Block* block);
static PageFaultHandler::HandlerResult HandleFastmemException(void* exception_pc, void* fault_address, bool is_write);
static void BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info);
static void RemoveBackpatchInfoForRange(const void* host_code, u32 size);

static BlockLinkMap s_block_links;
static std::map<const void*, LoadstoreBackpatchInfo> s_fastmem_backpatch_info;
static std::unordered_set<u32> s_fastmem_faulting_pcs;

NORETURN_FUNCTION_POINTER void (*g_enter_recompiler)();
const void* g_compile_or_revalidate_block;
const void* g_run_events_and_dispatch;
const void* g_dispatcher;
const void* g_interpret_block;
const void* g_discard_and_recompile_block;

#ifdef ENABLE_RECOMPILER_PROFILING

PerfScope MIPSPerfScope("MIPS");

#endif

#if defined(CPU_ARCH_ARM32)
// Use a smaller code buffer size on AArch32 to have a better chance of being in range.
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 16 * 1024 * 1024;
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 4 * 1024 * 1024;
#else
static constexpr u32 RECOMPILER_CODE_CACHE_SIZE = 48 * 1024 * 1024;
static constexpr u32 RECOMPILER_FAR_CODE_CACHE_SIZE = 16 * 1024 * 1024;
#endif

// On Linux ARM32/ARM64, we use a dedicated section in the ELF for storing code. This is because without
// ASLR, or on certain ASLR offsets, the sbrk() heap ends up immediately following the text/data sections,
// which means there isn't a large enough gap to fit within range on ARM32. Also enable it for Android,
// because MAP_FIXED_NOREPLACE may not exist on older kernels.
#if (defined(__linux__) && (defined(CPU_ARCH_ARM32) || defined(CPU_ARCH_ARM64))) || defined(__ANDROID__)
#define USE_CODE_BUFFER_SECTION 1
#ifdef __clang__
#pragma clang section bss = ".jitstorage"
__attribute__((aligned(MAX_HOST_PAGE_SIZE))) static u8 s_code_buffer_ptr[RECOMPILER_CODE_CACHE_SIZE];
#pragma clang section bss = ""
#endif
#else
static u8* s_code_buffer_ptr = nullptr;
#endif

static u8* s_code_ptr = nullptr;
static u8* s_free_code_ptr = nullptr;
static u32 s_code_size = 0;
static u32 s_code_used = 0;

static u8* s_far_code_ptr = nullptr;
static u8* s_free_far_code_ptr = nullptr;
static u32 s_far_code_size = 0;
static u32 s_far_code_used = 0;

#ifdef DUMP_CODE_SIZE_STATS
static u32 s_total_instructions_compiled = 0;
static u32 s_total_host_instructions_emitted = 0;
static u32 s_total_host_code_used_by_instructions = 0;
#endif
} // namespace CPU::CodeCache

bool CPU::CodeCache::IsUsingRecompiler()
{
  return (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler);
}

bool CPU::CodeCache::IsUsingFastmem()
{
  return (g_settings.cpu_fastmem_mode != CPUFastmemMode::Disabled);
}

bool CPU::CodeCache::ProcessStartup(Error* error)
{
#ifdef USE_CODE_BUFFER_SECTION
  const u8* module_base = static_cast<const u8*>(MemMap::GetBaseAddress());
  INFO_LOG("Using JIT buffer section of size {} at {} (0x{:X} bytes / {} MB away)", sizeof(s_code_buffer_ptr),
           static_cast<void*>(s_code_buffer_ptr), std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)),
           (std::abs(static_cast<ptrdiff_t>(s_code_buffer_ptr - module_base)) + (1024 * 1024 - 1)) / (1024 * 1024));
  const bool code_buffer_allocated =
    MemMap::MemProtect(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE, PageProtect::ReadWriteExecute);
#else
  s_code_buffer_ptr = static_cast<u8*>(MemMap::AllocateJITMemory(RECOMPILER_CODE_CACHE_SIZE));
  const bool code_buffer_allocated = (s_code_buffer_ptr != nullptr);
#endif
  if (!code_buffer_allocated) [[unlikely]]
  {
    Error::SetStringView(error, "Failed to allocate code storage. The log may contain more information, you will need "
                                "to run DuckStation with -earlyconsole in the command line.");
    return false;
  }

  AllocateLUTs();

  if (!PageFaultHandler::Install(error))
    return false;

  return true;
}

void CPU::CodeCache::ProcessShutdown()
{
  DeallocateLUTs();

#ifndef USE_CODE_BUFFER_SECTION
  MemMap::ReleaseJITMemory(s_code_buffer_ptr, RECOMPILER_CODE_CACHE_SIZE);
#endif
}

void CPU::CodeCache::Reset()
{
  ClearBlocks();

  if (IsUsingRecompiler())
  {
    ResetCodeBuffer();
    CompileASMFunctions();
    ResetCodeLUT();
  }
}

void CPU::CodeCache::Shutdown()
{
  ClearBlocks();
}

void CPU::CodeCache::Execute()
{
  if (IsUsingRecompiler())
  {
    g_enter_recompiler();
    UnreachableCode();
  }
  else
  {
    ExecuteCachedInterpreter();
  }
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Block Management
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

namespace CPU::CodeCache {
static constexpr u32 GetLUTTableCount(u32 start, u32 end)
{
  return ((end >> LUT_TABLE_SHIFT) - (start >> LUT_TABLE_SHIFT)) + 1;
}

static constexpr LUTRangeList GetLUTRanges()
{
  const LUTRangeList ranges = {{
    {0x00000000, 0x00800000}, // RAM
    {0x1F000000, 0x1F060000}, // EXP1
    {0x1FC00000, 0x1FC80000}, // BIOS

    {0x80000000, 0x80800000}, // RAM
    {0x9F000000, 0x9F060000}, // EXP1
    {0x9FC00000, 0x9FC80000}, // BIOS

    {0xA0000000, 0xA0800000}, // RAM
    {0xBF000000, 0xBF060000}, // EXP1
    {0xBFC00000, 0xBFC80000}  // BIOS
  }};
  return ranges;
}

static constexpr u32 GetLUTSlotCount(bool include_unreachable)
{
  u32 tables = include_unreachable ? 1 : 0; // unreachable table
  for (const auto& [start, end] : GetLUTRanges())
    tables += GetLUTTableCount(start, end);

  return tables * LUT_TABLE_SIZE;
}
} // namespace CPU::CodeCache

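// The code/block LUTs are two-level tables: the first level is indexed by (pc >> LUT_TABLE_SHIFT), and
// each second-level table holds one entry per instruction word ((pc & 0xFFFF) >> 2). Second-level tables
// are only allocated for the RAM/EXP1/BIOS ranges above; every other address shares a single
// "unreachable" code table, and has no block table at all.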
void CPU::CodeCache::AllocateLUTs()
{
  constexpr u32 num_code_slots = GetLUTSlotCount(true);
  constexpr u32 num_block_slots = GetLUTSlotCount(false);

  Assert(!s_lut_code_pointers && !s_lut_block_pointers);
  s_lut_code_pointers = std::make_unique<const void*[]>(num_code_slots);
  s_lut_block_pointers = std::make_unique<Block*[]>(num_block_slots);
  std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * num_block_slots);

  CodeLUT code_table_ptr = s_lut_code_pointers.get();
  Block** block_table_ptr = s_lut_block_pointers.get();
  CodeLUT const code_table_ptr_end = code_table_ptr + num_code_slots;
  Block** const block_table_ptr_end = block_table_ptr + num_block_slots;

  // Make the unreachable table jump to the invalid code callback.
  MemsetPtrs(code_table_ptr, static_cast<const void*>(nullptr), LUT_TABLE_COUNT);

  // Mark everything as unreachable to begin with.
  for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
  {
    g_code_lut[i] = code_table_ptr;
    s_block_lut[i] = nullptr;
  }

  // Exclude unreachable.
  code_table_ptr += LUT_TABLE_SIZE;

  // Allocate ranges.
  for (const auto& [start, end] : GetLUTRanges())
  {
    const u32 start_slot = start >> LUT_TABLE_SHIFT;
    const u32 count = GetLUTTableCount(start, end);
    for (u32 i = 0; i < count; i++)
    {
      const u32 slot = start_slot + i;

      g_code_lut[slot] = code_table_ptr;
      code_table_ptr += LUT_TABLE_SIZE;

      s_block_lut[slot] = block_table_ptr;
      block_table_ptr += LUT_TABLE_SIZE;
    }
  }

  Assert(code_table_ptr == code_table_ptr_end);
  Assert(block_table_ptr == block_table_ptr_end);
}

void CPU::CodeCache::DeallocateLUTs()
{
  s_lut_block_pointers.reset();
  s_lut_code_pointers.reset();
}

void CPU::CodeCache::ResetCodeLUT()
{
  // Make the unreachable table jump to the invalid code callback.
  MemsetPtrs(s_lut_code_pointers.get(), g_interpret_block, LUT_TABLE_COUNT);

  for (u32 i = 0; i < LUT_TABLE_COUNT; i++)
  {
    // Don't overwrite anything bound to unreachable.
    CodeLUT ptr = g_code_lut[i];
    if (ptr == s_lut_code_pointers.get())
      continue;

    MemsetPtrs(ptr, g_compile_or_revalidate_block, LUT_TABLE_SIZE);
  }
}

void CPU::CodeCache::SetCodeLUT(u32 pc, const void* function)
{
  const u32 table = pc >> LUT_TABLE_SHIFT;
  const u32 idx = (pc & 0xFFFF) >> 2;
  DebugAssert(g_code_lut[table] != s_lut_code_pointers.get());
  g_code_lut[table][idx] = function;
}

CPU::CodeCache::Block* CPU::CodeCache::LookupBlock(u32 pc)
{
  const u32 table = pc >> LUT_TABLE_SHIFT;
  if (!s_block_lut[table])
    return nullptr;

  const u32 idx = (pc & 0xFFFF) >> 2;
  return s_block_lut[table][idx];
}

bool CPU::CodeCache::HasBlockLUT(u32 pc)
{
  const u32 table = pc >> LUT_TABLE_SHIFT;
  return (s_block_lut[table] != nullptr);
}

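// Allocates (or reuses) the Block for a given PC, copies in the decoded instructions, and publishes it
// in the block LUT. Blocks that get recompiled too frequently have their size zeroed and are flagged to
// fall back to the interpreter instead of being cached again.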
CPU::CodeCache::Block* CPU::CodeCache::CreateBlock(u32 pc, const BlockInstructionList& instructions,
                                                   const BlockMetadata& metadata)
{
  const u32 size = static_cast<u32>(instructions.size());
  const u32 table = pc >> LUT_TABLE_SHIFT;
  Assert(s_block_lut[table]);

  // retain from old block
  const u32 frame_number = System::GetFrameNumber();
  u32 recompile_frame = System::GetFrameNumber();
  u8 recompile_count = 0;

  const u32 idx = (pc & 0xFFFF) >> 2;
  Block* block = s_block_lut[table][idx];
  if (block)
  {
    // shouldn't be in the page list.. since we should come here after invalidating
    Assert(!block->next_block_in_page);

    // keep recompile stats before resetting, that way we actually count recompiles
    recompile_frame = block->compile_frame;
    recompile_count = block->compile_count;

    // if it has the same number of instructions, we can reuse it
    if (block->size != size)
    {
      // this sucks.. hopefully won't happen very often
      // TODO: allocate max size, allow shrink but not grow
      auto it = std::find(s_blocks.begin(), s_blocks.end(), block);
      Assert(it != s_blocks.end());
      s_blocks.erase(it);

      block->~Block();
      Common::AlignedFree(block);
      block = nullptr;
    }
  }

  if (!block)
  {
    block = static_cast<Block*>(Common::AlignedMalloc(
      sizeof(Block) + (sizeof(Instruction) * size) + (sizeof(InstructionInfo) * size), alignof(Block)));
    Assert(block);
    new (block) Block();
    s_blocks.push_back(block);
  }

  block->pc = pc;
  block->size = size;
  block->host_code = nullptr;
  block->next_block_in_page = nullptr;
  block->num_exit_links = 0;
  block->state = BlockState::Valid;
  block->flags = metadata.flags;
  block->protection = GetProtectionModeForBlock(block);
  block->uncached_fetch_ticks = metadata.uncached_fetch_ticks;
  block->icache_line_count = metadata.icache_line_count;
  block->host_code_size = 0;
  block->compile_frame = recompile_frame;
  block->compile_count = recompile_count + 1;

  // copy instructions/info
  {
    const std::pair<Instruction, InstructionInfo>* ip = instructions.data();
    Instruction* dsti = block->Instructions();
    InstructionInfo* dstii = block->InstructionsInfo();

    for (u32 i = 0; i < size; i++, ip++, dsti++, dstii++)
    {
      dsti->bits = ip->first.bits;
      *dstii = ip->second;
    }
  }

  s_block_lut[table][idx] = block;

  // if the block is being recompiled too often, leave it in the list, but don't compile it.
  const u32 frame_delta = frame_number - recompile_frame;
  if (frame_delta >= RECOMPILE_FRAMES_FOR_INTERPRETER_FALLBACK)
  {
    block->compile_frame = frame_number;
    block->compile_count = 1;
  }
  else if (block->compile_count >= RECOMPILE_COUNT_FOR_INTERPRETER_FALLBACK)
  {
    DEV_LOG("{} recompiles in {} frames to block 0x{:08X}, not caching.", block->compile_count, frame_delta, block->pc);
    block->size = 0;
  }

  // cached interpreter creates empty blocks when falling back
  if (block->size == 0)
  {
    block->state = BlockState::FallbackToInterpreter;
    block->protection = PageProtectionMode::Unprotected;
    return block;
  }

  // populate backpropagation information for liveness queries
  FillBlockRegInfo(block);

  // add it to the tracking list for its page
  AddBlockToPageList(block);

  return block;
}

bool CPU::CodeCache::IsBlockCodeCurrent(const Block* block)
{
  // blocks shouldn't be wrapping..
  const PhysicalMemoryAddress phys_addr = VirtualAddressToPhysical(block->pc);
  DebugAssert((phys_addr + (sizeof(Instruction) * block->size)) <= Bus::g_ram_size);

  // can just do a straight memcmp..
  return (std::memcmp(Bus::g_ram + phys_addr, block->Instructions(), sizeof(Instruction) * block->size) == 0);
}

bool CPU::CodeCache::RevalidateBlock(Block* block)
{
  DebugAssert(block->state != BlockState::Valid);
  DebugAssert(AddressInRAM(block->pc) || block->state == BlockState::NeedsRecompile);

  if (block->state >= BlockState::NeedsRecompile)
    return false;

  // Protection may have changed if we didn't execute before it got invalidated again. e.g. THPS2.
  if (block->protection != GetProtectionModeForBlock(block))
    return false;

  if (!IsBlockCodeCurrent(block))
  {
    // changed, needs recompiling
    DEBUG_LOG("Block at PC {:08X} has changed and needs recompiling", block->pc);
    return false;
  }

  block->state = BlockState::Valid;
  AddBlockToPageList(block);
  return true;
}

void CPU::CodeCache::AddBlockToPageList(Block* block)
{
  DebugAssert(block->size > 0);
  if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
    return;

  const u32 page_idx = block->StartPageIndex();
  PageProtectionInfo& entry = s_page_protection[page_idx];
  Bus::SetRAMCodePage(page_idx);

  if (entry.last_block_in_page)
  {
    entry.last_block_in_page->next_block_in_page = block;
    entry.last_block_in_page = block;
  }
  else
  {
    entry.first_block_in_page = block;
    entry.last_block_in_page = block;
  }
}

void CPU::CodeCache::RemoveBlockFromPageList(Block* block)
{
  DebugAssert(block->size > 0);
  if (!AddressInRAM(block->pc) || block->protection != PageProtectionMode::WriteProtected)
    return;

  const u32 page_idx = block->StartPageIndex();
  PageProtectionInfo& entry = s_page_protection[page_idx];

  // unlink from list
  Block* prev_block = nullptr;
  Block* cur_block = entry.first_block_in_page;
  while (cur_block)
  {
    if (cur_block != block)
    {
      prev_block = cur_block;
      cur_block = cur_block->next_block_in_page;
      continue;
    }

    if (prev_block)
      prev_block->next_block_in_page = cur_block->next_block_in_page;
    else
      entry.first_block_in_page = cur_block->next_block_in_page;
    if (!cur_block->next_block_in_page)
      entry.last_block_in_page = prev_block;

    cur_block->next_block_in_page = nullptr;
    break;
  }
}

void CPU::CodeCache::InvalidateBlocksWithPageIndex(u32 index)
{
  DebugAssert(index < Bus::RAM_8MB_CODE_PAGE_COUNT);
  Bus::ClearRAMCodePage(index);

  BlockState new_block_state = BlockState::Invalidated;
  PageProtectionInfo& ppi = s_page_protection[index];

  const u32 frame_number = System::GetFrameNumber();
  const u32 frame_delta = frame_number - ppi.invalidate_frame;
  ppi.invalidate_count++;

  if (frame_delta >= INVALIDATE_FRAMES_FOR_MANUAL_PROTECTION)
  {
    ppi.invalidate_count = 1;
    ppi.invalidate_frame = frame_number;
  }
  else if (ppi.invalidate_count > INVALIDATE_COUNT_FOR_MANUAL_PROTECTION)
  {
    DEV_LOG("{} invalidations in {} frames to page {} [0x{:08X} -> 0x{:08X}], switching to manual protection",
            ppi.invalidate_count, frame_delta, index, (index << HOST_PAGE_SHIFT), ((index + 1) << HOST_PAGE_SHIFT));
    ppi.mode = PageProtectionMode::ManualCheck;
    new_block_state = BlockState::NeedsRecompile;
  }

  if (!ppi.first_block_in_page)
    return;

  MemMap::BeginCodeWrite();

  Block* block = ppi.first_block_in_page;
  while (block)
  {
    InvalidateBlock(block, new_block_state);
    block = std::exchange(block->next_block_in_page, nullptr);
  }

  ppi.first_block_in_page = nullptr;
  ppi.last_block_in_page = nullptr;

  MemMap::EndCodeWrite();
}

CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForPC(u32 pc)
{
  if (!AddressInRAM(pc))
    return PageProtectionMode::Unprotected;

  const u32 page_idx = Bus::GetRAMCodePageIndex(pc);
  return s_page_protection[page_idx].mode;
}

CPU::CodeCache::PageProtectionMode CPU::CodeCache::GetProtectionModeForBlock(const Block* block)
{
  // if the block has a branch delay slot crossing a page, we must use manual protection.
  // no other way about it.
  if (block->HasFlag(BlockFlags::BranchDelaySpansPages))
    return PageProtectionMode::ManualCheck;

  return GetProtectionModeForPC(block->pc);
}

void CPU::CodeCache::InvalidateBlock(Block* block, BlockState new_state)
{
  if (block->state == BlockState::Valid)
  {
    SetCodeLUT(block->pc, g_compile_or_revalidate_block);
    BacklinkBlocks(block->pc, g_compile_or_revalidate_block);
  }

  block->state = new_state;
}

void CPU::CodeCache::InvalidateAllRAMBlocks()
{
  // TODO: maybe combine the backlink into one big instruction flush cache?
  MemMap::BeginCodeWrite();

  for (Block* block : s_blocks)
  {
    if (AddressInRAM(block->pc))
    {
      InvalidateBlock(block, BlockState::Invalidated);
      block->next_block_in_page = nullptr;
    }
  }

  for (PageProtectionInfo& ppi : s_page_protection)
  {
    ppi.first_block_in_page = nullptr;
    ppi.last_block_in_page = nullptr;
  }

  MemMap::EndCodeWrite();
  Bus::ClearRAMCodePageFlags();
}

void CPU::CodeCache::ClearBlocks()
{
  for (u32 i = 0; i < Bus::RAM_8MB_CODE_PAGE_COUNT; i++)
  {
    PageProtectionInfo& ppi = s_page_protection[i];
    if (ppi.mode == PageProtectionMode::WriteProtected && ppi.first_block_in_page)
      Bus::ClearRAMCodePage(i);

    ppi = {};
  }

  s_fastmem_backpatch_info.clear();
  s_fastmem_faulting_pcs.clear();
  s_block_links.clear();

  for (Block* block : s_blocks)
  {
    block->~Block();
    Common::AlignedFree(block);
  }
  s_blocks.clear();

  std::memset(s_lut_block_pointers.get(), 0, sizeof(Block*) * GetLUTSlotCount(false));
}

PageFaultHandler::HandlerResult PageFaultHandler::HandlePageFault(void* exception_pc, void* fault_address,
                                                                  bool is_write)
{
  if (static_cast<const u8*>(fault_address) >= Bus::g_ram &&
      static_cast<const u8*>(fault_address) < (Bus::g_ram + Bus::RAM_8MB_SIZE))
  {
    // Writing to protected RAM.
    DebugAssert(is_write);
    const u32 guest_address = static_cast<u32>(static_cast<const u8*>(fault_address) - Bus::g_ram);
    const u32 page_index = Bus::GetRAMCodePageIndex(guest_address);
    DEV_LOG("Page fault on protected RAM @ 0x{:08X} (page #{}), invalidating code cache.", guest_address, page_index);
    CPU::CodeCache::InvalidateBlocksWithPageIndex(page_index);
    return PageFaultHandler::HandlerResult::ContinueExecution;
  }

  return CPU::CodeCache::HandleFastmemException(exception_pc, fault_address, is_write);
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Cached Interpreter
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

CPU::CodeCache::Block* CPU::CodeCache::CreateCachedInterpreterBlock(u32 pc)
{
  BlockMetadata metadata = {};
  ReadBlockInstructions(pc, &s_block_instructions, &metadata);
  return CreateBlock(pc, s_block_instructions, metadata);
}

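// Cached interpreter dispatch loop: look up (or create) the block for the current PC, revalidate it if
// it was invalidated, charge the fetch cost (ICache tags, dynamic access time, or precomputed ticks),
// then interpret the block. PCs without a block LUT, and blocks flagged FallbackToInterpreter, go
// through the uncached interpreter instead. The inner loop runs until the downcount expires, at which
// point pending timing events are serviced.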
template<PGXPMode pgxp_mode>
[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreterImpl()
{
#define CHECK_DOWNCOUNT() \
  if (g_state.pending_ticks >= g_state.downcount) \
    break;

  if (g_state.pending_ticks >= g_state.downcount)
    TimingEvents::RunEvents();

  for (;;)
  {
    for (;;)
    {
#if 0
      LogCurrentState();
#endif
#if 0
      if ((g_state.pending_ticks + TimingEvents::GetGlobalTickCounter()) == 3301006214)
        __debugbreak();
#endif

      // Manually done because we don't want to compile blocks without a LUT.
      const u32 pc = g_state.pc;
      const u32 table = pc >> LUT_TABLE_SHIFT;
      Block* block;
      if (s_block_lut[table])
      {
        const u32 idx = (pc & 0xFFFF) >> 2;
        block = s_block_lut[table][idx];
      }
      else
      {
        // Likely invalid code...
        goto interpret_block;
      }

    reexecute_block:
      if (!block)
      {
        if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
          goto interpret_block;
      }
      else
      {
        if (block->state == BlockState::FallbackToInterpreter) [[unlikely]]
          goto interpret_block;

        if ((block->state != BlockState::Valid && !RevalidateBlock(block)) ||
            (block->protection == PageProtectionMode::ManualCheck && !IsBlockCodeCurrent(block)))
        {
          if ((block = CreateCachedInterpreterBlock(pc))->size == 0) [[unlikely]]
            goto interpret_block;
        }
      }

      DebugAssert(!(HasPendingInterrupt()));
      if (block->HasFlag(BlockFlags::IsUsingICache))
      {
        CheckAndUpdateICacheTags(block->icache_line_count);
      }
      else if (block->HasFlag(BlockFlags::NeedsDynamicFetchTicks))
      {
        AddPendingTicks(static_cast<TickCount>(
          block->size * static_cast<u32>(*Bus::GetMemoryAccessTimePtr(block->pc & KSEG_MASK, MemoryAccessSize::Word))));
      }
      else
      {
        AddPendingTicks(block->uncached_fetch_ticks);
      }

      InterpretCachedBlock<pgxp_mode>(block);

      CHECK_DOWNCOUNT();

      // Handle self-looping blocks
      if (g_state.pc == block->pc)
        goto reexecute_block;
      else
        continue;

    interpret_block:
      InterpretUncachedBlock<pgxp_mode>();
      CHECK_DOWNCOUNT();
      continue;
    }

    TimingEvents::RunEvents();
  }
}

[[noreturn]] void CPU::CodeCache::ExecuteCachedInterpreter()
{
  if (g_settings.gpu_pgxp_enable)
  {
    if (g_settings.gpu_pgxp_cpu)
      ExecuteCachedInterpreterImpl<PGXPMode::CPU>();
    else
      ExecuteCachedInterpreterImpl<PGXPMode::Memory>();
  }
  else
  {
    ExecuteCachedInterpreterImpl<PGXPMode::Disabled>();
  }
}

void CPU::CodeCache::LogCurrentState()
{
#if 0
  if (System::GetGlobalTickCounter() == 2546728915)
    __debugbreak();
#endif
#if 0
  if (System::GetGlobalTickCounter() < 2546729174)
    return;
#endif

  const auto& regs = g_state.regs;
  WriteToExecutionLog(
    "tick=%" PRIu64
    " dc=%u/%u pc=%08X at=%08X v0=%08X v1=%08X a0=%08X a1=%08X a2=%08X a3=%08X t0=%08X t1=%08X t2=%08X t3=%08X t4=%08X "
    "t5=%08X t6=%08X t7=%08X s0=%08X s1=%08X s2=%08X s3=%08X s4=%08X s5=%08X s6=%08X s7=%08X t8=%08X t9=%08X k0=%08X "
    "k1=%08X gp=%08X sp=%08X fp=%08X ra=%08X hi=%08X lo=%08X ldr=%s ldv=%08X cause=%08X sr=%08X gte=%08X\n",
    System::GetGlobalTickCounter(), g_state.pending_ticks, g_state.downcount, g_state.pc, regs.at, regs.v0, regs.v1,
    regs.a0, regs.a1, regs.a2, regs.a3, regs.t0, regs.t1, regs.t2, regs.t3, regs.t4, regs.t5, regs.t6, regs.t7, regs.s0,
    regs.s1, regs.s2, regs.s3, regs.s4, regs.s5, regs.s6, regs.s7, regs.t8, regs.t9, regs.k0, regs.k1, regs.gp, regs.sp,
    regs.fp, regs.ra, regs.hi, regs.lo,
    (g_state.next_load_delay_reg == Reg::count) ? "NONE" : GetRegName(g_state.next_load_delay_reg),
    (g_state.next_load_delay_reg == Reg::count) ? 0 : g_state.next_load_delay_value, g_state.cop0_regs.cause.bits,
    g_state.cop0_regs.sr.bits, static_cast<u32>(crc32(0, (const Bytef*)&g_state.gte_regs, sizeof(g_state.gte_regs))));
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Block Compilation: Shared Code
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

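// Decodes instructions starting at start_pc until a block-ending condition is reached: a page crossing
// outside a delay slot, a branch plus its delay slot, an exit instruction (e.g. syscall), or a failed
// instruction fetch. Per-instruction flags and fetch-timing metadata are filled in along the way.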
bool CPU::CodeCache::ReadBlockInstructions(u32 start_pc, BlockInstructionList* instructions, BlockMetadata* metadata)
{
  // TODO: Jump to other block if it exists at this pc?

  const PageProtectionMode protection = GetProtectionModeForPC(start_pc);
  const bool use_icache = CPU::IsCachedAddress(start_pc);
  const bool dynamic_fetch_ticks =
    (!use_icache && Bus::GetMemoryAccessTimePtr(VirtualAddressToPhysical(start_pc), MemoryAccessSize::Word) != nullptr);
  u32 pc = start_pc;
  bool is_branch_delay_slot = false;
  bool is_load_delay_slot = false;

#if 0
  if (pc == 0x0005aa90)
    __debugbreak();
#endif

  instructions->clear();
  metadata->icache_line_count = 0;
  metadata->uncached_fetch_ticks = 0;
  metadata->flags = use_icache ? BlockFlags::IsUsingICache :
                                 (dynamic_fetch_ticks ? BlockFlags::NeedsDynamicFetchTicks : BlockFlags::None);

  u32 last_cache_line = ICACHE_LINES;
  u32 last_page = (protection == PageProtectionMode::WriteProtected) ? Bus::GetRAMCodePageIndex(start_pc) : 0;

  for (;;)
  {
    if (protection == PageProtectionMode::WriteProtected)
    {
      const u32 this_page = Bus::GetRAMCodePageIndex(pc);
      if (this_page != last_page)
      {
        // if we're just crossing the page and not in a branch delay slot, jump directly to the next block
        if (!is_branch_delay_slot)
        {
          DEV_LOG("Breaking block 0x{:08X} at 0x{:08X} due to page crossing", start_pc, pc);
          metadata->flags |= BlockFlags::SpansPages;
          break;
        }
        else
        {
          // otherwise, we need to use manual protection in case the delay slot changes.
          // may as well keep going then, since we're doing manual check anyways.
          DEV_LOG("Block 0x{:08X} has branch delay slot crossing page at 0x{:08X}, forcing manual protection", start_pc,
                  pc);
          metadata->flags |= BlockFlags::BranchDelaySpansPages;
        }
      }
    }

    Instruction instruction;
    if (!SafeReadInstruction(pc, &instruction.bits) || !IsValidInstruction(instruction))
    {
      // Away to the int you go!
      ERROR_LOG("Instruction read failed at PC=0x{:08X}, truncating block.", pc);

      // If the last instruction was a branch, we need the delay slot in the block to compile it.
      if (is_branch_delay_slot)
        instructions->pop_back();

      break;
    }

    InstructionInfo info;
    std::memset(&info, 0, sizeof(info));

    info.is_branch_delay_slot = is_branch_delay_slot;
    info.is_load_delay_slot = is_load_delay_slot;
    info.is_branch_instruction = IsBranchInstruction(instruction);
    info.is_direct_branch_instruction = IsDirectBranchInstruction(instruction);
    info.is_unconditional_branch_instruction = IsUnconditionalBranchInstruction(instruction);
    info.is_load_instruction = IsMemoryLoadInstruction(instruction);
    info.is_store_instruction = IsMemoryStoreInstruction(instruction);
    info.has_load_delay = InstructionHasLoadDelay(instruction);

    if (use_icache)
    {
      if (g_settings.cpu_recompiler_icache)
      {
        const u32 icache_line = GetICacheLine(pc);
        if (icache_line != last_cache_line)
        {
          metadata->icache_line_count++;
          last_cache_line = icache_line;
        }
      }
    }
    else if (!dynamic_fetch_ticks)
    {
      metadata->uncached_fetch_ticks += GetInstructionReadTicks(pc);
    }

    if (info.is_load_instruction || info.is_store_instruction)
      metadata->flags |= BlockFlags::ContainsLoadStoreInstructions;

    pc += sizeof(Instruction);

    if (is_branch_delay_slot && info.is_branch_instruction)
    {
      const BlockInstructionInfoPair& prev = instructions->back();
      if (!prev.second.is_unconditional_branch_instruction || !prev.second.is_direct_branch_instruction)
      {
        WARNING_LOG("Conditional or indirect branch delay slot at {:08X}, skipping block", pc);
        return false;
      }
      if (!IsDirectBranchInstruction(instruction))
      {
        WARNING_LOG("Indirect branch in delay slot at {:08X}, skipping block", pc);
        return false;
      }

      // we _could_ fetch the delay slot from the first branch's target, but it's probably in a different
      // page, and that's an invalidation nightmare. so just fallback to the int, this is very rare anyway.
      WARNING_LOG("Direct branch in delay slot at {:08X}, skipping block", pc);
      return false;
    }

    // instruction is decoded now
    instructions->emplace_back(instruction, info);

    // if we're in a branch delay slot, the block is now done
    // except if this is a branch in a branch delay slot, then we grab the one after that, and so on...
    if (is_branch_delay_slot && !info.is_branch_instruction)
      break;

    // if this is a branch, we grab the next instruction (delay slot), and then exit
    is_branch_delay_slot = info.is_branch_instruction;

    // same for load delay
    is_load_delay_slot = info.has_load_delay;

    // is this a non-branchy exit? (e.g. syscall)
    if (IsExitBlockInstruction(instruction))
      break;
  }

  if (instructions->empty())
  {
    WARNING_LOG("Empty block compiled at 0x{:08X}", start_pc);
    return false;
  }

  instructions->back().second.is_last_instruction = true;

#if defined(_DEBUG) || defined(_DEVEL)
  SmallString disasm;
  u32 disasm_pc = start_pc;
  DEBUG_LOG("Block at 0x{:08X}", start_pc);
  DEBUG_LOG(" Uncached fetch ticks: {}", metadata->uncached_fetch_ticks);
  DEBUG_LOG(" ICache line count: {}", metadata->icache_line_count);
  for (const auto& cbi : *instructions)
  {
    CPU::DisassembleInstruction(&disasm, disasm_pc, cbi.first.bits);
    DEBUG_LOG("[{} {} 0x{:08X}] {:08X} {}", cbi.second.is_branch_delay_slot ? "BD" : " ",
              cbi.second.is_load_delay_slot ? "LD" : " ", disasm_pc, cbi.first.bits, disasm);
    disasm_pc += sizeof(Instruction);
  }
#endif

  return true;
}

void CPU::CodeCache::CopyRegInfo(InstructionInfo* dst, const InstructionInfo* src)
{
  std::memcpy(dst->reg_flags, src->reg_flags, sizeof(dst->reg_flags));
  std::memcpy(dst->read_reg, src->read_reg, sizeof(dst->read_reg));
}

void CPU::CodeCache::SetRegAccess(InstructionInfo* inst, Reg reg, bool write)
{
  if (reg == Reg::zero)
    return;

  if (!write)
  {
    for (u32 i = 0; i < std::size(inst->read_reg); i++)
    {
      if (inst->read_reg[i] == Reg::zero)
      {
        inst->read_reg[i] = reg;
        break;
      }
    }
  }
  else
  {
#if 0
    for (u32 i = 0; i < std::size(inst->write_reg); i++)
    {
      if (inst->write_reg[i] == Reg::zero)
      {
        inst->write_reg[i] = reg;
        break;
      }
    }
#endif
  }
}

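// Helpers for the backwards liveness walk in FillBlockRegInfo. Scanning from the last instruction to
// the first, BackpropSetReads marks a register as used and keeps it live into the previous instruction,
// while BackpropSetWrites clears liveness in the previous instruction because the value is overwritten
// here. RI_LASTUSE is set on the first sighting of a register, i.e. its last use in program order.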
#define BackpropSetReads(reg) \
  do \
  { \
    if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \
      inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \
    prev->reg_flags[static_cast<u8>(reg)] |= RI_LIVE | RI_USED; \
    inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \
    SetRegAccess(inst, reg, false); \
  } while (0)

#define BackpropSetWrites(reg) \
  do \
  { \
    prev->reg_flags[static_cast<u8>(reg)] &= ~(RI_LIVE | RI_USED); \
    if (!(inst->reg_flags[static_cast<u8>(reg)] & RI_USED)) \
      inst->reg_flags[static_cast<u8>(reg)] |= RI_LASTUSE; \
    inst->reg_flags[static_cast<u8>(reg)] |= RI_USED; \
    SetRegAccess(inst, reg, true); \
  } while (0)

// TODO: memory loads should be delayed one instruction because of stupid load delays.
#define BackpropSetWritesDelayed(reg) BackpropSetWrites(reg)

void CPU::CodeCache::FillBlockRegInfo(Block* block)
{
  const Instruction* iinst = block->Instructions() + (block->size - 1);
  InstructionInfo* const start = block->InstructionsInfo();
  InstructionInfo* inst = start + (block->size - 1);
  std::memset(inst->reg_flags, RI_LIVE, sizeof(inst->reg_flags));
  std::memset(inst->read_reg, 0, sizeof(inst->read_reg));
  // std::memset(inst->write_reg, 0, sizeof(inst->write_reg));

  while (inst != start)
  {
    InstructionInfo* prev = inst - 1;
    CopyRegInfo(prev, inst);

    const Reg rs = iinst->r.rs;
    const Reg rt = iinst->r.rt;

    switch (iinst->op)
    {
      case InstructionOp::funct:
      {
        const Reg rd = iinst->r.rd;

        switch (iinst->r.funct)
        {
          case InstructionFunct::sll:
          case InstructionFunct::srl:
          case InstructionFunct::sra:
            BackpropSetWrites(rd);
            BackpropSetReads(rt);
            break;

          case InstructionFunct::sllv:
          case InstructionFunct::srlv:
          case InstructionFunct::srav:
          case InstructionFunct::add:
          case InstructionFunct::addu:
          case InstructionFunct::sub:
          case InstructionFunct::subu:
          case InstructionFunct::and_:
          case InstructionFunct::or_:
          case InstructionFunct::xor_:
          case InstructionFunct::nor:
          case InstructionFunct::slt:
          case InstructionFunct::sltu:
            BackpropSetWrites(rd);
            BackpropSetReads(rt);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::jr:
            BackpropSetReads(rs);
            break;

          case InstructionFunct::jalr:
            BackpropSetReads(rs);
            BackpropSetWrites(rd);
            break;

          case InstructionFunct::mfhi:
            BackpropSetWrites(rd);
            BackpropSetReads(Reg::hi);
            break;

          case InstructionFunct::mflo:
            BackpropSetWrites(rd);
            BackpropSetReads(Reg::lo);
            break;

          case InstructionFunct::mthi:
            BackpropSetWrites(Reg::hi);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::mtlo:
            BackpropSetWrites(Reg::lo);
            BackpropSetReads(rs);
            break;

          case InstructionFunct::mult:
          case InstructionFunct::multu:
          case InstructionFunct::div:
          case InstructionFunct::divu:
            BackpropSetWrites(Reg::hi);
            BackpropSetWrites(Reg::lo);
            BackpropSetReads(rs);
            BackpropSetReads(rt);
            break;

          case InstructionFunct::syscall:
          case InstructionFunct::break_:
            break;

          default:
            ERROR_LOG("Unknown funct {}", static_cast<u32>(iinst->r.funct.GetValue()));
            break;
        }
      }
      break;

      case InstructionOp::b:
      {
        if ((static_cast<u8>(iinst->i.rt.GetValue()) & u8(0x1E)) == u8(0x10))
          BackpropSetWrites(Reg::ra);
        BackpropSetReads(rs);
      }
      break;

      case InstructionOp::j:
        break;

      case InstructionOp::jal:
        BackpropSetWrites(Reg::ra);
        break;

      case InstructionOp::beq:
      case InstructionOp::bne:
        BackpropSetReads(rs);
        BackpropSetReads(rt);
        break;

      case InstructionOp::blez:
      case InstructionOp::bgtz:
        BackpropSetReads(rs);
        break;

      case InstructionOp::addi:
      case InstructionOp::addiu:
      case InstructionOp::slti:
      case InstructionOp::sltiu:
      case InstructionOp::andi:
      case InstructionOp::ori:
      case InstructionOp::xori:
        BackpropSetWrites(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::lui:
        BackpropSetWrites(rt);
        break;

      case InstructionOp::lb:
      case InstructionOp::lh:
      case InstructionOp::lw:
      case InstructionOp::lbu:
      case InstructionOp::lhu:
        BackpropSetWritesDelayed(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::lwl:
      case InstructionOp::lwr:
        BackpropSetWritesDelayed(rt);
        BackpropSetReads(rs);
        BackpropSetReads(rt);
        break;

      case InstructionOp::sb:
      case InstructionOp::sh:
      case InstructionOp::swl:
      case InstructionOp::sw:
      case InstructionOp::swr:
        BackpropSetReads(rt);
        BackpropSetReads(rs);
        break;

      case InstructionOp::cop0:
      case InstructionOp::cop2:
      {
        if (iinst->cop.IsCommonInstruction())
        {
          switch (iinst->cop.CommonOp())
          {
            case CopCommonInstruction::mfcn:
            case CopCommonInstruction::cfcn:
              BackpropSetWritesDelayed(rt);
              break;

            case CopCommonInstruction::mtcn:
            case CopCommonInstruction::ctcn:
              BackpropSetReads(rt);
              break;
          }
        }
        break;

        case InstructionOp::lwc2:
        case InstructionOp::swc2:
          BackpropSetReads(rs);
          BackpropSetReads(rt);
          break;

        default:
          ERROR_LOG("Unknown op {}", static_cast<u32>(iinst->op.GetValue()));
          break;
      }
    } // end switch

    inst--;
    iinst--;
  } // end while
}

////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////
// MARK: - Recompiler Glue
////////////////////////////////////////////////////////////////////////////////////////////////////////////////////////

void CPU::CodeCache::CompileOrRevalidateBlock(u32 start_pc)
{
  // TODO: this doesn't currently handle when the cache overflows...
  DebugAssert(IsUsingRecompiler());
  MemMap::BeginCodeWrite();

  Block* block = LookupBlock(start_pc);
  if (block)
  {
    // we should only be here if the block got invalidated
    DebugAssert(block->state != BlockState::Valid);
    if (RevalidateBlock(block))
    {
      DebugAssert(block->host_code);
      SetCodeLUT(start_pc, block->host_code);
      BacklinkBlocks(start_pc, block->host_code);
      MemMap::EndCodeWrite();
      return;
    }

    // remove outward links from this block, since we're recompiling it
    UnlinkBlockExits(block);

    // clean up backpatch info so it doesn't keep growing indefinitely
    if (block->HasFlag(BlockFlags::ContainsLoadStoreInstructions))
      RemoveBackpatchInfoForRange(block->host_code, block->host_code_size);
  }

  BlockMetadata metadata = {};
  if (!ReadBlockInstructions(start_pc, &s_block_instructions, &metadata))
  {
    ERROR_LOG("Failed to read block at 0x{:08X}, falling back to uncached interpreter", start_pc);
    SetCodeLUT(start_pc, g_interpret_block);
    BacklinkBlocks(start_pc, g_interpret_block);
    MemMap::EndCodeWrite();
    return;
  }

  // Ensure we're not going to run out of space while compiling this block.
  // We could definitely do better here...
  const u32 block_size = static_cast<u32>(s_block_instructions.size());
  const u32 free_code_space = GetFreeCodeSpace();
  const u32 free_far_code_space = GetFreeFarCodeSpace();
  if (free_code_space < (block_size * Recompiler::MAX_NEAR_HOST_BYTES_PER_INSTRUCTION) ||
      free_code_space < Recompiler::MIN_CODE_RESERVE_FOR_BLOCK ||
      free_far_code_space < Recompiler::MIN_CODE_RESERVE_FOR_BLOCK)
  {
    ERROR_LOG("Out of code space while compiling {:08X}. Resetting code cache.", start_pc);
    CodeCache::Reset();
  }

  if ((block = CreateBlock(start_pc, s_block_instructions, metadata)) == nullptr || block->size == 0 ||
      !CompileBlock(block))
  {
    ERROR_LOG("Failed to compile block at 0x{:08X}, falling back to uncached interpreter", start_pc);
    SetCodeLUT(start_pc, g_interpret_block);
    BacklinkBlocks(start_pc, g_interpret_block);
    MemMap::EndCodeWrite();
    return;
  }

  SetCodeLUT(start_pc, block->host_code);
  BacklinkBlocks(start_pc, block->host_code);
  MemMap::EndCodeWrite();
}

void CPU::CodeCache::DiscardAndRecompileBlock(u32 start_pc)
{
  MemMap::BeginCodeWrite();

  DEV_LOG("Discard block {:08X} with manual protection", start_pc);
  Block* block = LookupBlock(start_pc);
  DebugAssert(block && block->state == BlockState::Valid);
  InvalidateBlock(block, BlockState::NeedsRecompile);
  CompileOrRevalidateBlock(start_pc);

  MemMap::EndCodeWrite();
}

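// Records an exit link from the jump at `code` to the block at newpc, so the jump can be repointed
// later when the target is compiled, invalidated, or recompiled. With block linking disabled, exits
// always go back through the dispatcher.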
const void* CPU::CodeCache::CreateBlockLink(Block* block, void* code, u32 newpc)
{
  // self-linking should be handled by the caller
  DebugAssert(newpc != block->pc);

  const void* dst = g_dispatcher;
  if (g_settings.cpu_recompiler_block_linking)
  {
    const Block* next_block = LookupBlock(newpc);
    if (next_block)
    {
      dst = (next_block->state == BlockState::Valid) ?
              next_block->host_code :
              ((next_block->state == BlockState::FallbackToInterpreter) ? g_interpret_block :
                                                                          g_compile_or_revalidate_block);
      DebugAssert(dst);
    }
    else
    {
      dst = HasBlockLUT(newpc) ? g_compile_or_revalidate_block : g_interpret_block;
    }

    BlockLinkMap::iterator iter = s_block_links.emplace(newpc, code);
    DebugAssert(block->num_exit_links < MAX_BLOCK_EXIT_LINKS);
    block->exit_links[block->num_exit_links++] = iter;
  }

  DEBUG_LOG("Linking {} with dst pc {:08X} to {}{}", code, newpc, dst,
            (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
  return dst;
}

const void* CPU::CodeCache::CreateSelfBlockLink(Block* block, void* code, const void* block_start)
{
  const void* dst = g_dispatcher;
  if (g_settings.cpu_recompiler_block_linking)
  {
    dst = block_start;

    BlockLinkMap::iterator iter = s_block_links.emplace(block->pc, code);
    DebugAssert(block->num_exit_links < MAX_BLOCK_EXIT_LINKS);
    block->exit_links[block->num_exit_links++] = iter;
  }

  DEBUG_LOG("Self linking {} with dst pc {:08X} to {}", code, block->pc, dst);
  return dst;
}

void CPU::CodeCache::BacklinkBlocks(u32 pc, const void* dst)
{
  if (!g_settings.cpu_recompiler_block_linking)
    return;

  const auto link_range = s_block_links.equal_range(pc);
  for (auto it = link_range.first; it != link_range.second; ++it)
  {
    DEBUG_LOG("Backlinking {} with dst pc {:08X} to {}{}", it->second, pc, dst,
              (dst == g_compile_or_revalidate_block) ? "[compiler]" : "");
    EmitJump(it->second, dst, true);
  }
}

void CPU::CodeCache::UnlinkBlockExits(Block* block)
{
  const u32 num_exit_links = block->num_exit_links;
  for (u32 i = 0; i < num_exit_links; i++)
    s_block_links.erase(block->exit_links[i]);
  block->num_exit_links = 0;
}

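// The JIT buffer is split into a near region for block bodies and a far region for slow-path/backpatch
// code. Resetting zeroes any code that was emitted, flushes the instruction cache for those ranges, and
// rewinds the write pointers for both regions.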
void CPU::CodeCache::ResetCodeBuffer()
{
  if (s_code_used > 0 || s_far_code_used > 0)
  {
    MemMap::BeginCodeWrite();

    if (s_code_used > 0)
    {
      std::memset(s_code_ptr, 0, s_code_used);
      MemMap::FlushInstructionCache(s_code_ptr, s_code_used);
    }

    if (s_far_code_used > 0)
    {
      std::memset(s_far_code_ptr, 0, s_far_code_used);
      MemMap::FlushInstructionCache(s_far_code_ptr, s_far_code_used);
    }

    MemMap::EndCodeWrite();
  }

  s_code_ptr = static_cast<u8*>(s_code_buffer_ptr);
  s_free_code_ptr = s_code_ptr;
  s_code_size = RECOMPILER_CODE_CACHE_SIZE - RECOMPILER_FAR_CODE_CACHE_SIZE;
  s_code_used = 0;

  // Use half the far code size when memory exceptions aren't enabled. It's only used for backpatching.
  const u32 far_code_size = (!g_settings.cpu_recompiler_memory_exceptions) ? (RECOMPILER_FAR_CODE_CACHE_SIZE / 2) :
                                                                             RECOMPILER_FAR_CODE_CACHE_SIZE;
  s_far_code_size = far_code_size;
  s_far_code_ptr = (far_code_size > 0) ? (static_cast<u8*>(s_code_ptr) + s_code_size) : nullptr;
  s_free_far_code_ptr = s_far_code_ptr;
  s_far_code_used = 0;
}

u8* CPU::CodeCache::GetFreeCodePointer()
{
  return s_free_code_ptr;
}

u32 CPU::CodeCache::GetFreeCodeSpace()
{
  return s_code_size - s_code_used;
}

void CPU::CodeCache::CommitCode(u32 length)
{
  if (length == 0) [[unlikely]]
    return;

  MemMap::FlushInstructionCache(s_free_code_ptr, length);

  Assert(length <= (s_code_size - s_code_used));
  s_free_code_ptr += length;
  s_code_used += length;
}

u8* CPU::CodeCache::GetFreeFarCodePointer()
{
  return s_free_far_code_ptr;
}

u32 CPU::CodeCache::GetFreeFarCodeSpace()
{
  return s_far_code_size - s_far_code_used;
}

void CPU::CodeCache::CommitFarCode(u32 length)
{
  if (length == 0) [[unlikely]]
    return;

  MemMap::FlushInstructionCache(s_free_far_code_ptr, length);

  Assert(length <= (s_far_code_size - s_far_code_used));
  s_free_far_code_ptr += length;
  s_far_code_used += length;
}

void CPU::CodeCache::AlignCode(u32 alignment)
{
  DebugAssert(Common::IsPow2(alignment));
  const u32 num_padding_bytes =
    std::min(static_cast<u32>(Common::AlignUpPow2(reinterpret_cast<uintptr_t>(s_free_code_ptr), alignment) -
                              reinterpret_cast<uintptr_t>(s_free_code_ptr)),
             GetFreeCodeSpace());

  if (num_padding_bytes > 0)
    EmitAlignmentPadding(s_free_code_ptr, num_padding_bytes);

  s_free_code_ptr += num_padding_bytes;
  s_code_used += num_padding_bytes;
}

const void* CPU::CodeCache::GetInterpretUncachedBlockFunction()
{
  if (g_settings.gpu_pgxp_enable)
  {
    if (g_settings.gpu_pgxp_cpu)
      return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::CPU>);
    else
      return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Memory>);
  }
  else
  {
    return reinterpret_cast<const void*>(InterpretUncachedBlock<PGXPMode::Disabled>);
  }
}

void CPU::CodeCache::CompileASMFunctions()
{
  MemMap::BeginCodeWrite();

#ifdef DUMP_CODE_SIZE_STATS
  s_total_instructions_compiled = 0;
  s_total_host_instructions_emitted = 0;
  s_total_host_code_used_by_instructions = 0;
#endif

  const u32 asm_size = EmitASMFunctions(GetFreeCodePointer(), GetFreeCodeSpace());

#ifdef ENABLE_RECOMPILER_PROFILING
  MIPSPerfScope.Register(GetFreeCodePointer(), asm_size, "ASMFunctions");
#endif

  CommitCode(asm_size);
  MemMap::EndCodeWrite();
}

bool CPU::CodeCache::CompileBlock(Block* block)
{
  const void* host_code = nullptr;
  u32 host_code_size = 0;
  u32 host_far_code_size = 0;

#ifdef ENABLE_RECOMPILER
  if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
    host_code = g_compiler->CompileBlock(block, &host_code_size, &host_far_code_size);
#endif

  block->host_code = host_code;
  block->host_code_size = host_code_size;

  if (!host_code)
  {
    ERROR_LOG("Failed to compile host code for block at 0x{:08X}", block->pc);
    block->state = BlockState::FallbackToInterpreter;
    return false;
  }

#ifdef DUMP_CODE_SIZE_STATS
  const u32 host_instructions = GetHostInstructionCount(host_code, host_code_size);
  s_total_instructions_compiled += block->size;
  s_total_host_instructions_emitted += host_instructions;
  s_total_host_code_used_by_instructions += host_code_size;

  DEV_LOG(
    "0x{:08X}: {}/{}b for {}b ({}i), blowup: {:.2f}x, cache: {:.2f}%/{:.2f}%, ipi: {:.2f}/{:.2f}, bpi: {:.2f}/{:.2f}",
    block->pc, host_code_size, host_far_code_size, block->size * 4, block->size,
    static_cast<float>(host_code_size) / static_cast<float>(block->size * 4),
    (static_cast<float>(s_code_used) / static_cast<float>(s_code_size)) * 100.0f,
    (static_cast<float>(s_far_code_used) / static_cast<float>(s_far_code_size)) * 100.0f,
    static_cast<float>(host_instructions) / static_cast<float>(block->size),
    static_cast<float>(s_total_host_instructions_emitted) / static_cast<float>(s_total_instructions_compiled),
    static_cast<float>(block->host_code_size) / static_cast<float>(block->size),
    static_cast<float>(s_total_host_code_used_by_instructions) / static_cast<float>(s_total_instructions_compiled));
#endif

#if 0
  Log_DebugPrint("***HOST CODE**");
  DisassembleAndLogHostCode(host_code, host_code_size);
#endif

#ifdef ENABLE_RECOMPILER_PROFILING
  MIPSPerfScope.RegisterPC(host_code, host_code_size, block->pc);
#endif

  return true;
}

void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, const void* thunk_address)
{
  DebugAssert(code_size < std::numeric_limits<u8>::max());

  auto iter = s_fastmem_backpatch_info.find(code_address);
  if (iter != s_fastmem_backpatch_info.end())
    s_fastmem_backpatch_info.erase(iter);

  LoadstoreBackpatchInfo info;
  info.thunk_address = thunk_address;
  info.guest_pc = guest_pc;
  info.guest_block = 0;
  info.code_size = static_cast<u8>(code_size);
  s_fastmem_backpatch_info.emplace(code_address, info);
}

void CPU::CodeCache::AddLoadStoreInfo(void* code_address, u32 code_size, u32 guest_pc, u32 guest_block,
                                      TickCount cycles, u32 gpr_bitmask, u8 address_register, u8 data_register,
                                      MemoryAccessSize size, bool is_signed, bool is_load)
{
  DebugAssert(code_size < std::numeric_limits<u8>::max());
  DebugAssert(cycles >= 0 && cycles < std::numeric_limits<u16>::max());

  auto iter = s_fastmem_backpatch_info.find(code_address);
  if (iter != s_fastmem_backpatch_info.end())
    s_fastmem_backpatch_info.erase(iter);

  LoadstoreBackpatchInfo info;
  info.thunk_address = nullptr;
  info.guest_pc = guest_pc;
  info.guest_block = guest_block;
  info.gpr_bitmask = gpr_bitmask;
  info.cycles = static_cast<u16>(cycles);
  info.address_register = address_register;
  info.data_register = data_register;
  info.size = static_cast<u16>(size);
  info.is_signed = is_signed;
  info.is_load = is_load;
  info.code_size = static_cast<u8>(code_size);
  s_fastmem_backpatch_info.emplace(code_address, info);
}

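// Invoked from the page fault handler for faults inside the fastmem arena. With mmap fastmem, plain RAM
// writes that hit a write-protected code page are resolved by invalidating that page and retrying. Any
// other faulting access is backpatched to use the slow-path memory handlers, the owning block is queued
// for recompilation, and the PC is remembered so future compiles avoid fastmem for it.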
PageFaultHandler::HandlerResult CPU::CodeCache::HandleFastmemException(void* exception_pc, void* fault_address,
                                                                       bool is_write)
{
  PhysicalMemoryAddress guest_address;

#ifdef ENABLE_MMAP_FASTMEM
  if (g_settings.cpu_fastmem_mode == CPUFastmemMode::MMap)
  {
    if (static_cast<u8*>(fault_address) < static_cast<u8*>(g_state.fastmem_base) ||
        (static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)) >=
          static_cast<ptrdiff_t>(Bus::FASTMEM_ARENA_SIZE))
    {
      return PageFaultHandler::HandlerResult::ExecuteNextHandler;
    }

    guest_address = static_cast<PhysicalMemoryAddress>(
      static_cast<ptrdiff_t>(static_cast<u8*>(fault_address) - static_cast<u8*>(g_state.fastmem_base)));

    // if we're writing to ram, let it go through a few times, and use manual block protection to sort it out
    // TODO: path for manual protection to return back to read-only pages
    if (!g_state.cop0_regs.sr.Isc && GetSegmentForAddress(guest_address) != CPU::Segment::KSEG2 &&
        AddressInRAM(guest_address))
    {
      DebugAssert(is_write);
      DEV_LOG("Ignoring fault due to RAM write @ 0x{:08X}", guest_address);
      InvalidateBlocksWithPageIndex(Bus::GetRAMCodePageIndex(guest_address));
      return PageFaultHandler::HandlerResult::ContinueExecution;
    }
  }
  else
#endif
  {
    // LUT fastmem - we can't compute the address.
    guest_address = std::numeric_limits<PhysicalMemoryAddress>::max();
  }

  auto iter = s_fastmem_backpatch_info.find(exception_pc);
  if (iter == s_fastmem_backpatch_info.end())
    return PageFaultHandler::HandlerResult::ExecuteNextHandler;

  DEV_LOG("Page fault handler invoked at PC={} Address={} {}, fastmem offset {:08X}", exception_pc, fault_address,
          is_write ? "(write)" : "(read)", guest_address);

  LoadstoreBackpatchInfo& info = iter->second;
  DEV_LOG("Backpatching {} at {}[{}] (pc {:08X} addr {:08X}): Bitmask {:08X} Addr {} Data {} Size {} Signed {:02X}",
          info.is_load ? "load" : "store", exception_pc, info.code_size, info.guest_pc, guest_address, info.gpr_bitmask,
          static_cast<unsigned>(info.address_register), static_cast<unsigned>(info.data_register),
          info.AccessSizeInBytes(), static_cast<unsigned>(info.is_signed));

  MemMap::BeginCodeWrite();

  BackpatchLoadStore(exception_pc, info);

  // queue block for recompilation later
  Block* block = LookupBlock(info.guest_block);
  if (block)
  {
    // This is a bit annoying, we have to remove it from the page list if it's a RAM block.
    DEV_LOG("Queuing block {:08X} for recompilation due to backpatch", block->pc);
    RemoveBlockFromPageList(block);
    InvalidateBlock(block, BlockState::NeedsRecompile);

    // Need to reset the recompile count, otherwise it'll get trolled into an interpreter fallback.
    block->compile_frame = System::GetFrameNumber();
    block->compile_count = 1;
  }

  MemMap::EndCodeWrite();

  // and store the pc in the faulting list, so that we don't emit another fastmem loadstore
  s_fastmem_faulting_pcs.insert(info.guest_pc);
  s_fastmem_backpatch_info.erase(iter);
  return PageFaultHandler::HandlerResult::ContinueExecution;
}

bool CPU::CodeCache::HasPreviouslyFaultedOnPC(u32 guest_pc)
{
  return (s_fastmem_faulting_pcs.find(guest_pc) != s_fastmem_faulting_pcs.end());
}

void CPU::CodeCache::BackpatchLoadStore(void* host_pc, const LoadstoreBackpatchInfo& info)
{
#ifdef ENABLE_RECOMPILER
  if (g_settings.cpu_execution_mode == CPUExecutionMode::Recompiler)
    Recompiler::BackpatchLoadStore(host_pc, info);
#endif
}

void CPU::CodeCache::RemoveBackpatchInfoForRange(const void* host_code, u32 size)
{
  const u8* start = static_cast<const u8*>(host_code);
  const u8* end = start + size;

  auto start_iter = s_fastmem_backpatch_info.lower_bound(start);
  if (start_iter == s_fastmem_backpatch_info.end())
    return;

  // this might point to another block, so bail out in that case
  if (start_iter->first >= end)
    return;

  // find the end point, or last instruction in the range
  auto end_iter = start_iter;
  do
  {
    ++end_iter;
  } while (end_iter != s_fastmem_backpatch_info.end() && end_iter->first < end);

  // erase the whole range at once
  s_fastmem_backpatch_info.erase(start_iter, end_iter);
}