// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#include <set>
#include <algorithm>

#include "ext/xxhash.h"
#include "Common/Profiler/Profiler.h"

#include "Common/Log.h"
#include "Common/Serialize/Serializer.h"
#include "Common/StringUtils.h"

#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/HLE/sceKernelMemory.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/IR/IRRegCache.h"
#include "Core/MIPS/IR/IRInterpreter.h"
#include "Core/MIPS/IR/IRJit.h"
#include "Core/MIPS/IR/IRNativeCommon.h"
#include "Core/MIPS/JitCommon/JitCommon.h"
#include "Core/Reporting.h"
#include "Common/TimeUtil.h"
#include "Core/MIPS/MIPSTracer.h"


namespace MIPSComp {

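// 'actualJit' selects between the two uses of this class: true means the IR feeds a
// native code backend (see IRNativeCommon), false means we run the IR interpreter.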
IRJit::IRJit(MIPSState *mipsState, bool actualJit) : frontend_(mipsState->HasDefaultPrefix()), mips_(mipsState), blocks_(actualJit) {
	// u32 size = 128 * 1024;
	InitIR();

	compileToNative_ = actualJit;

	// If this IRJit instance will be used to drive a "JIT using IR", don't optimize for interpretation.
	jo.optimizeForInterpreter = !actualJit;

	IROptions opts{};
	opts.disableFlags = g_Config.uJitDisableFlags;
#if PPSSPP_ARCH(RISCV64)
	// Assume RISC-V always has very slow unaligned memory accesses.
	opts.unalignedLoadStore = false;
	opts.unalignedLoadStoreVec4 = true;
	opts.preferVec4 = cpu_info.RiscV_V;
#elif PPSSPP_ARCH(ARM) || PPSSPP_ARCH(ARM64)
	opts.unalignedLoadStore = (opts.disableFlags & (uint32_t)JitDisable::LSU_UNALIGNED) == 0;
	opts.unalignedLoadStoreVec4 = true;
	opts.preferVec4 = true;
#else
	opts.unalignedLoadStore = (opts.disableFlags & (uint32_t)JitDisable::LSU_UNALIGNED) == 0;
	// TODO: Could allow on x86 pretty easily...
	opts.unalignedLoadStoreVec4 = false;
	opts.preferVec4 = true;
#endif
	opts.optimizeForInterpreter = jo.optimizeForInterpreter;
	frontend_.SetOptions(opts);
}

IRJit::~IRJit() {
}

void IRJit::DoState(PointerWrap &p) {
	frontend_.DoState(p);
}

void IRJit::UpdateFCR31() {
}

void IRJit::ClearCache() {
	INFO_LOG(Log::JIT, "IRJit: Clearing the block cache!");
	blocks_.Clear();
}

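// Invalidates any compiled block overlapping [em_address, em_address + length), e.g. after
// the game writes over code or clears the instruction cache.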
void IRJit::InvalidateCacheAt(u32 em_address, int length) {
	std::vector<int> numbers = blocks_.FindInvalidatedBlockNumbers(em_address, length);
	if (numbers.empty()) {
		return;
	}

	DEBUG_LOG(Log::JIT, "Invalidating IR block cache at %08x (%d bytes): %d blocks", em_address, length, (int)numbers.size());

	for (int block_num : numbers) {
		auto block = blocks_.GetBlock(block_num);
		// TODO: We are invalidating a lot of blocks that are already invalid (e.g. in Yu-Gi-Oh!).
		// INFO_LOG(Log::JIT, "Block at %08x invalidated: valid: %d", block->GetOriginalStart(), block->IsValid());
		// If we're a native JIT (IR->JIT, not just IR interpreter), we write native offsets into the blocks.
		int cookie = compileToNative_ ? block->GetNativeOffset() : block->GetIRArenaOffset();
		blocks_.RemoveBlockFromPageLookup(block_num);
		block->Destroy(cookie);
	}
}

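// Compile (or recompile) the block starting at em_address. If we ran out of block numbers,
// or our rounding-mode assumptions turned out wrong, the cache is cleared and we retry once.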
void IRJit::Compile(u32 em_address) {
	_dbg_assert_(compilerEnabled_);

	PROFILE_THIS_SCOPE("jitc");

	std::vector<IRInst> instructions;
	u32 mipsBytes;
	if (!CompileBlock(em_address, instructions, mipsBytes)) {
		// Ran out of block numbers - need to reset.
		ERROR_LOG(Log::JIT, "Ran out of block numbers, clearing cache");
		ClearCache();
		CompileBlock(em_address, instructions, mipsBytes);
	}

	if (frontend_.CheckRounding(em_address)) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		CompileBlock(em_address, instructions, mipsBytes);
	}
}

// WARNING! This can be called from IRInterpret / the JIT, through the function preload stuff!
bool IRJit::CompileBlock(u32 em_address, std::vector<IRInst> &instructions, u32 &mipsBytes) {
	_dbg_assert_(compilerEnabled_);

	frontend_.DoJit(em_address, instructions, mipsBytes);
	_dbg_assert_(!instructions.empty());

	int block_num = blocks_.AllocateBlock(em_address, mipsBytes, instructions);
	if ((block_num & ~MIPS_EMUHACK_VALUE_MASK) != 0) {
		WARN_LOG(Log::JIT, "Failed to allocate block for %08x (%d instructions)", em_address, (int)instructions.size());
		// Out of block numbers. Caller will handle.
		return false;
	}

	IRBlock *b = blocks_.GetBlock(block_num);
	if (mipsTracer.tracing_enabled) {
		// Hash, then only update page stats, don't link yet.
		// TODO: Should we always hash? Then we can reuse blocks.
		b->UpdateHash();
	}

	if (!CompileNativeBlock(&blocks_, block_num))
		return false;

	if (mipsTracer.tracing_enabled) {
		mipsTracer.prepare_block(b, blocks_);
	}

	// Updates stats, also patches the first MIPS instruction into an emuhack if 'preload == false'
	blocks_.FinalizeBlock(block_num);
	FinalizeNativeBlock(&blocks_, block_num);
	return true;
}

void IRJit::RunLoopUntil(u64 globalticks) {
	PROFILE_THIS_SCOPE("jit");

	// ApplyRoundingMode(true);
	// IR Dispatcher

	while (true) {
		// RestoreRoundingMode(true);
		CoreTiming::Advance();
		// ApplyRoundingMode(true);
		if (coreState != 0) {
			break;
		}

		MIPSState *mips = mips_;
#ifdef _DEBUG
		compilerEnabled_ = false;
#endif
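		// Run the dispatcher loop until the downcount (replenished by CoreTiming::Advance above) expires.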
		while (mips->downcount >= 0) {
			u32 inst = Memory::ReadUnchecked_U32(mips->pc);
			u32 opcode = inst & 0xFF000000;
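			// Compiled blocks have had their first MIPS opcode replaced with an emuhack
			// (see IRBlock::Finalize), whose low 24 bits point into the IR arena.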
			if (opcode == MIPS_EMUHACK_OPCODE) {
				u32 offset = inst & 0x00FFFFFF;  // Alternatively, inst - opcode
				const IRInst *instPtr = blocks_.GetArenaPtr() + offset;
				// Except when breakpoints are in use, the first op is always Downcount. Handling it
				// here saves one dispatch inside IRInterpret.
				// This branch is very branch-predictor-friendly, so it still beats the dispatch.
				if (instPtr->op == IROp::Downcount) {
					mips->downcount -= instPtr->constant;
					instPtr++;
				}
#ifdef IR_PROFILING
				IRBlock *block = blocks_.GetBlock(blocks_.GetBlockNumFromIRArenaOffset(offset));
				Instant start = Instant::Now();
				mips->pc = IRInterpret(mips, instPtr);
				int64_t elapsedNanos = start.ElapsedNanos();
				block->profileStats_.executions += 1;
				block->profileStats_.totalNanos += elapsedNanos;
#else
				mips->pc = IRInterpret(mips, instPtr);
#endif
				// Note: this will "jump to zero" on a badly constructed block missing exits.
				if (!Memory::IsValid4AlignedAddress(mips->pc)) {
					int blockNum = blocks_.GetBlockNumFromIRArenaOffset(offset);
					IRBlock *block = blocks_.GetBlockUnchecked(blockNum);
					Core_ExecException(mips->pc, block->GetOriginalStart(), ExecExceptionType::JUMP);
					break;
				}
			} else {
				// RestoreRoundingMode(true);
#ifdef _DEBUG
				compilerEnabled_ = true;
#endif
				Compile(mips->pc);
#ifdef _DEBUG
				compilerEnabled_ = false;
#endif
				// ApplyRoundingMode(true);
			}
		}
#ifdef _DEBUG
		compilerEnabled_ = true;
#endif
	}

	// RestoreRoundingMode(true);
}

bool IRJit::DescribeCodePtr(const u8 *ptr, std::string &name) {
	// Used in native disassembly viewer.
	return false;
}

void IRJit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
	Crash();
}

void IRJit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
	Crash();
}

void IRBlockCache::Clear() {
	for (int i = 0; i < (int)blocks_.size(); ++i) {
		int cookie = compileToNative_ ? blocks_[i].GetNativeOffset() : blocks_[i].GetIRArenaOffset();
		blocks_[i].Destroy(cookie);
	}
	blocks_.clear();
	byPage_.clear();
	arena_.clear();
	arena_.shrink_to_fit();
}

IRBlockCache::IRBlockCache(bool compileToNative) : compileToNative_(compileToNative) {}

int IRBlockCache::AllocateBlock(int emAddr, u32 origSize, const std::vector<IRInst> &insts) {
	// We have 24 bits to represent offsets with.
	const u32 MAX_ARENA_SIZE = 0x1000000 - 1;
	int offset = (int)arena_.size();
	if (offset >= MAX_ARENA_SIZE) {
		WARN_LOG(Log::JIT, "Filled JIT arena, restarting");
		return -1;
	}
	// TODO: Use memcpy.
	for (size_t i = 0; i < insts.size(); i++) {
		arena_.push_back(insts[i]);
	}
	int newBlockIndex = (int)blocks_.size();
	blocks_.push_back(IRBlock(emAddr, origSize, offset, (u32)insts.size()));
	return newBlockIndex;
}

int IRBlockCache::GetBlockNumFromIRArenaOffset(int offset) const {
	// Block offsets are always in rising order (we don't go back and replace them when invalidated). So we can binary search.
	int low = 0;
	int high = (int)blocks_.size() - 1;
	int found = -1;
	while (low <= high) {
		int mid = low + (high - low) / 2;
		const int blockOffset = blocks_[mid].GetIRArenaOffset();
		if (blockOffset == offset) {
			found = mid;
			break;
		}
		if (blockOffset < offset) {
			low = mid + 1;
		} else {
			high = mid - 1;
		}
	}
#if 1
	return found;
#else
	// Cross check the result. This is not fast so normally not enabled. Called a lot when IR_PROFILING is on.
	for (int i = 0; i < (int)blocks_.size(); i++) {
		if (blocks_[i].GetIRArenaOffset() == offset) {
			_dbg_assert_(i == found);
			return i;
		}
	}
	_dbg_assert_(found == -1);
	return -1;
#endif
}

std::vector<int> IRBlockCache::FindInvalidatedBlockNumbers(u32 address, u32 lengthInBytes) {
	u32 startPage = AddressToPage(address);
	u32 endPage = AddressToPage(address + lengthInBytes);

	std::vector<int> found;
	for (u32 page = startPage; page <= endPage; ++page) {
		const auto iter = byPage_.find(page);
		if (iter == byPage_.end())
			continue;

		const std::vector<int> &blocksInPage = iter->second;
		for (int i : blocksInPage) {
			if (blocks_[i].OverlapsRange(address, lengthInBytes)) {
				// We now try to remove these during invalidation.
				found.push_back(i);
			}
		}
	}

	return found;
}

void IRBlockCache::FinalizeBlock(int blockIndex) {
	// TODO: What's different about preload blocks?
	IRBlock &block = blocks_[blockIndex];
	int cookie = compileToNative_ ? block.GetNativeOffset() : block.GetIRArenaOffset();
	block.Finalize(cookie);

	u32 startAddr, size;
	block.GetRange(&startAddr, &size);

	u32 startPage = AddressToPage(startAddr);
	u32 endPage = AddressToPage(startAddr + size);

	for (u32 page = startPage; page <= endPage; ++page) {
		byPage_[page].push_back(blockIndex);
	}
}

// Call after Destroy-ing it.
void IRBlockCache::RemoveBlockFromPageLookup(int blockIndex) {
	// We need to remove the block from the byPage lookup.
	IRBlock &block = blocks_[blockIndex];

	u32 startAddr, size;
	block.GetRange(&startAddr, &size);

	u32 startPage = AddressToPage(startAddr);
	u32 endPage = AddressToPage(startAddr + size);

	for (u32 page = startPage; page <= endPage; ++page) {
		auto iter = std::find(byPage_[page].begin(), byPage_[page].end(), blockIndex);
		if (iter != byPage_[page].end()) {
			byPage_[page].erase(iter);
		} else if (block.IsValid()) {
			// If it was previously invalidated, we don't care, hence the above check.
			WARN_LOG(Log::JIT, "RemoveBlock: Block at %08x was not found where expected in byPage table.", startAddr);
		}
	}

	// Additionally, we'd like to zap the block in the IR arena.
	// However, this breaks if calling sceKernelIcacheClearAll(), since as soon as we return, we'll be executing garbage.
	/*
	IRInst bad{ IROp::Bad };
	for (int off = block.GetIRArenaOffset(); off < (int)(block.GetIRArenaOffset() + block.GetNumIRInstructions()); off++) {
		arena_[off] = bad;
	}
	*/
}

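// Maps an emulated address to a page index for the byPage_ lookup. The 0x3FFFFFFF mask
// folds the mirrored PSP address ranges (e.g. cached/uncached) onto the same pages.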
u32 IRBlockCache::AddressToPage(u32 addr) const {
	// Use relatively small pages since basic blocks are typically small.
	return (addr & 0x3FFFFFFF) >> 10;
}

int IRBlockCache::FindPreloadBlock(u32 em_address) {
	u32 page = AddressToPage(em_address);
	auto iter = byPage_.find(page);
	if (iter == byPage_.end())
		return -1;

	const std::vector<int> &blocksInPage = iter->second;
	for (int i : blocksInPage) {
		if (blocks_[i].GetOriginalStart() == em_address) {
			if (blocks_[i].HashMatches()) {
				return i;
			}
		}
	}

	return -1;
}

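// A block's "cookie" is the 24-bit value embedded in its emuhack opcode: the native code
// offset when compiling to native code, otherwise the block's offset into the IR arena.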
int IRBlockCache::FindByCookie(int cookie) {
	if (blocks_.empty())
		return -1;

	// TODO: Maybe a flag to determine native offset mode?
	if (!compileToNative_) {
		return GetBlockNumFromIRArenaOffset(cookie);
	}

	// TODO: This could also use a binary search.
	for (int i = 0; i < GetNumBlocks(); ++i) {
		int offset = blocks_[i].GetNativeOffset();
		if (offset == cookie)
			return i;
	}
	return -1;
}

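// Temporarily un-patches every valid block's first opcode so no emuhacks remain visible in
// RAM (e.g. while serializing memory), recording which blocks were patched so that
// RestoreSavedEmuHackOps can re-apply them.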
std::vector<u32> IRBlockCache::SaveAndClearEmuHackOps() {
	std::vector<u32> result;
	result.resize(blocks_.size());

	for (int number = 0; number < (int)blocks_.size(); ++number) {
		IRBlock &b = blocks_[number];
		int cookie = compileToNative_ ? b.GetNativeOffset() : b.GetIRArenaOffset();
		if (b.IsValid() && b.RestoreOriginalFirstOp(cookie)) {
			result[number] = number;
		} else {
			result[number] = 0;
		}
	}

	return result;
}

void IRBlockCache::RestoreSavedEmuHackOps(const std::vector<u32> &saved) {
	if ((int)blocks_.size() != (int)saved.size()) {
		ERROR_LOG(Log::JIT, "RestoreSavedEmuHackOps: Wrong saved block size.");
		return;
	}

	for (int number = 0; number < (int)blocks_.size(); ++number) {
		IRBlock &b = blocks_[number];
		// Only if we restored it, write it back.
		if (b.IsValid() && saved[number] != 0 && b.HasOriginalFirstOp()) {
			int cookie = compileToNative_ ? b.GetNativeOffset() : b.GetIRArenaOffset();
			b.Finalize(cookie);
		}
	}
}

JitBlockDebugInfo IRBlockCache::GetBlockDebugInfo(int blockNum) const {
	const IRBlock &ir = blocks_[blockNum];
	JitBlockDebugInfo debugInfo{};
	uint32_t start, size;
	ir.GetRange(&start, &size);
	debugInfo.originalAddress = start;  // TODO
	if (!Memory::IsValid4AlignedAddress(start)) {
		return debugInfo;
	}

	debugInfo.origDisasm.reserve(size / 4);
	for (u32 addr = start; addr < start + size; addr += 4) {
		char temp[256];
		MIPSDisAsm(Memory::Read_Instruction(addr), addr, temp, sizeof(temp), true);
		std::string mipsDis = temp;
		debugInfo.origDisasm.push_back(mipsDis);
	}

	debugInfo.irDisasm.reserve(ir.GetNumIRInstructions());
	const IRInst *instructions = GetBlockInstructionPtr(ir);
	for (int i = 0; i < ir.GetNumIRInstructions(); i++) {
		IRInst inst = instructions[i];
		char buffer[256];
		DisassembleIR(buffer, sizeof(buffer), inst);
		debugInfo.irDisasm.push_back(buffer);
	}
	return debugInfo;
}

void IRBlockCache::ComputeStats(BlockCacheStats &bcStats) const {
	double totalBloat = 0.0;
	double maxBloat = 0.0;
	double minBloat = 1000000000.0;
	for (const auto &b : blocks_) {
		double codeSize = (double)b.GetNumIRInstructions() * 4;  // We count bloat in instructions, not bytes: * 4 makes the IR count comparable to MIPS bytes (rather than * sizeof(IRInst)).
		if (codeSize == 0)
			continue;
		u32 origAddr, mipsBytes;
		b.GetRange(&origAddr, &mipsBytes);
		double origSize = (double)mipsBytes;
		double bloat = codeSize / origSize;
		if (bloat < minBloat) {
			minBloat = bloat;
			bcStats.minBloatBlock = origAddr;
		}
		if (bloat > maxBloat) {
			maxBloat = bloat;
			bcStats.maxBloatBlock = origAddr;
		}
		totalBloat += bloat;
	}
	bcStats.numBlocks = (int)blocks_.size();
	bcStats.minBloat = minBloat;
	bcStats.maxBloat = maxBloat;
	bcStats.avgBloat = totalBloat / (double)blocks_.size();
}

int IRBlockCache::GetBlockNumberFromStartAddress(u32 em_address, bool realBlocksOnly) const {
	u32 page = AddressToPage(em_address);

	const auto iter = byPage_.find(page);
	if (iter == byPage_.end())
		return -1;

	const std::vector<int> &blocksInPage = iter->second;
	int best = -1;
	for (int i : blocksInPage) {
		if (blocks_[i].GetOriginalStart() == em_address) {
			best = i;
			if (blocks_[i].IsValid()) {
				return i;
			}
		}
	}
	return best;
}

bool IRBlock::HasOriginalFirstOp() const {
	return Memory::ReadUnchecked_U32(origAddr_) == origFirstOpcode_.encoding;
}

bool IRBlock::RestoreOriginalFirstOp(int cookie) {
	const u32 emuhack = MIPS_EMUHACK_OPCODE | cookie;
	if (Memory::ReadUnchecked_U32(origAddr_) == emuhack) {
		Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
		return true;
	}
	return false;
}

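// "Finalizing" publishes the block: the original first opcode is saved off and replaced
// in emulated RAM with an emuhack opcode carrying the cookie, which the dispatcher decodes.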
void IRBlock::Finalize(int cookie) {
	// Check it wasn't invalidated, in case this is after preload.
	// TODO: Allow reusing blocks when the code matches hash_ again, instead.
	if (origAddr_) {
		origFirstOpcode_ = Memory::Read_Opcode_JIT(origAddr_);
		MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
		Memory::Write_Opcode_JIT(origAddr_, opcode);
	} else {
		WARN_LOG(Log::JIT, "Finalizing invalid block (cookie: %d)", cookie);
	}
}

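// Undoes Finalize: if our emuhack is still in place, restore the original first opcode.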
void IRBlock::Destroy(int cookie) {
	if (origAddr_) {
		MIPSOpcode opcode = MIPSOpcode(MIPS_EMUHACK_OPCODE | cookie);
		u32 memOp = Memory::ReadUnchecked_U32(origAddr_);
		if (memOp == opcode.encoding) {
			Memory::Write_Opcode_JIT(origAddr_, origFirstOpcode_);
		} else {
			// NOTE: This is not an error. Just interesting to log.
			DEBUG_LOG(Log::JIT, "IRBlock::Destroy: Note: Block at %08x was overwritten - checked for %08x, got %08x when restoring the MIPS op to %08x", origAddr_, opcode.encoding, memOp, origFirstOpcode_.encoding);
		}
		// TODO: Also wipe the block in the IR opcode arena.
		// Let's mark this invalid so we don't try to clear it again.
		origAddr_ = 0;
	}
}

u64 IRBlock::CalculateHash() const {
	if (origAddr_) {
		// This is unfortunate. In case there are emuhacks, we have to make a copy.
		// If we could hash while reading we could avoid this.
		std::vector<u32> buffer;
		buffer.resize(origSize_ / 4);
		size_t pos = 0;
		for (u32 off = 0; off < origSize_; off += 4) {
			// Let's actually hash the replacement, if any.
			MIPSOpcode instr = Memory::ReadUnchecked_Instruction(origAddr_ + off, false);
			buffer[pos++] = instr.encoding;
		}
		return XXH3_64bits(&buffer[0], origSize_);
	}
	return 0;
}

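// Ranges are compared within the mirrored 30-bit address space (matching AddressToPage),
// so a write through an address mirror still hits the block.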
bool IRBlock::OverlapsRange(u32 addr, u32 size) const {
	addr &= 0x3FFFFFFF;
	u32 origAddr = origAddr_ & 0x3FFFFFFF;
	return addr + size > origAddr && addr < origAddr + origSize_;
}

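// Maps an emuhack opcode back to the original first MIPS op of the block that owns it,
// for code that needs to see the real instruction behind the patch.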
MIPSOpcode IRJit::GetOriginalOp(MIPSOpcode op) {
	IRBlock *b = blocks_.GetBlock(blocks_.FindByCookie(op.encoding & 0xFFFFFF));
	if (b) {
		return b->GetOriginalFirstOp();
	}
	return op;
}

} // namespace MIPSComp