GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/ARM64/Arm64Jit.cpp
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"

#if PPSSPP_ARCH(ARM64)

#include "Common/Profiler/Profiler.h"
#include "Common/Log.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"
#include "Common/CPUDetect.h"
#include "Common/StringUtils.h"

#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/Debugger/SymbolMap.h"
#include "Core/MemMap.h"

#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"

#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/JitCommon/JitCommon.h"

using namespace Arm64JitConstants;

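// Debug helper used when block logging is enabled (see DoJit below): disassembles the
// freshly emitted ARM64 code and writes it to the JIT log, one instruction per line.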
static void DisassembleArm64Print(const u8 *data, int size) {
	std::vector<std::string> lines = DisassembleArm64(data, size);
	for (auto s : lines) {
		INFO_LOG(Log::JIT, "%s", s.c_str());
	}
	/*
	INFO_LOG(Log::JIT, "+++");
	// A format friendly to Online Disassembler which gets endianness wrong
	for (size_t i = 0; i < lines.size(); i++) {
		uint32_t opcode = ((const uint32_t *)data)[i];
		INFO_LOG(Log::JIT, "%d/%d: %08x", (int)(i+1), (int)lines.size(), swap32(opcode));
	}
	INFO_LOG(Log::JIT, "===");
	INFO_LOG(Log::JIT, "===");*/
}

static u32 JitBreakpoint(uint32_t addr) {
	// Should we skip this breakpoint?
	if (CBreakPoints::CheckSkipFirst() == currentMIPS->pc || CBreakPoints::CheckSkipFirst() == addr)
		return 0;

	BreakAction result = CBreakPoints::ExecBreakPoint(addr);
	if ((result & BREAK_ACTION_PAUSE) == 0)
		return 0;

	return 1;
}

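// Called from JIT code when memchecks are active. Reconstructs the effective address of
// the load/store at pc: the 16-bit immediate is sign-extended and added to the base
// register rs. For VFPU memory ops the low two bits of the offset field are not part of
// the offset (they help encode the vector register), so they are masked off first.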
static u32 JitMemCheck(u32 pc) {
	if (CBreakPoints::CheckSkipFirst() == currentMIPS->pc)
		return 0;

	// Note: pc may point at the delay slot.
	const auto op = Memory::Read_Instruction(pc, true);
	s32 offset = SignExtend16ToS32(op & 0xFFFF);
	if (MIPSGetInfo(op) & IS_VFPU)
		offset &= 0xFFFC;
	u32 addr = currentMIPS->r[MIPS_GET_RS(op)] + offset;

	CBreakPoints::ExecOpMemCheck(addr, pc);
	return coreState == CORE_RUNNING || coreState == CORE_NEXTFRAME ? 0 : 1;
}

namespace MIPSComp
{
using namespace Arm64Gen;
using namespace Arm64JitConstants;

Arm64Jit::Arm64Jit(MIPSState *mipsState) : blocks(mipsState, this), gpr(mipsState, &js, &jo), fpr(mipsState, &js, &jo), mips_(mipsState), fp(this) {
	// Automatically disable incompatible options.
	if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
		jo.enablePointerify = false;
	}

#ifdef MASKED_PSP_MEMORY
	jo.enablePointerify = false;
#endif

	logBlocks = 0;
	dontLogBlocks = 0;
	blocks.Init();
	gpr.SetEmitter(this);
	fpr.SetEmitter(this, &fp);
	AllocCodeSpace(1024 * 1024 * 16); // 32MB is the absolute max because that's what an ARM branch instruction can reach, backwards and forwards.
	GenerateFixedCode(jo);
	js.startDefaultPrefix = mips_->HasDefaultPrefix();
	js.currentRoundingFunc = convertS0ToSCRATCH1[mips_->fcr31 & 3];

	// The debugger sets this so that "go" on a breakpoint will actually... go.
	// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
	CBreakPoints::SetSkipFirst(0);
}

Arm64Jit::~Arm64Jit() {
}

void Arm64Jit::DoState(PointerWrap &p) {
	auto s = p.Section("Jit", 1, 2);
	if (!s)
		return;

	Do(p, js.startDefaultPrefix);
	if (p.mode == PointerWrap::MODE_READ && !js.startDefaultPrefix) {
		WARN_LOG(Log::CPU, "Jit: An uneaten prefix was previously detected. Jitting in unknown-prefix mode.");
	}
	if (s >= 2) {
		Do(p, js.hasSetRounding);
		if (p.mode == PointerWrap::MODE_READ) {
			js.lastSetRounding = 0;
		}
	} else {
		js.hasSetRounding = 1;
	}

	// Note: we can't update the currentRoundingFunc here because fcr31 wasn't loaded yet.

	// The debugger sets this so that "go" on a breakpoint will actually... go.
	// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
	CBreakPoints::SetSkipFirst(0);
}

void Arm64Jit::UpdateFCR31() {
	js.currentRoundingFunc = convertS0ToSCRATCH1[mips_->fcr31 & 3];
}

void Arm64Jit::FlushAll() {
	gpr.FlushAll();
	fpr.FlushAll();
	FlushPrefixV();
}

void Arm64Jit::FlushPrefixV() {
	if (js.startDefaultPrefix && !js.blockWrotePrefixes && js.HasNoPrefix()) {
		// They started default, we never modified in memory, and they're default now.
		// No reason to modify memory. This is common at end of blocks. Just clear dirty.
		js.prefixSFlag = (JitState::PrefixState)(js.prefixSFlag & ~JitState::PREFIX_DIRTY);
		js.prefixTFlag = (JitState::PrefixState)(js.prefixTFlag & ~JitState::PREFIX_DIRTY);
		js.prefixDFlag = (JitState::PrefixState)(js.prefixDFlag & ~JitState::PREFIX_DIRTY);
		return;
	}

	if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
		gpr.SetRegImm(SCRATCH1, js.prefixS);
		STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
		js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
	}

	if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
		gpr.SetRegImm(SCRATCH1, js.prefixT);
		STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
		js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
	}

	if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
		gpr.SetRegImm(SCRATCH1, js.prefixD);
		STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
		js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
	}

	// If we got here, we must've written prefixes to memory in this block.
	js.blockWrotePrefixes = true;
}

void Arm64Jit::ClearCache() {
	INFO_LOG(Log::JIT, "ARM64Jit: Clearing the cache!");
	blocks.Clear();
	ClearCodeSpace(jitStartOffset);
	FlushIcacheSection(region + jitStartOffset, region + region_size - jitStartOffset);
}

void Arm64Jit::InvalidateCacheAt(u32 em_address, int length) {
	if (blocks.RangeMayHaveEmuHacks(em_address, em_address + length)) {
		blocks.InvalidateICache(em_address, length);
	}
}

void Arm64Jit::EatInstruction(MIPSOpcode op) {
	MIPSInfo info = MIPSGetInfo(op);
	if (info & DELAYSLOT) {
		ERROR_LOG_REPORT_ONCE(ateDelaySlot, Log::JIT, "Ate a branch op.");
	}
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT_ONCE(ateInDelaySlot, Log::JIT, "Ate an instruction inside a delay slot.");
	}

	CheckJitBreakpoint(GetCompilerPC() + 4, 0);
	js.numInstructions++;
	js.compilerPC += 4;
	js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
}

void Arm64Jit::CompileDelaySlot(int flags) {
	// Need to offset the downcount which was already incremented for the branch + delay slot.
	CheckJitBreakpoint(GetCompilerPC() + 4, -2);

	// Preserve the flags around the delay slot! Maybe this is not always necessary on ARM where
	// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt into the
	// delay slot, we're screwed.
	if (flags & DELAYSLOT_SAFE)
		MRS(FLAGTEMPREG, FIELD_NZCV); // Save flags register. FLAGTEMPREG is preserved through function calls and is not allocated.

	js.inDelaySlot = true;
	MIPSOpcode op = GetOffsetInstruction(1);
	MIPSCompileOp(op, this);
	js.inDelaySlot = false;

	if (flags & DELAYSLOT_FLUSH)
		FlushAll();
	if (flags & DELAYSLOT_SAFE)
		_MSR(FIELD_NZCV, FLAGTEMPREG); // Restore flags register

void Arm64Jit::Compile(u32 em_address) {
	PROFILE_THIS_SCOPE("jitc");
	if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
		INFO_LOG(Log::JIT, "Space left: %d", (int)GetSpaceLeft());
		ClearCache();
	}

	BeginWrite(JitBlockCache::MAX_BLOCK_INSTRUCTIONS * 16);

	int block_num = blocks.AllocateBlock(em_address);
	JitBlock *b = blocks.GetBlock(block_num);
	DoJit(em_address, b);
	_assert_msg_(b->originalAddress == em_address, "original %08x != em_address %08x (block %d)", b->originalAddress, em_address, b->blockNum);
	blocks.FinalizeBlock(block_num, jo.enableBlocklink);
	EndWrite();

	// Don't forget to zap the newly written instructions in the instruction cache!
	FlushIcache();

	bool cleanSlate = false;

	if (js.hasSetRounding && !js.lastSetRounding) {
		WARN_LOG(Log::JIT, "Detected rounding mode usage, rebuilding jit with checks");
		// Won't loop, since hasSetRounding is only ever set to 1.
		js.lastSetRounding = js.hasSetRounding;
		cleanSlate = true;
	}

	// Drat. The VFPU hit an uneaten prefix at the end of a block.
	if (js.startDefaultPrefix && js.MayHavePrefix()) {
		WARN_LOG_REPORT(Log::JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4);
		js.LogPrefix();

		// Let's try that one more time. We won't get back here because we toggled the value.
		js.startDefaultPrefix = false;
		// TODO ARM64: This crashes.
		//cleanSlate = true;
	}

	if (cleanSlate) {
		// Our assumptions are all wrong so it's clean-slate time.
		ClearCache();
		Compile(em_address);
	}
}

void Arm64Jit::RunLoopUntil(u64 globalticks) {
	PROFILE_THIS_SCOPE("jit");
	((void (*)())enterDispatcher)();
}

u32 Arm64Jit::GetCompilerPC() {
	return js.compilerPC;
}

MIPSOpcode Arm64Jit::GetOffsetInstruction(int offset) {
	return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
}

const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
	js.cancel = false;
	js.blockStart = em_address;
	js.compilerPC = em_address;
	js.lastContinuedPC = 0;
	js.initialBlockSize = 0;
	js.nextExit = 0;
	js.downcountAmount = 0;
	js.curBlock = b;
	js.compiling = true;
	js.inDelaySlot = false;
	js.blockWrotePrefixes = false;
	js.PrefixStart();

	// We add a downcount flag check before the block, used when entering from a linked block.
	// The last block decremented the downcounter, and the flag should still be available.
	// Got three variants here of where we position the code, needs detailed benchmarking.

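	// Rough sketch of the checked entry emitted in the jo.enableBlocklink case below
	// (illustrative only, derived from the emitter calls that follow):
	//   checkedEntry: MOVI2R SCRATCH1, blockStart    ; PC to hand back if we have to bail
	//                 B.GE   skip                    ; downcount still >= 0, run the block
	//                 B      outerLoopPCInSCRATCH1   ; out of cycles, back to the run loop
	//   skip:         (normalEntry, block body follows)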
	FixupBranch bail;
	if (jo.useBackJump) {
		// Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there.
		// Speedup seems to be zero unfortunately but I guess it may vary from device to device.
		// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
		// large/slow construction of 32-bit immediates?
		const u8 *backJump = GetCodePtr();
		MOVI2R(SCRATCH1, js.blockStart);
		B((const void *)outerLoopPCInSCRATCH1);
		b->checkedEntry = GetCodePtr();
		B(CC_LT, backJump);
	} else if (jo.useForwardJump) {
		b->checkedEntry = GetCodePtr();
		bail = B(CC_LT);
	} else if (jo.enableBlocklink) {
		b->checkedEntry = GetCodePtr();
		MOVI2R(SCRATCH1, js.blockStart);
		FixupBranch skip = B(CC_GE);
		B((const void *)outerLoopPCInSCRATCH1);
		SetJumpTarget(skip);
	} else {
		// No block linking, no need to add headers to blocks.
	}

	b->normalEntry = GetCodePtr();
	// TODO: this needs work
	MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

	gpr.Start(analysis);
	fpr.Start(analysis);

	js.numInstructions = 0;
	while (js.compiling) {
		gpr.SetCompilerPC(GetCompilerPC()); // Let it know for log messages
		// Jit breakpoints are quite fast, so let's do them in release too.
		CheckJitBreakpoint(GetCompilerPC(), 0);

		MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC());
		js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

		MIPSCompileOp(inst, this);

		js.compilerPC += 4;
		js.numInstructions++;

		if (jo.Disabled(JitDisable::REGALLOC_GPR)) {
			gpr.FlushAll();
		}
		if (jo.Disabled(JitDisable::REGALLOC_FPR)) {
			fpr.FlushAll();
			FlushPrefixV();
		}

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS) {
			FlushAll();
			WriteExit(GetCompilerPC(), js.nextExit++);
			js.compiling = false;
		}
	}

	if (jo.useForwardJump) {
		SetJumpTarget(bail);
		gpr.SetRegImm(SCRATCH1, js.blockStart);
		B((const void *)outerLoopPCInSCRATCH1);
	}

	char temp[256];
	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(Log::JIT, "=============== mips %d ===============", blocks.GetNumBlocks());
		for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) {
			MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, sizeof(temp), true);
			INFO_LOG(Log::JIT, "M: %08x %s", cpc, temp);
		}
	}

	b->codeSize = GetCodePtr() - b->normalEntry;
	if (logBlocks > 0 && dontLogBlocks == 0) {
		INFO_LOG(Log::JIT, "=============== ARM (%d instructions -> %d bytes) ===============", js.numInstructions, b->codeSize);
		DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry);
	}
	if (logBlocks > 0)
		logBlocks--;
	if (dontLogBlocks > 0)
		dontLogBlocks--;

	if (js.lastContinuedPC == 0) {
		b->originalSize = js.numInstructions;
	} else {
		// We continued at least once. Add the last proxy and set the originalSize correctly.
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
		b->originalSize = js.initialBlockSize;
	}

	return b->normalEntry;
}

void Arm64Jit::AddContinuedBlock(u32 dest) {
	// The first block is the root block. When we continue, we create proxy blocks after that.
	if (js.lastContinuedPC == 0)
		js.initialBlockSize = js.numInstructions;
	else
		blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
	js.lastContinuedPC = dest;
}

bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name) {
	// Used in disassembly viewer.
	if (ptr == applyRoundingMode)
		name = "applyRoundingMode";
	else if (ptr == updateRoundingMode)
		name = "updateRoundingMode";
	else if (ptr == dispatcher)
		name = "dispatcher";
	else if (ptr == dispatcherPCInSCRATCH1)
		name = "dispatcher (PC in SCRATCH1)";
	else if (ptr == dispatcherNoCheck)
		name = "dispatcherNoCheck";
	else if (ptr == enterDispatcher)
		name = "enterDispatcher";
	else if (ptr == restoreRoundingMode)
		name = "restoreRoundingMode";
	else if (ptr == saveStaticRegisters)
		name = "saveStaticRegisters";
	else if (ptr == loadStaticRegisters)
		name = "loadStaticRegisters";
	else {
		u32 addr = blocks.GetAddressFromBlockPtr(ptr);
		// Returns 0 when it's valid, but unknown.
		if (addr == 0) {
			name = "(unknown or deleted block)";
			return true;
		} else if (addr != (u32)-1) {
			name = "(outside space)";
			return true;
		}

		int number = blocks.GetBlockNumberFromAddress(addr);
		if (number != -1) {
			const JitBlock *block = blocks.GetBlock(number);
			if (block) {
				name = StringFromFormat("(block %d at %08x)", number, block->originalAddress);
				return true;
			}
		}
		return false;
	}
	return true;
}

void Arm64Jit::Comp_RunBlock(MIPSOpcode op) {
	// This shouldn't be necessary, the dispatcher should catch us before we get here.
	ERROR_LOG(Log::JIT, "Comp_RunBlock should never be reached!");
}

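// LinkBlock/UnlinkBlock patch code that has already been emitted. On platforms where
// pages cannot be writable and executable at the same time (PlatformIsWXExclusive), the
// target area is flipped to read/write before patching and back to read/execute after.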
void Arm64Jit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_WRITE);
	}
	ARM64XEmitter emit(GetCodePtrFromWritablePtr(exitPoint), exitPoint);
	emit.B(checkedEntry);
	// TODO: Write stuff after, covering up the now-unused instructions.
	emit.FlushIcache();
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_EXEC);
	}
}

void Arm64Jit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
	// Send anyone who tries to run this block back to the dispatcher.
	// Not entirely ideal, but ... works.
	// Spurious entrances from previously linked blocks can only come through checkedEntry.
	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_WRITE);
	}

	ARM64XEmitter emit(GetCodePtrFromWritablePtr(checkedEntry), checkedEntry);
	emit.MOVI2R(SCRATCH1, originalAddress);
	emit.STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));
	emit.B(MIPSComp::jit->GetDispatcher());
	emit.FlushIcache();

	if (PlatformIsWXExclusive()) {
		ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_EXEC);
	}
}

bool Arm64Jit::ReplaceJalTo(u32 dest) {
#if PPSSPP_ARCH(ARM64)
	const ReplacementTableEntry *entry = nullptr;
	u32 funcSize = 0;
	if (!CanReplaceJalTo(dest, &entry, &funcSize)) {
		return false;
	}

	// Warning - this might be bad if the code at the destination changes...
	if (entry->flags & REPFLAG_ALLOWINLINE) {
		// Jackpot! Just do it, no flushing. The code will be entirely inlined.
		// First, compile the delay slot. It's unconditional so no issues.
		CompileDelaySlot(DELAYSLOT_NICE);
		// Technically, we should write the unused return address to RA, but meh.
		MIPSReplaceFunc repl = entry->jitReplaceFunc;
		int cycles = (this->*repl)();
		js.downcountAmount += cycles;
	} else {
		gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		SaveStaticRegisters();
		RestoreRoundingMode();
		QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc));
		ApplyRoundingMode();
		LoadStaticRegisters();
		WriteDownCountR(W0); // W0 is the return value from entry->replaceFunc. Neither LoadStaticRegisters nor ApplyRoundingMode can trash it.
	}

	js.compilerPC += 4;
	// No writing exits, keep going!

	if (CBreakPoints::HasMemChecks()) {
		// We could modify coreState, so we need to write PC and check.
		// Otherwise, PC may end up on the jal. We add 4 to skip the delay slot.
		FlushAll();
		WriteExit(GetCompilerPC() + 4, js.nextExit++);
		js.compiling = false;
	}

	// Add a trigger so that if the inlined code changes, we invalidate this block.
	blocks.ProxyBlock(js.blockStart, dest, funcSize / sizeof(u32), GetCodePtr());
#endif
	return true;
}

void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
{
	// We get here if we execute the first instruction of a replaced function. This means
	// that we do need to return to RA.

	// Inlined function calls (caught in jal) are handled differently.

	int index = op.encoding & MIPS_EMUHACK_VALUE_MASK;

	const ReplacementTableEntry *entry = GetReplacementFunc(index);
	if (!entry) {
		ERROR_LOG_REPORT_ONCE(replFunc, Log::HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
		// TODO: What should we do here? We're way off in the weeds probably.
		return;
	}

	u32 funcSize = g_symbolMap->GetFunctionSize(GetCompilerPC());
	bool disabled = (entry->flags & REPFLAG_DISABLED) != 0;
	if (!disabled && funcSize != SymbolMap::INVALID_ADDRESS && funcSize > sizeof(u32)) {
		// We don't need to disable hooks, the code will still run.
		if ((entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) == 0) {
			// Any breakpoint at the func entry was already tripped, so we can still run the replacement.
			// That's a common case - just to see how often the replacement hits.
			disabled = CBreakPoints::RangeContainsBreakPoint(GetCompilerPC() + sizeof(u32), funcSize - sizeof(u32));
		}
	}

	if (disabled) {
		MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
	} else if (entry->jitReplaceFunc) {
		MIPSReplaceFunc repl = entry->jitReplaceFunc;
		int cycles = (this->*repl)();

		if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
			// Compile the original instruction at this address. We ignore cycles for hooks.
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			FlushAll();
			// Flushed, so SCRATCH1 is safe.
			LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, MIPS_REG_RA * 4);
			js.downcountAmount += cycles;
			WriteExitDestInR(SCRATCH1);
			js.compiling = false;
		}
	} else if (entry->replaceFunc) {
		FlushAll();
		SaveStaticRegisters();
		RestoreRoundingMode();
		gpr.SetRegImm(SCRATCH1, GetCompilerPC());
		MovToPC(SCRATCH1);

		// Standard function call, nothing fancy.
		// The function returns the number of cycles it took in W0.
		QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc));

		if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
			// Compile the original instruction at this address. We ignore cycles for hooks.
			ApplyRoundingMode();
			LoadStaticRegisters();
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else {
			ApplyRoundingMode();
			LoadStaticRegisters();

			CMPI2R(W0, 0);
			FixupBranch positive = B(CC_GE);

			NEG(W0, W0);
			MovFromPC(W1);
			FixupBranch done = B();

			SetJumpTarget(positive);
			LDR(INDEX_UNSIGNED, W1, CTXREG, MIPS_REG_RA * 4);

			SetJumpTarget(done);
			WriteDownCountR(W0);
			WriteExitDestInR(W1);
			js.compiling = false;
		}
	} else {
		ERROR_LOG(Log::HLE, "Replacement function %s has neither jit nor regular impl", entry->name);
	}
}

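// Generic fallback: ops the JIT has no native implementation for are handed to the
// interpreter. We flush the register caches, sync the emulated PC, and call the op's
// interpreter function with the opcode in W0.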
void Arm64Jit::Comp_Generic(MIPSOpcode op) {
	FlushAll();
	MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
	if (func) {
		SaveStaticRegisters();
		// TODO: Perhaps keep the rounding mode for interp? Should probably, right?
		RestoreRoundingMode();
		MOVI2R(SCRATCH1, GetCompilerPC());
		MovToPC(SCRATCH1);
		MOVI2R(W0, op.encoding);
		QuickCallFunction(SCRATCH2_64, (void *)func);
		ApplyRoundingMode();
		LoadStaticRegisters();
	}

	const MIPSInfo info = MIPSGetInfo(op);
	if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) {
		// If it does eat the prefixes, it'll happen in MIPSCompileOp().
		if ((info & OUT_EAT_PREFIX) == 0)
			js.PrefixUnknown();

		// Even if DISABLE'd, we want to set this flag so we overwrite.
		if ((info & OUT_VFPU_PREFIX) != 0)
			js.blockWrotePrefixes = true;
	}
}

void Arm64Jit::MovFromPC(ARM64Reg r) {
	LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}

void Arm64Jit::MovToPC(ARM64Reg r) {
	STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}

// Should not really be necessary except when entering Advance
void Arm64Jit::SaveStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(SCRATCH2_64, saveStaticRegisters);
	} else {
		// Inline the single operation
		STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
	}
}

void Arm64Jit::LoadStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(SCRATCH2_64, loadStaticRegisters);
	} else {
		LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
	}
}

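// DOWNCOUNTREG mirrors MIPSState::downcount, the number of cycles left before we must
// return to the dispatcher. WriteDownCount subtracts the cycles this block consumed;
// with updateFlags set, the flag-setting subtract leaves NZCV so a following linked-block
// entry can test it with B.GE / B.LT (see the block prologues in DoJit).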
void Arm64Jit::WriteDownCount(int offset, bool updateFlags) {
	int theDowncount = js.downcountAmount + offset;
	if (updateFlags) {
		SUBSI2R(DOWNCOUNTREG, DOWNCOUNTREG, theDowncount, SCRATCH1);
	} else {
		SUBI2R(DOWNCOUNTREG, DOWNCOUNTREG, theDowncount, SCRATCH1);
	}
}

void Arm64Jit::WriteDownCountR(ARM64Reg reg, bool updateFlags) {
	if (updateFlags) {
		SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
	} else {
		SUB(DOWNCOUNTREG, DOWNCOUNTREG, reg);
	}
}

// Destroys SCRATCH2
void Arm64Jit::RestoreRoundingMode(bool force) {
	// If the game has never set an interesting rounding mode, we can safely skip this.
	if (force || js.hasSetRounding) {
		QuickCallFunction(SCRATCH2_64, restoreRoundingMode);
	}
}

// Destroys SCRATCH1 and SCRATCH2
void Arm64Jit::ApplyRoundingMode(bool force) {
	// If the game has never set an interesting rounding mode, we can safely skip this.
	if (force || js.hasSetRounding) {
		QuickCallFunction(SCRATCH2_64, applyRoundingMode);
	}
}

// Destroys SCRATCH1 and SCRATCH2
void Arm64Jit::UpdateRoundingMode(u32 fcr31) {
	// We must set js.hasSetRounding at compile time, or this block will use the wrong rounding mode.
	// The fcr31 parameter is -1 when not known at compile time, so we just assume it was changed.
	if (fcr31 & 0x01000003) {
		js.hasSetRounding = true;
	}
	QuickCallFunction(SCRATCH2_64, updateRoundingMode);
}

// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
// I don't think this gives us that much benefit.
void Arm64Jit::WriteExit(u32 destination, int exit_num) {
	// NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
	_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);

	WriteDownCount();
	// If nobody has taken care of this yet (this can be removed when all branches are done)
	JitBlock *b = js.curBlock;
	b->exitAddress[exit_num] = destination;
	b->exitPtrs[exit_num] = GetWritableCodePtr();

	// Link opportunity!
	int block = blocks.GetBlockNumberFromStartAddress(destination);
	if (block >= 0 && jo.enableBlocklink) {
		// The target block exists! Directly link to its checked entrypoint.
		B(blocks.GetBlock(block)->checkedEntry);
		b->linkStatus[exit_num] = true;
	} else {
		MOVI2R(SCRATCH1, destination);
		B((const void *)dispatcherPCInSCRATCH1);
	}
}

void Arm64Jit::WriteExitDestInR(ARM64Reg Reg) {
	// TODO: If not fast memory, check for invalid address in reg and trigger exception.
	MovToPC(Reg);
	WriteDownCount();
	// TODO: shouldn't need an indirect branch here...
	B((const void *)dispatcher);
}

void Arm64Jit::WriteSyscallExit() {
	WriteDownCount();
	B((const void *)dispatcherCheckCoreState);
}

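// Emits an inline breakpoint check when a breakpoint is set at addr. The NZCV flags are
// saved to FLAGTEMPREG around the call to JitBreakpoint so any live compare result in the
// surrounding code survives, and restored afterwards on the non-breaking path.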
bool Arm64Jit::CheckJitBreakpoint(u32 addr, int downcountOffset) {
	if (CBreakPoints::IsAddressBreakPoint(addr)) {
		MRS(FLAGTEMPREG, FIELD_NZCV);
		FlushAll();
		MOVI2R(SCRATCH1, GetCompilerPC());
		MovToPC(SCRATCH1);
		SaveStaticRegisters();
		RestoreRoundingMode();
		MOVI2R(W0, addr);
		QuickCallFunction(SCRATCH1_64, &JitBreakpoint);

		// If 0, the conditional breakpoint wasn't taken.
		CMPI2R(W0, 0);
		FixupBranch skip = B(CC_EQ);
		WriteDownCount(downcountOffset);
		ApplyRoundingMode();
		LoadStaticRegisters();
		B((const void *)dispatcherCheckCoreState);
		SetJumpTarget(skip);

		ApplyRoundingMode();
		LoadStaticRegisters();
		_MSR(FIELD_NZCV, FLAGTEMPREG);
		return true;
	}

	return false;
}

bool Arm64Jit::CheckMemoryBreakpoint(int instructionOffset) {
	if (CBreakPoints::HasMemChecks()) {
		int off = instructionOffset + (js.inDelaySlot ? 1 : 0);

		MRS(FLAGTEMPREG, FIELD_NZCV);
		FlushAll();
		RestoreRoundingMode();
		MOVI2R(W0, GetCompilerPC());
		MovToPC(W0);
		if (off != 0)
			ADDI2R(W0, W0, off * 4);
		QuickCallFunction(SCRATCH2_64, &JitMemCheck);

		// If 0, the breakpoint wasn't tripped.
		CMPI2R(W0, 0);
		FixupBranch skip = B(CC_EQ);
		WriteDownCount(-1 - off);
		ApplyRoundingMode();
		B((const void *)dispatcherCheckCoreState);
		SetJumpTarget(skip);

		ApplyRoundingMode();
		_MSR(FIELD_NZCV, FLAGTEMPREG);
		return true;
	}

	return false;
}

void Arm64Jit::Comp_DoNothing(MIPSOpcode op) { }

MIPSOpcode Arm64Jit::GetOriginalOp(MIPSOpcode op) {
	JitBlockCache *bc = GetBlockCache();
	int block_num = bc->GetBlockNumberFromEmuHackOp(op, true);
	if (block_num >= 0) {
		return bc->GetOriginalFirstOp(block_num);
	} else {
		return op;
	}
}

} // namespace

#endif // PPSSPP_ARCH(ARM64)