CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/ARM64/Arm64CompBranch.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#if PPSSPP_ARCH(ARM64)
20
21
#include "Common/Data/Convert/SmallDataConvert.h"
22
#include "Common/Profiler/Profiler.h"
23
24
#include "Core/Config.h"
25
#include "Core/Core.h"
26
#include "Core/Reporting.h"
27
#include "Core/MemMap.h"
28
#include "Core/HLE/HLE.h"
29
#include "Core/HLE/HLETables.h"
30
31
#include "Core/MIPS/MIPS.h"
32
#include "Core/MIPS/MIPSCodeUtils.h"
33
#include "Core/MIPS/MIPSAnalyst.h"
34
#include "Core/MIPS/MIPSTables.h"
35
36
#include "Core/MIPS/ARM64/Arm64Jit.h"
37
#include "Core/MIPS/ARM64/Arm64RegCache.h"
38
#include "Core/MIPS/JitCommon/JitBlockCache.h"
39
40
#include "Common/Arm64Emitter.h"
41
42
// Field-extraction helpers for the MIPS opcode currently being compiled (`op`).
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM26 (op & 0x03FFFFFF)
// Branch displacements in bytes: sign-extended 16-bit immediate and the
// 26-bit jump index, both shifted to word granularity.
#define TARGET16 ((int)(SignExtend16ToU32(op) << 2))
#define TARGET26 (_IMM26 << 2)

#define LOOPOPTIMIZATION 0

// We can disable nice delay slots.
// #define CONDITIONAL_NICE_DELAYSLOT branchInfo.delaySlotIsNice = false;
#define CONDITIONAL_NICE_DELAYSLOT ;

using namespace MIPSAnalyst;
62
63
namespace MIPSComp
64
{
65
using namespace Arm64Gen;
66
using namespace Arm64JitConstants;
67
68
// Compiles a two-register compare-and-branch (beq/bne and their "likely"
// variants). `cc` is the INVERTED condition: the condition under which the
// branch is NOT taken, since the emitted compare skips over the taken exit.
// `likely` means the delay slot executes only when the branch is taken.
void Arm64Jit::BranchRSRTComp(MIPSOpcode op, CCFlags cc, bool likely)
{
	if (js.inDelaySlot) {
		// Branch inside a delay slot isn't supported here - log and bail.
		ERROR_LOG_REPORT(Log::JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}
	int offset = TARGET16;
	MIPSGPReg rt = _RT;
	MIPSGPReg rs = _RS;
	u32 targetAddr = GetCompilerPC() + offset + 4;

	BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
	// "Nice" = the delay slot doesn't interfere with rt/rs, so it can be
	// compiled before the compare.
	branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs);
	CONDITIONAL_NICE_DELAYSLOT;

	// If both operands are known immediates, the branch direction is known
	// at compile time.
	bool immBranch = false;
	bool immBranchTaken = false;
	if (gpr.IsImm(rs) && gpr.IsImm(rt) && !branchInfo.delaySlotIsBranch) {
		// The cc flags are opposites: when NOT to take the branch.
		bool immBranchNotTaken;
		s32 rsImm = (s32)gpr.GetImm(rs);
		s32 rtImm = (s32)gpr.GetImm(rt);

		switch (cc)
		{
		case CC_EQ: immBranchNotTaken = rsImm == rtImm; break;
		case CC_NEQ: immBranchNotTaken = rsImm != rtImm; break;
		default: immBranchNotTaken = false; _dbg_assert_msg_(false, "Bad cc flag in BranchRSRTComp().");
		}
		immBranch = true;
		immBranchTaken = !immBranchNotTaken;
	}

	// Statically-known branch: keep compiling straight through instead of
	// ending the block, if the continuation options allow it.
	if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
		if (!immBranchTaken) {
			// Skip the delay slot if likely, otherwise it'll be the next instruction.
			if (likely)
				js.compilerPC += 4;
			return;
		}

		// Branch taken. Always compile the delay slot, and then go to dest.
		CompileDelaySlot(DELAYSLOT_NICE);
		AddContinuedBlock(targetAddr);
		// Account for the increment in the loop.
		js.compilerPC = targetAddr - 4;
		// In case the delay slot was a break or something.
		js.compiling = true;
		return;
	}

	js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);

	u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
	if (immBranch) {
		// Continuing is handled above, this is just static jumping.
		if (immBranchTaken || !likely)
			CompileDelaySlot(DELAYSLOT_FLUSH);
		else
			FlushAll();

		const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
		WriteExit(destAddr, js.nextExit++);
	} else {
		// Dynamic branch: emit a real compare and two block exits.
		if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
			CompileDelaySlot(DELAYSLOT_NICE);

		// We might be able to flip the condition (EQ/NEQ are easy.)
		const bool canFlip = cc == CC_EQ || cc == CC_NEQ;
		const bool rsIsZero = gpr.IsImm(rs) && gpr.GetImm(rs) == 0;
		const bool rtIsZero = gpr.IsImm(rt) && gpr.GetImm(rt) == 0;

		Arm64Gen::FixupBranch ptr;
		if ((likely || branchInfo.delaySlotIsNice) && (rsIsZero || rtIsZero) && canFlip) {
			// Special case, we can just use CBZ/CBNZ directly.
			MIPSGPReg r = rsIsZero ? rt : rs;
			gpr.MapReg(r);
			// Flush should keep r in the same armreg.
			ARM64Reg ar = gpr.R(r);
			FlushAll();
			if (cc == CC_EQ) {
				ptr = CBZ(ar);
			} else {
				ptr = CBNZ(ar);
			}
		} else {
			// Prefer an immediate compare when one side is a known constant
			// that fits an arithmetic immediate (CMP), or whose negation
			// does (CMN). The flipped variants need canFlip.
			u32 val;
			bool shift;
			if (gpr.IsImm(rt) && IsImmArithmetic(gpr.GetImm(rt), &val, &shift)) {
				gpr.MapReg(rs);
				CMP(gpr.R(rs), val, shift);
			} else if (gpr.IsImm(rt) && IsImmArithmetic((u64)(s64)-(s32)gpr.GetImm(rt), &val, &shift)) {
				gpr.MapReg(rs);
				CMN(gpr.R(rs), val, shift);
			} else if (gpr.IsImm(rs) && IsImmArithmetic(gpr.GetImm(rs), &val, &shift) && canFlip) {
				gpr.MapReg(rt);
				CMP(gpr.R(rt), val, shift);
			} else if (gpr.IsImm(rs) && IsImmArithmetic((u64)(s64)-(s32)gpr.GetImm(rs), &val, &shift) && canFlip) {
				gpr.MapReg(rt);
				CMN(gpr.R(rt), val, shift);
			} else {
				gpr.MapInIn(rs, rt);
				CMP(gpr.R(rs), gpr.R(rt));
			}

			if (!likely) {
				// Non-nice slot must run after the compare; SAFE_FLUSH keeps
				// the NZCV flags intact for the B(cc) below.
				if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
					CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
				else
					FlushAll();
				ptr = B(cc);
			} else {
				FlushAll();
				ptr = B(cc);
			}
		}

		if (likely && !branchInfo.delaySlotIsBranch) {
			// Only executed when taking the branch.
			CompileDelaySlot(DELAYSLOT_FLUSH);
		}

		if (branchInfo.delaySlotIsBranch) {
			// We still link when the branch is taken (targetAddr case.)
			// Remember, it's from the perspective of the delay slot, so +12.
			if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
				gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
			if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
				gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
			FlushAll();
		}

		// Take the branch
		WriteExit(targetAddr, js.nextExit++);

		SetJumpTarget(ptr);
		// Not taken
		WriteExit(notTakenTarget, js.nextExit++);
	}

	js.compiling = false;
}
210
211
212
// Compiles a compare-against-zero branch (blez/bgtz/bltz/bgez families).
// `cc` is the INVERTED condition (when NOT to take the branch); `andLink`
// writes the return address to RA (bltzal/bgezal etc.); `likely` means the
// delay slot only executes when the branch is taken.
void Arm64Jit::BranchRSZeroComp(MIPSOpcode op, CCFlags cc, bool andLink, bool likely)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}
	int offset = TARGET16;
	MIPSGPReg rs = _RS;
	u32 targetAddr = GetCompilerPC() + offset + 4;

	BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely);
	branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs);
	CONDITIONAL_NICE_DELAYSLOT;

	// With rs a known immediate, the direction is decidable at compile time.
	bool immBranch = false;
	bool immBranchTaken = false;
	if (gpr.IsImm(rs) && !branchInfo.delaySlotIsBranch) {
		// The cc flags are opposites: when NOT to take the branch.
		bool immBranchNotTaken;
		s32 imm = (s32)gpr.GetImm(rs);

		switch (cc)
		{
		case CC_GT: immBranchNotTaken = imm > 0; break;
		case CC_GE: immBranchNotTaken = imm >= 0; break;
		case CC_LT: immBranchNotTaken = imm < 0; break;
		case CC_LE: immBranchNotTaken = imm <= 0; break;
		default: immBranchNotTaken = false; _dbg_assert_msg_(false, "Bad cc flag in BranchRSZeroComp().");
		}
		immBranch = true;
		immBranchTaken = !immBranchNotTaken;
	}

	if (jo.immBranches && immBranch && js.numInstructions < jo.continueMaxInstructions) {
		if (!immBranchTaken) {
			// Skip the delay slot if likely, otherwise it'll be the next instruction.
			// The link register is set even when the branch is not taken.
			if (andLink)
				gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
			if (likely)
				js.compilerPC += 4;
			return;
		}

		// Branch taken. Always compile the delay slot, and then go to dest.
		if (andLink)
			gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);

		AddContinuedBlock(targetAddr);
		// Account for the increment in the loop.
		js.compilerPC = targetAddr - 4;
		// In case the delay slot was a break or something.
		js.compiling = true;
		return;
	}

	js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);

	u32 notTakenTarget = ResolveNotTakenTarget(branchInfo);
	if (immBranch) {
		// Continuing is handled above, this is just static jumping.
		if (andLink)
			gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
		if (immBranchTaken || !likely)
			CompileDelaySlot(DELAYSLOT_FLUSH);
		else
			FlushAll();

		const u32 destAddr = immBranchTaken ? targetAddr : notTakenTarget;
		WriteExit(destAddr, js.nextExit++);
	} else {
		// Dynamic branch: real compare against zero plus two block exits.
		if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
			CompileDelaySlot(DELAYSLOT_NICE);

		gpr.MapReg(rs);
		CMP(gpr.R(rs), 0);

		// Link after the compare so SetImm can't disturb the mapped rs.
		if (andLink)
			gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);

		Arm64Gen::FixupBranch ptr;
		if (!likely)
		{
			// SAFE_FLUSH preserves the NZCV flags for the B(cc) below.
			if (!branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
				CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
			else
				FlushAll();
			ptr = B(cc);
		}
		else
		{
			FlushAll();
			ptr = B(cc);
			// Likely: the delay slot only runs on the taken path.
			if (!branchInfo.delaySlotIsBranch)
				CompileDelaySlot(DELAYSLOT_FLUSH);
		}

		if (branchInfo.delaySlotIsBranch) {
			// We still link when the branch is taken (targetAddr case.)
			// Remember, it's from the perspective of the delay slot, so +12.
			if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
				gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
			if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
				gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
			FlushAll();
		}

		// Take the branch
		WriteExit(targetAddr, js.nextExit++);

		SetJumpTarget(ptr);
		// Not taken
		WriteExit(notTakenTarget, js.nextExit++);
	}
	js.compiling = false;
}
328
329
330
// Dispatches the primary-opcode relative branches (beq/bne/blez/bgtz and the
// "likely" forms). The condition code handed to the helpers is the INVERSE of
// the MIPS condition, because the emitted compare skips the branching action.
void Arm64Jit::Comp_RelBranch(MIPSOpcode op)
{
	const u32 primaryOp = op >> 26;
	switch (primaryOp) {
	case 4:   // beq
	case 20:  // beql
		BranchRSRTComp(op, CC_NEQ, primaryOp == 20);
		break;
	case 5:   // bne
	case 21:  // bnel
		BranchRSRTComp(op, CC_EQ, primaryOp == 21);
		break;
	case 6:   // blez
	case 22:  // blezl
		BranchRSZeroComp(op, CC_GT, false, primaryOp == 22);
		break;
	case 7:   // bgtz
	case 23:  // bgtzl
		BranchRSZeroComp(op, CC_LE, false, primaryOp == 23);
		break;
	default:
		_dbg_assert_msg_(false, "Trying to compile instruction that can't be compiled");
		break;
	}
}
352
353
// Dispatches the REGIMM compare-to-zero branches (bltz/bgez families).
// The rt field encodes the variant: bit 0 picks bgez (>=0) over bltz (<0),
// bit 1 picks the "likely" form, bit 4 the linking (...al) form. As always,
// the CC passed down is the INVERTED condition.
void Arm64Jit::Comp_RelBranchRI(MIPSOpcode op)
{
	const u32 rtField = (op >> 16) & 0x1F;
	switch (rtField) {
	case 0: case 1: case 2: case 3:        // bltz / bgez / bltzl / bgezl
	case 16: case 17: case 18: case 19: {  // bltzal / bgezal / bltzall / bgezall
		const CCFlags inverseCC = (rtField & 1) != 0 ? CC_LT : CC_GE;
		const bool andLink = (rtField & 16) != 0;
		const bool isLikely = (rtField & 2) != 0;
		BranchRSZeroComp(op, inverseCC, andLink, isLikely);
		break;
	}
	default:
		_dbg_assert_msg_(false, "Trying to compile instruction that can't be compiled");
		break;
	}
}
370
371
// If likely is set, discard the branch slot if NOT taken.
// Compiles bc1f/bc1t (and likely forms): branch on the FPU condition bit in
// MIPS_REG_FPCOND. `cc` is the INVERTED condition.
void Arm64Jit::BranchFPFlag(MIPSOpcode op, CCFlags cc, bool likely) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}
	int offset = TARGET16;
	u32 targetAddr = GetCompilerPC() + offset + 4;

	BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
	branchInfo.delaySlotIsNice = IsDelaySlotNiceFPU(op, branchInfo.delaySlotOp);
	CONDITIONAL_NICE_DELAYSLOT;

	js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
	if (!likely && branchInfo.delaySlotIsNice && !branchInfo.delaySlotIsBranch)
		CompileDelaySlot(DELAYSLOT_NICE);

	gpr.MapReg(MIPS_REG_FPCOND);
	Arm64Gen::FixupBranch ptr;
	if (likely || branchInfo.delaySlotIsNice) {
		// Delay slot already handled; test bit 0 of FPCOND directly with TBZ/TBNZ.
		// FlushAll() won't actually change the reg.
		ARM64Reg ar = gpr.R(MIPS_REG_FPCOND);
		FlushAll();
		if (cc == CC_EQ) {
			ptr = TBZ(ar, 0);
		} else {
			ptr = TBNZ(ar, 0);
		}
	} else {
		// Non-nice slot: set flags first, compile the slot flag-safely, then branch.
		TSTI2R(gpr.R(MIPS_REG_FPCOND), 1, SCRATCH1);
		if (!branchInfo.delaySlotIsBranch)
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
		ptr = B(cc);
	}

	if (likely && !branchInfo.delaySlotIsBranch) {
		// Likely: the delay slot only runs on the taken path.
		CompileDelaySlot(DELAYSLOT_FLUSH);
	}

	if (branchInfo.delaySlotIsBranch) {
		// We still link when the branch is taken (targetAddr case.)
		// Remember, it's from the perspective of the delay slot, so +12.
		if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
			gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
		if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
			gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
		FlushAll();
	}

	// Take the branch
	WriteExit(targetAddr, js.nextExit++);

	SetJumpTarget(ptr);
	// Not taken
	WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
	js.compiling = false;
}
428
429
// Dispatches the COP1 (FPU) conditional branches bc1f/bc1t and their
// "likely" variants. The CC passed down is INVERTED, since BranchFPFlag
// emits a test that skips over the branch-taken path.
void Arm64Jit::Comp_FPUBranch(MIPSOpcode op) {
	switch((op >> 16) & 0x1f) {
	case 0: BranchFPFlag(op, CC_NEQ, false); break; // bc1f
	case 1: BranchFPFlag(op, CC_EQ, false); break; // bc1t
	case 2: BranchFPFlag(op, CC_NEQ, true); break; // bc1fl
	case 3: BranchFPFlag(op, CC_EQ, true); break; // bc1tl
	default:
		// Use `false` like every other dispatcher in this file (was `0`).
		_dbg_assert_msg_(false, "Trying to interpret instruction that can't be interpreted");
		break;
	}
}
440
441
// If likely is set, discard the branch slot if NOT taken.
// Compiles bvf/bvt (and likely forms): branch on one of the VFPU condition
// bits held in MIPS_REG_VFPUCC. `cc` is the INVERTED condition.
void Arm64Jit::BranchVFPUFlag(MIPSOpcode op, CCFlags cc, bool likely) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}
	int offset = TARGET16;
	u32 targetAddr = GetCompilerPC() + offset + 4;

	BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
	// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
	// The behavior is undefined - the CPU may take the second branch even if the first one passes.
	// However, it does consistently try each branch, which these games seem to expect.
	branchInfo.delaySlotIsNice = IsDelaySlotNiceVFPU(op, branchInfo.delaySlotOp);
	CONDITIONAL_NICE_DELAYSLOT;

	js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
	if (!likely && branchInfo.delaySlotIsNice)
		CompileDelaySlot(DELAYSLOT_NICE);

	// Which of the 8 VFPU condition bits this branch tests.
	int imm3 = (op >> 18) & 7;

	gpr.MapReg(MIPS_REG_VFPUCC);
	Arm64Gen::FixupBranch ptr;
	if (likely || branchInfo.delaySlotIsNice || branchInfo.delaySlotIsBranch) {
		// Test the chosen CC bit directly with TBZ/TBNZ.
		// FlushAll() won't actually change the reg.
		ARM64Reg ar = gpr.R(MIPS_REG_VFPUCC);
		FlushAll();
		if (cc == CC_EQ) {
			ptr = TBZ(ar, imm3);
		} else {
			ptr = TBNZ(ar, imm3);
		}
	} else {
		// Non-nice slot: set flags, compile the slot flag-safely, then branch.
		TSTI2R(gpr.R(MIPS_REG_VFPUCC), 1ULL << imm3, SCRATCH1);
		if (!branchInfo.delaySlotIsBranch)
			CompileDelaySlot(DELAYSLOT_SAFE_FLUSH);
		ptr = B(cc);
	}

	if (likely && !branchInfo.delaySlotIsBranch) {
		// Likely: the delay slot only runs on the taken path.
		CompileDelaySlot(DELAYSLOT_FLUSH);
	}

	if (branchInfo.delaySlotIsBranch) {
		// We still link when the branch is taken (targetAddr case.)
		// Remember, it's from the perspective of the delay slot, so +12.
		if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
			gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 12);
		if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
			gpr.SetImm(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
		FlushAll();
	}

	// Take the branch
	WriteExit(targetAddr, js.nextExit++);

	SetJumpTarget(ptr);
	// Not taken
	WriteExit(ResolveNotTakenTarget(branchInfo), js.nextExit++);
	js.compiling = false;
}
503
504
// Dispatches the VFPU conditional branches bvf/bvt/bvfl/bvtl.
// Bit 0 of the 2-bit sub-op selects the true-test (bvt) over the false-test
// (bvf); bit 1 selects the "likely" form. The CC is inverted, as usual,
// because the emitted test skips over the branch action.
void Arm64Jit::Comp_VBranch(MIPSOpcode op)
{
	const u32 subop = (op >> 16) & 3;
	const CCFlags inverseCC = (subop & 1) != 0 ? CC_EQ : CC_NEQ;
	const bool isLikely = (subop & 2) != 0;
	BranchVFPUFlag(op, inverseCC, isLikely);
}
514
515
// Slow-path helper called from generated code when a jump targets invalid or
// misaligned memory. The caller (Comp_Jump) set PC to the instruction after
// the delay slot (branch + 8), so pc - 8 recovers the branch's own address.
static void HitInvalidJump(uint32_t dest) {
	Core_ExecException(dest, currentMIPS->pc - 8, ExecExceptionType::JUMP);
}
518
519
// Compiles the unconditional absolute jumps j (op 2) and jal (op 3).
// The 26-bit target index replaces the low bits of PC's 256MB segment.
void Arm64Jit::Comp_Jump(MIPSOpcode op) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in Jump delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}
	u32 off = TARGET26;
	u32 targetAddr = (GetCompilerPC() & 0xF0000000) | off;

	// Might be a stubbed address or something?
	if (!Memory::IsValidAddress(targetAddr) || (targetAddr & 3) != 0) {
		// Only report on the first exit: later ones may be unreachable paths.
		if (js.nextExit == 0) {
			ERROR_LOG_REPORT(Log::JIT, "Jump to invalid address: %08x", targetAddr);
		} else {
			js.compiling = false;
		}
		// TODO: Mark this block dirty or something? May be indication it will be changed by imports.
		// Emit a call into the exception handler with PC set past the delay slot.
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
		gpr.SetRegImm(SCRATCH1, GetCompilerPC() + 8);
		MovToPC(SCRATCH1);
		MOVI2R(W0, targetAddr);
		QuickCallFunction(SCRATCH1, (const void *)&HitInvalidJump);
		WriteSyscallExit();
		return;
	}

	switch (op >> 26) {
	case 2: //j
		CompileDelaySlot(DELAYSLOT_NICE);
		// Optionally keep compiling at the target instead of ending the block.
		if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
			AddContinuedBlock(targetAddr);
			// Account for the increment in the loop.
			js.compilerPC = targetAddr - 4;
			// In case the delay slot was a break or something.
			js.compiling = true;
			return;
		}
		FlushAll();
		WriteExit(targetAddr, js.nextExit++);
		break;

	case 3: //jal
		// Known function replacements (e.g. memcpy) can swallow the whole call.
		if (ReplaceJalTo(targetAddr))
			return;

		// Link: RA = address after the delay slot.
		gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);
		if (jo.continueJumps && js.numInstructions < jo.continueMaxInstructions) {
			AddContinuedBlock(targetAddr);
			// Account for the increment in the loop.
			js.compilerPC = targetAddr - 4;
			// In case the delay slot was a break or something.
			js.compiling = true;
			return;
		}
		FlushAll();
		WriteExit(targetAddr, js.nextExit++);
		break;

	default:
		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
		break;
	}
	js.compiling = false;
}
584
585
// Compiles the register jumps jr (funct 8) and jalr (funct 9). The target
// address comes from rs at runtime, so the block always exits through
// WriteExitDestInR unless a syscall in the delay slot takes over.
void Arm64Jit::Comp_JumpReg(MIPSOpcode op)
{
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}
	MIPSGPReg rs = _RS;
	MIPSGPReg rd = _RD;
	// jalr links into rd (commonly RA); linking to $zero is a plain jump.
	bool andLink = (op & 0x3f) == 9 && rd != MIPS_REG_ZERO;

	MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
	js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);
	bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs);
	// If the slot reads rd and we link rd == rs, the order matters - treat as not nice.
	if (andLink && rs == rd)
		delaySlotIsNice = false;
	CONDITIONAL_NICE_DELAYSLOT;

	ARM64Reg destReg = INVALID_REG;
	if (IsSyscall(delaySlotOp)) {
		gpr.MapReg(rs);
		MovToPC(gpr.R(rs)); // For syscall to be able to return.
		if (andLink)
			gpr.SetImm(rd, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_FLUSH);
		return; // Syscall (delay slot) wrote exit code.
	} else if (delaySlotIsNice) {
		if (andLink)
			gpr.SetImm(rd, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);

		if (!andLink && rs == MIPS_REG_RA && g_Config.bDiscardRegsOnJRRA) {
			// According to the MIPS ABI, there are some regs we don't need to preserve.
			// Let's discard them so we don't need to write them back.
			// NOTE: Not all games follow the MIPS ABI! Tekken 6, for example, will crash
			// with this enabled.
			gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH);
			for (int i = MIPS_REG_A0; i <= MIPS_REG_T7; i++)
				gpr.DiscardR((MIPSGPReg)i);
			gpr.DiscardR(MIPS_REG_T8);
			gpr.DiscardR(MIPS_REG_T9);
		}

		// If the target is a known immediate, we may continue compiling there.
		if (jo.continueJumps && gpr.IsImm(rs) && js.numInstructions < jo.continueMaxInstructions) {
			AddContinuedBlock(gpr.GetImm(rs));
			// Account for the increment in the loop.
			js.compilerPC = gpr.GetImm(rs) - 4;
			// In case the delay slot was a break or something.
			js.compiling = true;
			return;
		}

		gpr.MapReg(rs);
		destReg = gpr.R(rs); // Safe because FlushAll doesn't change any regs
		FlushAll();
	} else {
		// Since we can't be in a delay slot, should be safe to steal FLAGTEMPREG for a temp reg.
		// It will be saved, even if a function is called.
		// Copy rs out BEFORE the delay slot runs, since the slot may clobber it.
		destReg = DecodeReg(FLAGTEMPREG);
		gpr.MapReg(rs);
		MOV(destReg, gpr.R(rs));
		if (andLink)
			gpr.SetImm(rd, GetCompilerPC() + 8);
		CompileDelaySlot(DELAYSLOT_NICE);
		FlushAll();
	}

	// Sanity check on the funct field; both valid cases were handled above.
	switch (op & 0x3f)
	{
	case 8: //jr
		break;
	case 9: //jalr
		break;
	default:
		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
		break;
	}

	WriteExitDestInR(destReg);
	js.compiling = false;
}
665
666
667
// Compiles a syscall instruction: flushes JIT state, updates PC and the
// downcount, then calls into the HLE layer (via a quick per-syscall function
// when available) and exits the block through the syscall exit.
void Arm64Jit::Comp_Syscall(MIPSOpcode op)
{
	// NOTE(review): 0x03FFFFcc appears to be a specific malformed syscall
	// encoding worth flagging - confirm against the HLE tables.
	if (op.encoding == 0x03FFFFcc) {
		WARN_LOG(Log::JIT, "Encountered bad syscall instruction at %08x (%08x)", js.compilerPC, op.encoding);
	}
	if (!g_Config.bSkipDeadbeefFilling)
	{
		// All of these will be overwritten with DEADBEEF anyway.
		gpr.DiscardR(MIPS_REG_COMPILER_SCRATCH);
		// We need to keep A0 - T3, which are used for args.
		gpr.DiscardR(MIPS_REG_T4);
		gpr.DiscardR(MIPS_REG_T5);
		gpr.DiscardR(MIPS_REG_T6);
		gpr.DiscardR(MIPS_REG_T7);
		gpr.DiscardR(MIPS_REG_T8);
		gpr.DiscardR(MIPS_REG_T9);

		gpr.DiscardR(MIPS_REG_HI);
		gpr.DiscardR(MIPS_REG_LO);
	}

	// If we're in a delay slot, this is off by one.
	const int offset = js.inDelaySlot ? -1 : 0;
	WriteDownCount(offset, false);
	RestoreRoundingMode();
	js.downcountAmount = -offset;

	// In a delay slot the branch already stored PC; otherwise point PC past
	// this instruction so the syscall returns to the right place.
	if (!js.inDelaySlot) {
		gpr.SetRegImm(SCRATCH1, GetCompilerPC() + 4);
		MovToPC(SCRATCH1);
	}

	FlushAll();

	SaveStaticRegisters();
#ifdef USE_PROFILER
	// When profiling, we can't skip CallSyscall, since it times syscalls.
	MOVI2R(W0, op.encoding);
	QuickCallFunction(X1, (void *)&CallSyscall);
#else
	// Skip the CallSyscall where possible.
	void *quickFunc = GetQuickSyscallFunc(op);
	if (quickFunc) {
		MOVI2R(X0, (uintptr_t)GetSyscallFuncPointer(op));
		// Already flushed, so X1 is safe.
		QuickCallFunction(X1, quickFunc);
	} else {
		MOVI2R(W0, op.encoding);
		QuickCallFunction(X1, (void *)&CallSyscall);
	}
#endif
	LoadStaticRegisters();
	ApplyRoundingMode();

	WriteSyscallExit();
	js.compiling = false;
}
724
725
// BREAK: no fast path - fall back to the generic (interpreter) handler, then
// end the block through the syscall exit so the dispatcher regains control.
void Arm64Jit::Comp_Break(MIPSOpcode op)
{
	Comp_Generic(op);
	WriteSyscallExit();
	js.compiling = false;
}
731
732
} // namespace Mipscomp
733
734
#endif // PPSSPP_ARCH(ARM64)
735
736