CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/IR/IRCompBranch.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "Common/Data/Convert/SmallDataConvert.h"
19
#include "Common/Profiler/Profiler.h"
20
21
#include "Core/Reporting.h"
22
#include "Core/Config.h"
23
#include "Core/MemMap.h"
24
#include "Core/HLE/HLE.h"
25
#include "Core/HLE/HLETables.h"
26
27
#include "Core/MIPS/MIPS.h"
28
#include "Core/MIPS/MIPSCodeUtils.h"
29
#include "Core/MIPS/MIPSAnalyst.h"
30
#include "Core/MIPS/MIPSTables.h"
31
32
#include "Core/MIPS/IR/IRFrontend.h"
33
#include "Core/MIPS/JitCommon/JitBlockCache.h"
34
35
// Instruction field accessors.  Every one of these decodes part of a variable
// named `op`, so they are only usable where such a variable is in scope.
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)

// 5-bit fields located at bits 6..10 and bits 11..15 respectively.
#define _POS ((op >> 6) & 0x1F)
#define _SIZE ((op >> 11) & 0x1F)

// Low 26 bits of the instruction.
#define _IMM26 (op & 0x03FFFFFF)

// Branch / jump displacements: the encoded immediate shifted left by two.
// TARGET16 goes through SignExtend16ToU32() first and is then cast to int.
#define TARGET16 ((int)(SignExtend16ToU32(op) << 2))
#define TARGET26 (_IMM26 << 2)

#define LOOPOPTIMIZATION 0

// True when the top six bits of op are zero and the bottom six equal 13,
// i.e. everything in between is ignored.
#define MIPS_IS_BREAK(op) (((op) & 0xFC00003F) == 13)
51
52
using namespace MIPSAnalyst;
53
54
namespace MIPSComp
55
{
56
57
// Compiles a conditional branch that compares two registers (rs against rt).
//
// op:     the branch instruction being compiled.
// cc:     the comparison under which the block exits to the NOT-taken target;
//         callers pass the inverse of the branch's own condition.
// likely: when set, the delay slot is only compiled on the taken path.
//
// The emitted IR is: [delay slot] downcount, conditional exit (not taken),
// [delay slot if likely], unconditional exit to the branch target.
void IRFrontend::BranchRSRTComp(MIPSOpcode op, IRComparison cc, bool likely) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in RSRTComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}

	const int branchOffset = TARGET16;
	const MIPSGPReg rt = _RT;
	const MIPSGPReg rs = _RS;
	const u32 takenPC = GetCompilerPC() + branchOffset + 4;

	BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
	branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rt, rs);

	// The delay slot's cycles are charged up front.
	js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);

	// A likely branch that merely hops over a "break" in its delay slot commonly
	// follows div/divu as a divide-by-zero guard.  Nothing useful would come of
	// compiling that, so swallow the break and emit no branch at all.
	const bool onlySkipsABreak = likely && branchOffset == 4 && MIPS_IS_BREAK(branchInfo.delaySlotOp);
	if (onlySkipsABreak) {
		EatInstruction(branchInfo.delaySlotOp);
		// Take back one cycle so the eaten instruction is not counted twice.
		js.downcountAmount--;
		return;
	}

	// Operands for the comparison.  When the delay slot is compiled ahead of the
	// compare and is not "nice", the original values are copied into temps first
	// (presumably because the delay slot may overwrite rs/rt).  Likely branches
	// compile the delay slot after the compare, so they skip the copy.  Register 0
	// is never copied.
	MIPSGPReg lhs = rs;
	MIPSGPReg rhs = rt;
	const bool snapshotOperands = !(branchInfo.delaySlotIsNice || likely);
	if (snapshotOperands && rs != 0) {
		ir.Write(IROp::Mov, IRTEMP_LHS, rs);
		lhs = static_cast<MIPSGPReg>(IRTEMP_LHS);
	}
	if (snapshotOperands && rt != 0) {
		ir.Write(IROp::Mov, IRTEMP_RHS, rt);
		rhs = static_cast<MIPSGPReg>(IRTEMP_RHS);
	}

	// A regular (non-likely) branch emits its delay slot before either exit.
	if (!(likely || branchInfo.delaySlotIsBranch))
		CompileDelaySlot();

	const int pendingCycles = js.downcountAmount;
	ir.Write(IROp::Downcount, 0, ir.AddConstant(pendingCycles));
	js.downcountAmount = 0;

	// Not taken: leave the block when cc holds.
	FlushAll();
	ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), lhs, rhs);

	if (branchInfo.delaySlotIsBranch) {
		// The delay slot is itself a branch and is never compiled by this function,
		// but any link register it writes is still set on the taken path.  The link
		// address is computed from the delay slot's point of view, hence +12.
		const bool linksRA = (branchInfo.delaySlotInfo & OUT_RA) != 0;
		const bool linksRD = (branchInfo.delaySlotInfo & OUT_RD) != 0;
		if (linksRA)
			ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
		if (linksRD)
			ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
	} else if (likely) {
		// Emitting more code after an exit makes the block "impure".
		CompileDelaySlot();
	}

	// Taken.
	FlushAll();
	ir.Write(IROp::ExitToConst, ir.AddConstant(takenPC));

	// Step over the delay slot and finish the block.
	js.compilerPC += 4;
	js.compiling = false;
}
123
124
// Compiles a conditional branch that tests a single register (rs).
//
// op:      the branch instruction being compiled.
// cc:      the comparison under which the block exits to the NOT-taken target;
//          callers pass the inverse of the branch's own condition.
// andLink: when set, RA is loaded with PC + 8 before the comparison, i.e.
//          regardless of whether the branch ends up taken.
// likely:  when set, the delay slot is only compiled on the taken path.
void IRFrontend::BranchRSZeroComp(MIPSOpcode op, IRComparison cc, bool andLink, bool likely) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in RSZeroComp delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}

	const int branchOffset = TARGET16;
	const MIPSGPReg rs = _RS;
	const u32 takenPC = GetCompilerPC() + branchOffset + 4;

	BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), andLink, likely);
	branchInfo.delaySlotIsNice = IsDelaySlotNiceReg(op, branchInfo.delaySlotOp, rs);

	// The delay slot's cycles are charged up front.
	js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);

	// Whenever the delay slot is not "nice", compare against a copy of rs taken
	// now.  Note that, unlike BranchRSRTComp, this copy is made for likely
	// branches as well, and it is made before the link register is written.
	MIPSGPReg lhs = rs;
	if (!branchInfo.delaySlotIsNice) {
		ir.Write(IROp::Mov, IRTEMP_LHS, rs);
		lhs = static_cast<MIPSGPReg>(IRTEMP_LHS);
	}

	if (andLink) {
		ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8);
	}

	// A regular (non-likely) branch emits its delay slot before either exit.
	if (!(likely || branchInfo.delaySlotIsBranch))
		CompileDelaySlot();

	const int pendingCycles = js.downcountAmount;
	ir.Write(IROp::Downcount, 0, ir.AddConstant(pendingCycles));
	js.downcountAmount = 0;

	// Not taken: leave the block when cc holds.
	FlushAll();
	ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), lhs);

	if (branchInfo.delaySlotIsBranch) {
		// The delay slot is itself a branch and is never compiled by this function,
		// but any link register it writes is still set on the taken path.  The link
		// address is computed from the delay slot's point of view, hence +12.
		const bool linksRA = (branchInfo.delaySlotInfo & OUT_RA) != 0;
		const bool linksRD = (branchInfo.delaySlotInfo & OUT_RD) != 0;
		if (linksRA)
			ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
		if (linksRD)
			ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
	} else if (likely) {
		CompileDelaySlot();
	}

	// Taken.
	FlushAll();
	ir.Write(IROp::ExitToConst, ir.AddConstant(takenPC));

	// Step over the delay slot and finish the block.
	js.compilerPC += 4;
	js.compiling = false;
}
174
175
// Dispatches the relative branches selected by the top six bits of op.
//
// The comparison handed to each helper is the inverse of the branch's own
// condition, because the helper uses it to skip the branch (exit to the
// not-taken target).
void IRFrontend::Comp_RelBranch(MIPSOpcode op) {
	const auto primaryOpcode = op >> 26;
	switch (primaryOpcode) {
	// Two-register compares.
	case 4:  // beq
		BranchRSRTComp(op, IRComparison::NotEqual, false);
		break;
	case 5:  // bne
		BranchRSRTComp(op, IRComparison::Equal, false);
		break;
	case 20:  // beql
		BranchRSRTComp(op, IRComparison::NotEqual, true);
		break;
	case 21:  // bnel
		BranchRSRTComp(op, IRComparison::Equal, true);
		break;

	// Single-register compares, never linking.
	case 6:  // blez
		BranchRSZeroComp(op, IRComparison::Greater, false, false);
		break;
	case 7:  // bgtz
		BranchRSZeroComp(op, IRComparison::LessEqual, false, false);
		break;
	case 22:  // blezl
		BranchRSZeroComp(op, IRComparison::Greater, false, true);
		break;
	case 23:  // bgtzl
		BranchRSZeroComp(op, IRComparison::LessEqual, false, true);
		break;

	default:
		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
		break;
	}
}
195
196
// Dispatches the single-register branches selected by bits 16..20 of op.
//
// Among the accepted selector values the individual bits mean:
//   bit 0 (1)  - clear: bltz family, set: bgez family
//   bit 1 (2)  - "likely" variant
//   bit 4 (16) - "and link" variant
// As elsewhere, the comparison passed down is the inverse of the branch
// condition (bltz skips on GreaterEqual, bgez skips on Less).
void IRFrontend::Comp_RelBranchRI(MIPSOpcode op) {
	const auto selector = (op >> 16) & 0x1F;
	switch (selector) {
	case 0:   // bltz
	case 1:   // bgez
	case 2:   // bltzl
	case 3:   // bgezl
	case 16:  // bltzal
	case 17:  // bgezal
	case 18:  // bltzall
	case 19:  // bgezall
	{
		const bool isBgez = (selector & 1) != 0;
		const bool likely = (selector & 2) != 0;
		const bool andLink = (selector & 16) != 0;
		const IRComparison skipWhen = isBgez ? IRComparison::Less : IRComparison::GreaterEqual;
		BranchRSZeroComp(op, skipWhen, andLink, likely);
		break;
	}

	default:
		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
		break;
	}
}
211
212
// If likely is set, discard the branch slot if NOT taken.
213
void IRFrontend::BranchFPFlag(MIPSOpcode op, IRComparison cc, bool likely) {
214
if (js.inDelaySlot) {
215
ERROR_LOG_REPORT(Log::JIT, "Branch in FPFlag delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
216
return;
217
}
218
int offset = TARGET16;
219
u32 targetAddr = GetCompilerPC() + offset + 4;
220
221
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
222
223
ir.Write(IROp::FpCondToReg, IRTEMP_LHS);
224
if (!likely && !branchInfo.delaySlotIsBranch)
225
CompileDelaySlot();
226
227
int dcAmount = js.downcountAmount;
228
ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
229
js.downcountAmount = 0;
230
231
FlushAll();
232
// Not taken
233
ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), IRTEMP_LHS, 0);
234
// Taken
235
if (likely && !branchInfo.delaySlotIsBranch)
236
CompileDelaySlot();
237
if (branchInfo.delaySlotIsBranch) {
238
// We still link when the branch is taken (targetAddr case.)
239
// Remember, it's from the perspective of the delay slot, so +12.
240
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
241
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
242
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
243
ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
244
}
245
246
FlushAll();
247
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
248
249
// Account for the delay slot.
250
js.compilerPC += 4;
251
js.compiling = false;
252
}
253
254
// Dispatches the FPU condition-flag branches selected by bits 16..20 of op.
//
// The comparison handed to BranchFPFlag is the one under which the branch is
// skipped: bc1f (branch when the flag is false) skips on NotEqual-to-zero,
// bc1t skips on Equal-to-zero.
void IRFrontend::Comp_FPUBranch(MIPSOpcode op) {
	switch ((op >> 16) & 0x1f) {
	case 0: BranchFPFlag(op, IRComparison::NotEqual, false); break;  // bc1f
	case 1: BranchFPFlag(op, IRComparison::Equal, false); break;     // bc1t
	case 2: BranchFPFlag(op, IRComparison::NotEqual, true); break;   // bc1fl
	case 3: BranchFPFlag(op, IRComparison::Equal, true); break;      // bc1tl
	default:
		// This is a compile routine, so report it the same way as the other
		// Comp_* dispatchers in this file (the message used to say "interpret").
		_dbg_assert_msg_(false, "Trying to compile instruction that can't be compiled");
		break;
	}
}
265
266
// If likely is set, discard the branch slot if NOT taken.
267
void IRFrontend::BranchVFPUFlag(MIPSOpcode op, IRComparison cc, bool likely) {
268
if (js.inDelaySlot) {
269
ERROR_LOG_REPORT(Log::JIT, "Branch in VFPU delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
270
return;
271
}
272
int offset = TARGET16;
273
u32 targetAddr = GetCompilerPC() + offset + 4;
274
275
BranchInfo branchInfo(GetCompilerPC(), op, GetOffsetInstruction(1), false, likely);
276
277
js.downcountAmount += MIPSGetInstructionCycleEstimate(branchInfo.delaySlotOp);
278
ir.Write(IROp::VfpuCtrlToReg, IRTEMP_LHS, VFPU_CTRL_CC);
279
280
// Sometimes there's a VFPU branch in a delay slot (Disgaea 2: Dark Hero Days, Zettai Hero Project, La Pucelle)
281
// The behavior is undefined - the CPU may take the second branch even if the first one passes.
282
// However, it does consistently try each branch, which these games seem to expect.
283
if (!likely && !branchInfo.delaySlotIsBranch)
284
CompileDelaySlot();
285
286
int dcAmount = js.downcountAmount;
287
ir.Write(IROp::Downcount, 0, ir.AddConstant(dcAmount));
288
js.downcountAmount = 0;
289
290
int imm3 = (op >> 18) & 7;
291
292
ir.Write(IROp::AndConst, IRTEMP_LHS, IRTEMP_LHS, ir.AddConstant(1 << imm3));
293
FlushAll();
294
ir.Write(ComparisonToExit(cc), ir.AddConstant(ResolveNotTakenTarget(branchInfo)), IRTEMP_LHS, 0);
295
296
if (likely && !branchInfo.delaySlotIsBranch)
297
CompileDelaySlot();
298
if (branchInfo.delaySlotIsBranch) {
299
// We still link when the branch is taken (targetAddr case.)
300
// Remember, it's from the perspective of the delay slot, so +12.
301
if ((branchInfo.delaySlotInfo & OUT_RA) != 0)
302
ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 12);
303
if ((branchInfo.delaySlotInfo & OUT_RD) != 0)
304
ir.WriteSetConstant(MIPS_GET_RD(branchInfo.delaySlotOp), GetCompilerPC() + 12);
305
}
306
307
// Taken
308
FlushAll();
309
ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));
310
311
// Account for the delay slot.
312
js.compilerPC += 4;
313
js.compiling = false;
314
}
315
316
// Dispatches the VFPU condition branches selected by bits 16..17 of op:
//   0 = bvf, 1 = bvt, 2 = bvfl, 3 = bvtl.
// Bit 0 chooses "branch if true" over "branch if false"; bit 1 marks the likely
// variants.  The comparison passed down is the one that skips the branch, so
// the "false" forms skip on NotEqual and the "true" forms skip on Equal.
void IRFrontend::Comp_VBranch(MIPSOpcode op) {
	const auto variant = (op >> 16) & 3;
	const bool branchIfTrue = (variant & 1) != 0;
	const bool likely = (variant & 2) != 0;
	const IRComparison skipWhen = branchIfTrue ? IRComparison::Equal : IRComparison::NotEqual;
	BranchVFPUFlag(op, skipWhen, likely);
}
324
325
// Compiles the absolute jumps j (top six bits == 2) and jal (== 3).
//
// The target keeps the top four bits of the current compiler PC and takes the
// rest from the instruction's 26-bit immediate, shifted left by two.  jal
// additionally loads RA with PC + 8 before the delay slot is compiled.
//
// NOTE(review): unlike most branch helpers in this file, no
// MIPSGetInstructionCycleEstimate() for the delay slot is added to
// js.downcountAmount here - confirm whether that is intentional.
void IRFrontend::Comp_Jump(MIPSOpcode op) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in Jump delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}

	const u32 regionOffset = TARGET26;
	const u32 targetAddr = (GetCompilerPC() & 0xF0000000) | regionOffset;

	// The target may be a stubbed address or similar.
	const bool targetIsValid = Memory::IsValidAddress(targetAddr);
	if (!targetIsValid) {
		if (js.preloading) {
			// While preloading, cancel instead - the block will likely be fixed later.
			js.cancel = true;
		} else {
			ERROR_LOG_REPORT(Log::JIT, "Jump to invalid address: %08x", targetAddr);
		}
		// TODO: Mark this block dirty or something? May be an indication that it
		// will be changed by imports.
		// Compilation carries on regardless so the block gets completed and
		// crashes properly.
	}

	const auto primaryOpcode = op >> 26;
	const bool isJal = primaryOpcode == 3;
	if (primaryOpcode == 2 || isJal) {
		if (isJal) {
			ir.WriteSetConstant(MIPS_REG_RA, GetCompilerPC() + 8);
		}
		CompileDelaySlot();
	} else {
		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
	}

	const int pendingCycles = js.downcountAmount;
	ir.Write(IROp::Downcount, 0, ir.AddConstant(pendingCycles));
	js.downcountAmount = 0;

	FlushAll();
	ir.Write(IROp::ExitToConst, ir.AddConstant(targetAddr));

	// Step over the delay slot and finish the block.
	js.compilerPC += 4;
	js.compiling = false;
}
371
372
// Compiles the register jumps jr (low six bits == 8) and jalr (== 9).
//
// jalr links into rd (PC + 8) unless rd is the zero register.  The jump target
// is read from rs; if the delay slot is not "nice", or the link would overwrite
// rs (rs == rd), the target is copied to a temp before anything else runs.
//
// A syscall in the delay slot is special-cased: the PC is set from rs first and
// the delay slot is then left to emit the downcount and the exit itself (see
// Comp_Syscall), so this function returns without writing an exit.
void IRFrontend::Comp_JumpReg(MIPSOpcode op) {
	if (js.inDelaySlot) {
		ERROR_LOG_REPORT(Log::JIT, "Branch in JumpReg delay slot at %08x in block starting at %08x", GetCompilerPC(), js.blockStart);
		return;
	}

	const MIPSGPReg rs = _RS;
	const MIPSGPReg rd = _RD;
	const auto funct = op & 0x3f;
	const bool andLink = funct == 9 && rd != MIPS_REG_ZERO;

	MIPSOpcode delaySlotOp = GetOffsetInstruction(1);
	// The delay slot's cycles are charged up front.
	js.downcountAmount += MIPSGetInstructionCycleEstimate(delaySlotOp);

	// Linking into the very register we jump through rules out the "nice" path.
	const bool linkOverwritesTarget = andLink && rs == rd;
	const bool delaySlotIsNice = IsDelaySlotNiceReg(op, delaySlotOp, rs) && !linkOverwritesTarget;

	if (IsSyscall(delaySlotOp)) {
		ir.Write(IROp::SetPC, 0, rs);
		if (andLink) {
			ir.WriteSetConstant(rd, GetCompilerPC() + 8);
		}
		// The syscall flushes and writes the exit code on its own.
		CompileDelaySlot();

		// Still account for the delay slot itself in the bytes consumed.
		js.compilerPC += 4;
		return;
	}

	// Pick the register the exit will read.  Using rs directly is safe on the
	// nice path because FlushAll doesn't change any regs.
	int destReg = rs;
	if (!delaySlotIsNice) {
		// Bad delay slot: preserve the target before the link / delay slot run.
		ir.Write(IROp::Mov, IRTEMP_LHS, rs);
		destReg = IRTEMP_LHS;
	}
	if (andLink) {
		ir.WriteSetConstant(rd, GetCompilerPC() + 8);
	}
	CompileDelaySlot();
	FlushAll();

	// Only jr and jalr are expected here.
	if (funct != 8 && funct != 9) {
		_dbg_assert_msg_(false,"Trying to compile instruction that can't be compiled");
	}

	const int pendingCycles = js.downcountAmount;
	ir.Write(IROp::Downcount, 0, ir.AddConstant(pendingCycles));
	js.downcountAmount = 0;

	ir.Write(IROp::ExitToReg, 0, destReg, 0);

	// Step over the delay slot and finish the block.
	js.compilerPC += 4;
	js.compiling = false;
}
435
436
// Compiles a syscall: writes out the pending downcount, sets the PC to the next
// instruction (unless compiling a delay slot), emits the Syscall op with the raw
// instruction encoding, then exits to whatever PC is current and ends the block.
void IRFrontend::Comp_Syscall(MIPSOpcode op) {
	const bool inDelaySlot = js.inDelaySlot;

	// Inside a delay slot one cycle is subtracted.  Note that this leaves the
	// count off by one compared to the interpreter.
	int pendingCycles = js.downcountAmount;
	if (inDelaySlot) {
		pendingCycles -= 1;
	}
	ir.Write(IROp::Downcount, 0, ir.AddConstant(pendingCycles));
	js.downcountAmount = 0;

	// Outside of a delay slot the PC has to be advanced here.
	if (!inDelaySlot) {
		ir.Write(IROp::SetPCConst, 0, ir.AddConstant(GetCompilerPC() + 4));
	}

	FlushAll();

	// The Syscall op is bracketed by a rounding-mode restore / re-apply.
	RestoreRoundingMode();
	ir.Write(IROp::Syscall, 0, ir.AddConstant(op.encoding));
	ApplyRoundingMode();

	ir.Write(IROp::ExitToPC);

	js.compiling = false;
}
456
457
// Compiles a break: records the address of the break instruction itself as the
// PC, emits the Break op and ends the block.  No downcount is written here.
void IRFrontend::Comp_Break(MIPSOpcode op) {
	const u32 breakPC = GetCompilerPC();
	ir.Write(IROp::SetPCConst, 0, ir.AddConstant(breakPC));
	ir.Write(IROp::Break);

	js.compiling = false;
}
462
463
} // namespace MIPSComp
464
465