Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/IR/IRInterpreter.cpp
5667 views
1
#include <algorithm>
2
#include <cmath>
3
4
#include "ppsspp_config.h"
5
6
#if PPSSPP_PLATFORM(WINDOWS) && PPSSPP_ARCH(ARM64)
7
#include <arm64intr.h>
8
#endif
9
10
#include "Common/BitSet.h"
11
#include "Common/BitScan.h"
12
#include "Common/Common.h"
13
#include "Common/CommonFuncs.h"
14
#include "Common/Data/Convert/SmallDataConvert.h"
15
#include "Common/Math/math_util.h"
16
#include "Common/Math/SIMDHeaders.h"
17
#include "Core/Core.h"
18
#include "Core/CoreTiming.h"
19
#include "Core/Debugger/Breakpoints.h"
20
#include "Core/HLE/HLE.h"
21
#include "Core/HLE/ReplaceTables.h"
22
#include "Core/MemMap.h"
23
#include "Core/MIPS/MIPS.h"
24
#include "Core/MIPS/MIPSTables.h"
25
#include "Core/MIPS/MIPSVFPUUtils.h"
26
#include "Core/MIPS/IR/IRInst.h"
27
#include "Core/MIPS/IR/IRInterpreter.h"
28
#include "Core/System.h"
29
#include "Core/MIPS/MIPSTracer.h"
30
31
#if PPSSPP_ARCH(ARM64)
32
33
// TODO: This should be put in some common header.
// Reads the ARM64 FPCR (Floating-Point Control Register), which holds the
// rounding-mode and flush-to-zero settings applied by IRApplyRounding below.
static inline u64 ARM64ReadFPCR() {
#if PPSSPP_PLATFORM(WINDOWS)
	// MSVC has no inline asm on ARM64; use the intrinsic from <arm64intr.h>.
	return _ReadStatusReg(ARM64_FPCR);
#else
	// TODO: Try __builtin_arm_get_fpcr()
	u64 fpcr;  // not really 64-bit, just to match the register size.
	asm volatile ("mrs %0, fpcr" : "=r" (fpcr));
	return fpcr;
#endif
}
44
45
// Writes the ARM64 FPCR (Floating-Point Control Register).
// Counterpart to ARM64ReadFPCR; callers read-modify-write specific bits.
static inline void ARM64WriteFPCR(u64 fpcr) {
#if PPSSPP_PLATFORM(WINDOWS)
	// MSVC has no inline asm on ARM64; use the intrinsic from <arm64intr.h>.
	_WriteStatusReg(ARM64_FPCR, fpcr);
#else
	// TODO: Try __builtin_arm_set_fpcr()
	// Write back the modified FPCR
	asm volatile ("msr fpcr, %0" : : "r" (fpcr));
#endif
}
54
55
#endif
56
57
#ifdef mips
58
// Why do MIPS compilers define something so generic? Try to keep defined, at least...
59
#undef mips
60
#define mips mips
61
#endif
62
63
// Constant vectors for IROp::Vec4Init, indexed by inst->src1:
// 0 = all zeros, 1 = all ones, 2 = all -1.0f, 3..6 = unit vectors along X/Y/Z/W.
// Only 7 rows are initialized; row 7 is zero-filled by the language rules
// (declared [8][4] — presumably padding/spare, TODO confirm).
alignas(16) static const float vec4InitValues[8][4] = {
	{ 0.0f, 0.0f, 0.0f, 0.0f },
	{ 1.0f, 1.0f, 1.0f, 1.0f },
	{ -1.0f, -1.0f, -1.0f, -1.0f },
	{ 1.0f, 0.0f, 0.0f, 0.0f },
	{ 0.0f, 1.0f, 0.0f, 0.0f },
	{ 0.0f, 0.0f, 1.0f, 0.0f },
	{ 0.0f, 0.0f, 0.0f, 1.0f },
};
72
73
// Sign bit set in every 32-bit lane: XOR mask used to negate all four floats
// at once (see the Vec4Neg SSE path).
alignas(16) static const uint32_t signBits[4] = {
	0x80000000, 0x80000000, 0x80000000, 0x80000000,
};
76
77
// All bits except the sign bit: AND mask used to take the absolute value of
// all four floats at once (see the Vec4Abs SSE path).
alignas(16) static const uint32_t noSignMask[4] = {
	0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF, 0x7FFFFFFF,
};
80
81
// Keeps only the low byte of each 32-bit lane. Not referenced in this part of
// the file — presumably used by a SIMD pack/unpack path further down; verify.
alignas(16) static const uint32_t lowBytesMask[4] = {
	0x000000FF, 0x000000FF, 0x000000FF, 0x000000FF,
};
84
85
// Handles a breakpoint reached at `pc` while running IR.
// Returns 0 to keep executing, nonzero to stop the interpreter loop.
u32 IRRunBreakpoint(u32 pc) {
	// Should we skip this breakpoint?
	const uint32_t skipPC = g_breakpoints.CheckSkipFirst();
	if (skipPC == pc || skipPC == currentMIPS->pc) {
		return 0;
	}

	// Did we already hit one? If the core is no longer actively running,
	// just report a stop without re-triggering.
	const bool stillRunning = coreState == CORE_RUNNING_CPU || coreState == CORE_NEXTFRAME;
	if (!stillRunning) {
		return 1;
	}

	g_breakpoints.ExecBreakPoint(pc);
	// ExecBreakPoint may have changed coreState; stop unless we're still running.
	return (coreState == CORE_RUNNING_CPU) ? 0 : 1;
}
98
99
// Handles a memory-access check at `pc` touching `addr`.
// Returns 0 to keep executing, nonzero to stop the interpreter loop.
u32 IRRunMemCheck(u32 pc, u32 addr) {
	// Should we skip this breakpoint?
	const uint32_t skipPC = g_breakpoints.CheckSkipFirst();
	if (skipPC == pc || skipPC == currentMIPS->pc) {
		return 0;
	}

	// Did we already hit one? If the core is no longer actively running,
	// just report a stop without re-triggering.
	const bool stillRunning = coreState == CORE_RUNNING_CPU || coreState == CORE_NEXTFRAME;
	if (!stillRunning) {
		return 1;
	}

	g_breakpoints.ExecOpMemCheck(addr, pc);
	// ExecOpMemCheck may have changed coreState; stop unless we're still running.
	return (coreState == CORE_RUNNING_CPU) ? 0 : 1;
}
112
113
// Applies the guest's FCR31 rounding mode and flush-to-zero setting to the
// host FPU (x86 MXCSR or ARM64 FPCR), so host float math matches the PSP.
// Paired with IRRestoreRounding, which clears these bits again.
void IRApplyRounding(MIPSState *mips) {
	// FCR31 bits [1:0] = rounding mode, bit 24 = flush-to-zero.
	u32 fcr1Bits = mips->fcr31 & 0x01000003;
	// If these are 0, we just leave things as they are.
	if (fcr1Bits) {
		int rmode = fcr1Bits & 3;
		bool ftz = (fcr1Bits & 0x01000000) != 0;
#if PPSSPP_ARCH(SSE2)
		// Clear MXCSR rounding-control bits [14:13] before inserting ours.
		u32 csr = _mm_getcsr() & ~0x6000;
		// Translate the rounding mode bits to X86, the same way as in Asm.cpp.
		// MIPS and x86 agree on modes 0 (nearest) and swap the meaning of 1/3,
		// which the bit-1 XOR handles.
		if (rmode & 1) {
			rmode ^= 2;
		}
		csr |= rmode << 13;

		if (ftz) {
			// Flush to zero (MXCSR FZ bit).
			csr |= 0x8000;
		}
		_mm_setcsr(csr);
#elif PPSSPP_ARCH(ARM64)
		u64 fpcr = ARM64ReadFPCR();
		// Translate MIPS to ARM rounding mode.
		static const u8 lookup[4] = {0, 3, 1, 2};

		fpcr &= ~(3 << 22);  // Clear RMode bits [23:22]
		fpcr |= ((u64)lookup[rmode] << 22);

		if (ftz) {
			fpcr |= 1 << 24;  // Set the FZ (flush-to-zero) bit.
		}

		ARM64WriteFPCR(fpcr);
#endif
	}
}
148
149
// Resets the host FPU to default round-to-nearest with flush-to-zero off,
// undoing whatever IRApplyRounding set.
void IRRestoreRounding() {
#if PPSSPP_ARCH(SSE2)
	// TODO: We should avoid this if we didn't apply rounding in the first place.
	// In the meantime, clear out FTZ and rounding mode bits.
	u32 csr = _mm_getcsr();
	// Bits [14:13] = rounding control, bit 15 = FZ; ~(7 << 13) clears all three.
	csr &= ~(7 << 13);
	_mm_setcsr(csr);
#elif PPSSPP_ARCH(ARM64)
	u64 fpcr = ARM64ReadFPCR(); // not really 64-bit, just to match the register size.
	fpcr &= ~(7 << 22); // Clear bits [23:22] for rounding, 24 for FTZ
	// Write back the modified FPCR
	ARM64WriteFPCR(fpcr);
#endif
}
163
164
u32 IRInterpret(MIPSState *mips, const IRInst *inst) {
165
while (true) {
166
switch (inst->op) {
167
case IROp::SetConst:
168
mips->r[inst->dest] = inst->constant;
169
break;
170
case IROp::SetConstF:
171
memcpy(&mips->f[inst->dest], &inst->constant, 4);
172
break;
173
case IROp::Add:
174
mips->r[inst->dest] = mips->r[inst->src1] + mips->r[inst->src2];
175
break;
176
case IROp::Sub:
177
mips->r[inst->dest] = mips->r[inst->src1] - mips->r[inst->src2];
178
break;
179
case IROp::And:
180
mips->r[inst->dest] = mips->r[inst->src1] & mips->r[inst->src2];
181
break;
182
case IROp::Or:
183
mips->r[inst->dest] = mips->r[inst->src1] | mips->r[inst->src2];
184
break;
185
case IROp::Xor:
186
mips->r[inst->dest] = mips->r[inst->src1] ^ mips->r[inst->src2];
187
break;
188
case IROp::Mov:
189
mips->r[inst->dest] = mips->r[inst->src1];
190
break;
191
case IROp::AddConst:
192
mips->r[inst->dest] = mips->r[inst->src1] + inst->constant;
193
break;
194
case IROp::OptAddConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
195
mips->r[inst->dest] += inst->constant;
196
break;
197
case IROp::SubConst:
198
mips->r[inst->dest] = mips->r[inst->src1] - inst->constant;
199
break;
200
case IROp::AndConst:
201
mips->r[inst->dest] = mips->r[inst->src1] & inst->constant;
202
break;
203
case IROp::OptAndConst: // For this one, it's worth having a "unary" variant of the above that only needs to read one register param.
204
mips->r[inst->dest] &= inst->constant;
205
break;
206
case IROp::OrConst:
207
mips->r[inst->dest] = mips->r[inst->src1] | inst->constant;
208
break;
209
case IROp::OptOrConst:
210
mips->r[inst->dest] |= inst->constant;
211
break;
212
case IROp::XorConst:
213
mips->r[inst->dest] = mips->r[inst->src1] ^ inst->constant;
214
break;
215
case IROp::Neg:
216
mips->r[inst->dest] = (u32)(-(s32)mips->r[inst->src1]);
217
break;
218
case IROp::Not:
219
mips->r[inst->dest] = ~mips->r[inst->src1];
220
break;
221
case IROp::Ext8to32:
222
mips->r[inst->dest] = SignExtend8ToU32(mips->r[inst->src1]);
223
break;
224
case IROp::Ext16to32:
225
mips->r[inst->dest] = SignExtend16ToU32(mips->r[inst->src1]);
226
break;
227
case IROp::ReverseBits:
228
mips->r[inst->dest] = ReverseBits32(mips->r[inst->src1]);
229
break;
230
231
case IROp::Load8:
232
mips->r[inst->dest] = Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant);
233
break;
234
case IROp::Load8Ext:
235
mips->r[inst->dest] = SignExtend8ToU32(Memory::ReadUnchecked_U8(mips->r[inst->src1] + inst->constant));
236
break;
237
case IROp::Load16:
238
mips->r[inst->dest] = Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant);
239
break;
240
case IROp::Load16Ext:
241
mips->r[inst->dest] = SignExtend16ToU32(Memory::ReadUnchecked_U16(mips->r[inst->src1] + inst->constant));
242
break;
243
case IROp::Load32:
244
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
245
break;
246
case IROp::Load32Left:
247
{
248
u32 addr = mips->r[inst->src1] + inst->constant;
249
u32 shift = (addr & 3) * 8;
250
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
251
u32 destMask = 0x00ffffff >> shift;
252
mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem << (24 - shift));
253
break;
254
}
255
case IROp::Load32Right:
256
{
257
u32 addr = mips->r[inst->src1] + inst->constant;
258
u32 shift = (addr & 3) * 8;
259
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
260
u32 destMask = 0xffffff00 << (24 - shift);
261
mips->r[inst->dest] = (mips->r[inst->dest] & destMask) | (mem >> shift);
262
break;
263
}
264
case IROp::Load32Linked:
265
if (inst->dest != MIPS_REG_ZERO)
266
mips->r[inst->dest] = Memory::ReadUnchecked_U32(mips->r[inst->src1] + inst->constant);
267
mips->llBit = 1;
268
break;
269
case IROp::LoadFloat:
270
mips->f[inst->dest] = Memory::ReadUnchecked_Float(mips->r[inst->src1] + inst->constant);
271
break;
272
273
case IROp::Store8:
274
Memory::WriteUnchecked_U8(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
275
break;
276
case IROp::Store16:
277
Memory::WriteUnchecked_U16(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
278
break;
279
case IROp::Store32:
280
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
281
break;
282
case IROp::Store32Left:
283
{
284
u32 addr = mips->r[inst->src1] + inst->constant;
285
u32 shift = (addr & 3) * 8;
286
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
287
u32 memMask = 0xffffff00 << shift;
288
u32 result = (mips->r[inst->src3] >> (24 - shift)) | (mem & memMask);
289
Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
290
break;
291
}
292
case IROp::Store32Right:
293
{
294
u32 addr = mips->r[inst->src1] + inst->constant;
295
u32 shift = (addr & 3) * 8;
296
u32 mem = Memory::ReadUnchecked_U32(addr & 0xfffffffc);
297
u32 memMask = 0x00ffffff >> (24 - shift);
298
u32 result = (mips->r[inst->src3] << shift) | (mem & memMask);
299
Memory::WriteUnchecked_U32(result, addr & 0xfffffffc);
300
break;
301
}
302
case IROp::Store32Conditional:
303
if (mips->llBit) {
304
Memory::WriteUnchecked_U32(mips->r[inst->src3], mips->r[inst->src1] + inst->constant);
305
if (inst->dest != MIPS_REG_ZERO) {
306
mips->r[inst->dest] = 1;
307
}
308
} else if (inst->dest != MIPS_REG_ZERO) {
309
mips->r[inst->dest] = 0;
310
}
311
break;
312
case IROp::StoreFloat:
313
Memory::WriteUnchecked_Float(mips->f[inst->src3], mips->r[inst->src1] + inst->constant);
314
break;
315
316
case IROp::LoadVec4:
317
{
318
u32 base = mips->r[inst->src1] + inst->constant;
319
// This compiles to a nice SSE load/store on x86, and hopefully similar on ARM.
320
memcpy(&mips->f[inst->dest], Memory::GetPointerUnchecked(base), 4 * 4);
321
break;
322
}
323
case IROp::StoreVec4:
324
{
325
u32 base = mips->r[inst->src1] + inst->constant;
326
memcpy((float *)Memory::GetPointerUnchecked(base), &mips->f[inst->dest], 4 * 4);
327
break;
328
}
329
330
case IROp::Vec4Init:
331
{
332
memcpy(&mips->f[inst->dest], vec4InitValues[inst->src1], 4 * sizeof(float));
333
break;
334
}
335
336
case IROp::Vec4Shuffle:
337
{
338
// Can't use the SSE shuffle here because it takes an immediate. pshufb with a table would work though,
339
// or a big switch - there are only 256 shuffles possible (4^4)
340
float temp[4];
341
for (int i = 0; i < 4; i++)
342
temp[i] = mips->f[inst->src1 + ((inst->src2 >> (i * 2)) & 3)];
343
const int dest = inst->dest;
344
for (int i = 0; i < 4; i++)
345
mips->f[dest + i] = temp[i];
346
break;
347
}
348
349
case IROp::Vec4Blend:
350
{
351
const int dest = inst->dest;
352
const int src1 = inst->src1;
353
const int src2 = inst->src2;
354
const int constant = inst->constant;
355
// 90% of calls to this is inst->constant == 7 or inst->constant == 8. Some are 1 and 4, others very rare.
356
// Could use _mm_blendv_ps (SSE4+BMI), vbslq_f32 (ARM), __riscv_vmerge_vvm (RISC-V)
357
float temp[4];
358
for (int i = 0; i < 4; i++)
359
temp[i] = ((constant >> i) & 1) ? mips->f[src2 + i] : mips->f[src1 + i];
360
for (int i = 0; i < 4; i++)
361
mips->f[dest + i] = temp[i];
362
break;
363
}
364
365
case IROp::Vec4Mov:
366
{
367
#if defined(_M_SSE)
368
_mm_store_ps(&mips->f[inst->dest], _mm_load_ps(&mips->f[inst->src1]));
369
#elif PPSSPP_ARCH(ARM_NEON)
370
vst1q_f32(&mips->f[inst->dest], vld1q_f32(&mips->f[inst->src1]));
371
#else
372
memcpy(&mips->f[inst->dest], &mips->f[inst->src1], 4 * sizeof(float));
373
#endif
374
break;
375
}
376
377
case IROp::Vec4Add:
378
{
379
#if defined(_M_SSE)
380
_mm_store_ps(&mips->f[inst->dest], _mm_add_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
381
#elif PPSSPP_ARCH(ARM_NEON)
382
vst1q_f32(&mips->f[inst->dest], vaddq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
383
#else
384
for (int i = 0; i < 4; i++)
385
mips->f[inst->dest + i] = mips->f[inst->src1 + i] + mips->f[inst->src2 + i];
386
#endif
387
break;
388
}
389
390
case IROp::Vec4Sub:
391
{
392
#if defined(_M_SSE)
393
_mm_store_ps(&mips->f[inst->dest], _mm_sub_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
394
#elif PPSSPP_ARCH(ARM_NEON)
395
vst1q_f32(&mips->f[inst->dest], vsubq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
396
#else
397
for (int i = 0; i < 4; i++)
398
mips->f[inst->dest + i] = mips->f[inst->src1 + i] - mips->f[inst->src2 + i];
399
#endif
400
break;
401
}
402
403
case IROp::Vec4Mul:
404
{
405
#if defined(_M_SSE)
406
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
407
#elif PPSSPP_ARCH(ARM_NEON)
408
vst1q_f32(&mips->f[inst->dest], vmulq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
409
#else
410
for (int i = 0; i < 4; i++)
411
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
412
#endif
413
break;
414
}
415
416
case IROp::Vec4Div:
417
{
418
#if defined(_M_SSE)
419
_mm_store_ps(&mips->f[inst->dest], _mm_div_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps(&mips->f[inst->src2])));
420
#elif PPSSPP_ARCH(ARM64_NEON)
421
vst1q_f32(&mips->f[inst->dest], vdivq_f32(vld1q_f32(&mips->f[inst->src1]), vld1q_f32(&mips->f[inst->src2])));
422
#else
423
for (int i = 0; i < 4; i++)
424
mips->f[inst->dest + i] = mips->f[inst->src1 + i] / mips->f[inst->src2 + i];
425
#endif
426
break;
427
}
428
429
case IROp::Vec4Scale:
430
{
431
#if defined(_M_SSE)
432
_mm_store_ps(&mips->f[inst->dest], _mm_mul_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_set1_ps(mips->f[inst->src2])));
433
#elif PPSSPP_ARCH(ARM_NEON)
434
vst1q_f32(&mips->f[inst->dest], vmulq_lane_f32(vld1q_f32(&mips->f[inst->src1]), vdup_n_f32(mips->f[inst->src2]), 0));
435
#else
436
const float factor = mips->f[inst->src2];
437
for (int i = 0; i < 4; i++)
438
mips->f[inst->dest + i] = mips->f[inst->src1 + i] * factor;
439
#endif
440
break;
441
}
442
443
case IROp::Vec4Neg:
444
{
445
#if defined(_M_SSE)
446
_mm_store_ps(&mips->f[inst->dest], _mm_xor_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)signBits)));
447
#elif PPSSPP_ARCH(ARM_NEON)
448
vst1q_f32(&mips->f[inst->dest], vnegq_f32(vld1q_f32(&mips->f[inst->src1])));
449
#else
450
for (int i = 0; i < 4; i++)
451
mips->f[inst->dest + i] = -mips->f[inst->src1 + i];
452
#endif
453
break;
454
}
455
456
case IROp::Vec4Abs:
457
{
458
#if defined(_M_SSE)
459
_mm_store_ps(&mips->f[inst->dest], _mm_and_ps(_mm_load_ps(&mips->f[inst->src1]), _mm_load_ps((const float *)noSignMask)));
460
#elif PPSSPP_ARCH(ARM_NEON)
461
vst1q_f32(&mips->f[inst->dest], vabsq_f32(vld1q_f32(&mips->f[inst->src1])));
462
#else
463
for (int i = 0; i < 4; i++)
464
mips->f[inst->dest + i] = fabsf(mips->f[inst->src1 + i]);
465
#endif
466
break;
467
}
468
469
case IROp::Vec2Unpack16To31:
470
{
471
const int dest = inst->dest;
472
const int src1 = inst->src1;
473
const int temp0 = (mips->fi[src1] << 16) >> 1;
474
const int temp1 = (mips->fi[src1] & 0xFFFF0000) >> 1;
475
mips->fi[dest] = temp0;
476
mips->fi[dest + 1] = temp1;
477
break;
478
}
479
480
case IROp::Vec2Unpack16To32:
481
{
482
const int dest = inst->dest;
483
const int src1 = inst->src1;
484
const int temp0 = (mips->fi[src1] << 16);
485
const int temp1 = (mips->fi[src1] & 0xFFFF0000);
486
mips->fi[dest] = temp0;
487
mips->fi[dest + 1] = temp1;
488
break;
489
}
490
491
case IROp::Vec4Unpack8To32:
492
{
493
#if defined(_M_SSE)
494
__m128i src = _mm_cvtsi32_si128(mips->fi[inst->src1]);
495
src = _mm_unpacklo_epi8(src, _mm_setzero_si128());
496
src = _mm_unpacklo_epi16(src, _mm_setzero_si128());
497
_mm_store_si128((__m128i *)&mips->fi[inst->dest], _mm_slli_epi32(src, 24));
498
#elif PPSSPP_ARCH(ARM_NEON) && 0 // Untested
499
const uint8x8_t value = (uint8x8_t)vdup_n_u32(mips->fi[inst->src1]);
500
const uint16x8_t value16 = vmovl_u8(value);
501
const uint32x4_t value32 = vshll_n_u16(vget_low_u16(value16), 24);
502
vst1q_u32(&mips->fi[inst->dest], value32);
503
#else
504
mips->fi[inst->dest] = (mips->fi[inst->src1] << 24);
505
mips->fi[inst->dest + 1] = (mips->fi[inst->src1] << 16) & 0xFF000000;
506
mips->fi[inst->dest + 2] = (mips->fi[inst->src1] << 8) & 0xFF000000;
507
mips->fi[inst->dest + 3] = (mips->fi[inst->src1]) & 0xFF000000;
508
#endif
509
break;
510
}
511
512
case IROp::Vec2Pack32To16:
513
{
514
u32 val = mips->fi[inst->src1] >> 16;
515
mips->fi[inst->dest] = (mips->fi[inst->src1 + 1] & 0xFFFF0000) | val;
516
break;
517
}
518
519
case IROp::Vec2Pack31To16:
520
{
521
// Used in Tekken 6
522
523
u32 val = (mips->fi[inst->src1] >> 15) & 0xFFFF;
524
val |= (mips->fi[inst->src1 + 1] << 1) & 0xFFFF0000;
525
mips->fi[inst->dest] = val;
526
break;
527
}
528
529
case IROp::Vec4Pack32To8:
530
{
531
// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
532
// pshufb or SSE4 instructions can be used instead.
533
u32 val = mips->fi[inst->src1] >> 24;
534
val |= (mips->fi[inst->src1 + 1] >> 16) & 0xFF00;
535
val |= (mips->fi[inst->src1 + 2] >> 8) & 0xFF0000;
536
val |= (mips->fi[inst->src1 + 3]) & 0xFF000000;
537
mips->fi[inst->dest] = val;
538
break;
539
}
540
541
case IROp::Vec4Pack31To8:
542
{
543
// Used in Tekken 6
544
545
// Removed previous SSE code due to the need for unsigned 16-bit pack, which I'm too lazy to work around the lack of in SSE2.
546
// pshufb or SSE4 instructions can be used instead.
547
#if PPSSPP_ARCH(ARM_NEON) && 0
548
// Untested
549
uint32x4_t value = vld1q_u32(&mips->fi[inst->src1]);
550
value = vshlq_n_u32(value, 1);
551
uint32x2_t halved = vshrn_n_u32(value, 8);
552
uint32x2_t halvedAgain = vshrn_n_u32(vcombine_u32(halved, vdup_n_u32(0)), 8);
553
mips->fi[inst->dest] = vget_lane_u32(halvedAgain, 0);
554
#else
555
u32 val = (mips->fi[inst->src1] >> 23) & 0xFF;
556
val |= (mips->fi[inst->src1 + 1] >> 15) & 0xFF00;
557
val |= (mips->fi[inst->src1 + 2] >> 7) & 0xFF0000;
558
val |= (mips->fi[inst->src1 + 3] << 1) & 0xFF000000;
559
mips->fi[inst->dest] = val;
560
#endif
561
break;
562
}
563
564
case IROp::Vec2ClampToZero:
565
{
566
const u32 temp0 = mips->fi[inst->src1];
567
const u32 temp1 = mips->fi[inst->src1 + 1];
568
mips->fi[inst->dest] = (int)temp0 >= 0 ? temp0 : 0;
569
mips->fi[inst->dest + 1] = (int)temp1 >= 0 ? temp1 : 0;
570
break;
571
}
572
573
case IROp::Vec4ClampToZero:
574
{
575
#if defined(_M_SSE)
576
// Trickery: Expand the sign bit, and use andnot to zero negative values.
577
__m128i val = _mm_load_si128((const __m128i *)&mips->fi[inst->src1]);
578
__m128i mask = _mm_srai_epi32(val, 31);
579
val = _mm_andnot_si128(mask, val);
580
_mm_store_si128((__m128i *)&mips->fi[inst->dest], val);
581
#else
582
const int src1 = inst->src1;
583
const int dest = inst->dest;
584
for (int i = 0; i < 4; i++) {
585
u32 val = mips->fi[src1 + i];
586
mips->fi[dest + i] = (int)val >= 0 ? val : 0;
587
}
588
#endif
589
break;
590
}
591
592
case IROp::Vec4DuplicateUpperBitsAndShift1: // For vuc2i, the weird one.
593
{
594
const int src1 = inst->src1;
595
const int dest = inst->dest;
596
u32 temp[4];
597
for (int i = 0; i < 4; i++) {
598
u32 val = mips->fi[src1 + i];
599
val = val | (val >> 8);
600
val = val | (val >> 16);
601
temp[i] = val >> 1;
602
}
603
for (int i = 0; i < 4; i++) {
604
mips->fi[dest + i] = temp[i];
605
}
606
break;
607
}
608
609
case IROp::FCmpVfpuBit:
610
{
611
const int op = inst->dest & 0xF;
612
const int bit = inst->dest >> 4;
613
int result = 0;
614
switch (op) {
615
case VC_EQ: result = mips->f[inst->src1] == mips->f[inst->src2]; break;
616
case VC_NE: result = mips->f[inst->src1] != mips->f[inst->src2]; break;
617
case VC_LT: result = mips->f[inst->src1] < mips->f[inst->src2]; break;
618
case VC_LE: result = mips->f[inst->src1] <= mips->f[inst->src2]; break;
619
case VC_GT: result = mips->f[inst->src1] > mips->f[inst->src2]; break;
620
case VC_GE: result = mips->f[inst->src1] >= mips->f[inst->src2]; break;
621
case VC_EZ: result = mips->f[inst->src1] == 0.0f; break;
622
case VC_NZ: result = mips->f[inst->src1] != 0.0f; break;
623
case VC_EN: result = my_isnan(mips->f[inst->src1]); break;
624
case VC_NN: result = !my_isnan(mips->f[inst->src1]); break;
625
case VC_EI: result = my_isinf(mips->f[inst->src1]); break;
626
case VC_NI: result = !my_isinf(mips->f[inst->src1]); break;
627
case VC_ES: result = my_isnanorinf(mips->f[inst->src1]); break;
628
case VC_NS: result = !my_isnanorinf(mips->f[inst->src1]); break;
629
case VC_TR: result = 1; break;
630
case VC_FL: result = 0; break;
631
default:
632
result = 0;
633
}
634
if (result != 0) {
635
mips->vfpuCtrl[VFPU_CTRL_CC] |= (1 << bit);
636
} else {
637
mips->vfpuCtrl[VFPU_CTRL_CC] &= ~(1 << bit);
638
}
639
break;
640
}
641
642
case IROp::FCmpVfpuAggregate:
643
{
644
const u32 mask = inst->dest;
645
const u32 cc = mips->vfpuCtrl[VFPU_CTRL_CC];
646
int anyBit = (cc & mask) ? 0x10 : 0x00;
647
int allBit = (cc & mask) == mask ? 0x20 : 0x00;
648
mips->vfpuCtrl[VFPU_CTRL_CC] = (cc & ~0x30) | anyBit | allBit;
649
break;
650
}
651
652
case IROp::FCmovVfpuCC:
653
if (((mips->vfpuCtrl[VFPU_CTRL_CC] >> (inst->src2 & 0xf)) & 1) == ((u32)inst->src2 >> 7)) {
654
mips->f[inst->dest] = mips->f[inst->src1];
655
}
656
break;
657
658
case IROp::Vec4Dot:
659
{
660
// Not quickly implementable on all platforms, unfortunately.
661
// Though, this is still pretty fast compared to one split into multiple IR instructions.
662
// This might be good though: https://gist.github.com/rikusalminen/3040241
663
float dot = mips->f[inst->src1] * mips->f[inst->src2];
664
for (int i = 1; i < 4; i++)
665
dot += mips->f[inst->src1 + i] * mips->f[inst->src2 + i];
666
mips->f[inst->dest] = dot;
667
break;
668
}
669
670
case IROp::FSin:
671
mips->f[inst->dest] = vfpu_sin(mips->f[inst->src1]);
672
break;
673
case IROp::FCos:
674
mips->f[inst->dest] = vfpu_cos(mips->f[inst->src1]);
675
break;
676
case IROp::FRSqrt:
677
mips->f[inst->dest] = 1.0f / sqrtf(mips->f[inst->src1]);
678
break;
679
case IROp::FRecip:
680
mips->f[inst->dest] = 1.0f / mips->f[inst->src1];
681
break;
682
case IROp::FAsin:
683
mips->f[inst->dest] = vfpu_asin(mips->f[inst->src1]);
684
break;
685
686
case IROp::ShlImm:
687
mips->r[inst->dest] = mips->r[inst->src1] << (int)inst->src2;
688
break;
689
case IROp::ShrImm:
690
mips->r[inst->dest] = mips->r[inst->src1] >> (int)inst->src2;
691
break;
692
case IROp::SarImm:
693
mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (int)inst->src2;
694
break;
695
case IROp::RorImm:
696
{
697
u32 x = mips->r[inst->src1];
698
int sa = inst->src2;
699
mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
700
}
701
break;
702
703
case IROp::Shl:
704
mips->r[inst->dest] = mips->r[inst->src1] << (mips->r[inst->src2] & 31);
705
break;
706
case IROp::Shr:
707
mips->r[inst->dest] = mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
708
break;
709
case IROp::Sar:
710
mips->r[inst->dest] = (s32)mips->r[inst->src1] >> (mips->r[inst->src2] & 31);
711
break;
712
case IROp::Ror:
713
{
714
u32 x = mips->r[inst->src1];
715
int sa = mips->r[inst->src2] & 31;
716
mips->r[inst->dest] = (x >> sa) | (x << (32 - sa));
717
break;
718
}
719
720
case IROp::Clz:
721
{
722
mips->r[inst->dest] = clz32(mips->r[inst->src1]);
723
break;
724
}
725
726
case IROp::Slt:
727
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2];
728
break;
729
730
case IROp::SltU:
731
mips->r[inst->dest] = mips->r[inst->src1] < mips->r[inst->src2];
732
break;
733
734
case IROp::SltConst:
735
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)inst->constant;
736
break;
737
738
case IROp::SltUConst:
739
mips->r[inst->dest] = mips->r[inst->src1] < inst->constant;
740
break;
741
742
case IROp::MovZ:
743
if (mips->r[inst->src1] == 0)
744
mips->r[inst->dest] = mips->r[inst->src2];
745
break;
746
case IROp::MovNZ:
747
if (mips->r[inst->src1] != 0)
748
mips->r[inst->dest] = mips->r[inst->src2];
749
break;
750
751
case IROp::Max:
752
mips->r[inst->dest] = (s32)mips->r[inst->src1] > (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
753
break;
754
case IROp::Min:
755
mips->r[inst->dest] = (s32)mips->r[inst->src1] < (s32)mips->r[inst->src2] ? mips->r[inst->src1] : mips->r[inst->src2];
756
break;
757
758
case IROp::MtLo:
759
mips->lo = mips->r[inst->src1];
760
break;
761
case IROp::MtHi:
762
mips->hi = mips->r[inst->src1];
763
break;
764
case IROp::MfLo:
765
mips->r[inst->dest] = mips->lo;
766
break;
767
case IROp::MfHi:
768
mips->r[inst->dest] = mips->hi;
769
break;
770
771
case IROp::Mult:
772
{
773
s64 result = (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
774
memcpy(&mips->lo, &result, 8);
775
break;
776
}
777
case IROp::MultU:
778
{
779
u64 result = (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
780
memcpy(&mips->lo, &result, 8);
781
break;
782
}
783
case IROp::Madd:
784
{
785
s64 result;
786
memcpy(&result, &mips->lo, 8);
787
result += (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
788
memcpy(&mips->lo, &result, 8);
789
break;
790
}
791
case IROp::MaddU:
792
{
793
s64 result;
794
memcpy(&result, &mips->lo, 8);
795
result += (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
796
memcpy(&mips->lo, &result, 8);
797
break;
798
}
799
case IROp::Msub:
800
{
801
s64 result;
802
memcpy(&result, &mips->lo, 8);
803
result -= (s64)(s32)mips->r[inst->src1] * (s64)(s32)mips->r[inst->src2];
804
memcpy(&mips->lo, &result, 8);
805
break;
806
}
807
case IROp::MsubU:
808
{
809
s64 result;
810
memcpy(&result, &mips->lo, 8);
811
result -= (u64)mips->r[inst->src1] * (u64)mips->r[inst->src2];
812
memcpy(&mips->lo, &result, 8);
813
break;
814
}
815
816
case IROp::Div:
817
{
818
s32 numerator = (s32)mips->r[inst->src1];
819
s32 denominator = (s32)mips->r[inst->src2];
820
if (numerator == (s32)0x80000000 && denominator == -1) {
821
mips->lo = 0x80000000;
822
mips->hi = -1;
823
} else if (denominator != 0) {
824
mips->lo = (u32)(numerator / denominator);
825
mips->hi = (u32)(numerator % denominator);
826
} else {
827
mips->lo = numerator < 0 ? 1 : -1;
828
mips->hi = numerator;
829
}
830
break;
831
}
832
case IROp::DivU:
833
{
834
u32 numerator = mips->r[inst->src1];
835
u32 denominator = mips->r[inst->src2];
836
if (denominator != 0) {
837
mips->lo = numerator / denominator;
838
mips->hi = numerator % denominator;
839
} else {
840
mips->lo = numerator <= 0xFFFF ? 0xFFFF : -1;
841
mips->hi = numerator;
842
}
843
break;
844
}
845
846
case IROp::BSwap16:
847
{
848
u32 x = mips->r[inst->src1];
849
// Don't think we can beat this with intrinsics.
850
mips->r[inst->dest] = ((x & 0xFF00FF00) >> 8) | ((x & 0x00FF00FF) << 8);
851
break;
852
}
853
case IROp::BSwap32:
854
{
855
mips->r[inst->dest] = swap32(mips->r[inst->src1]);
856
break;
857
}
858
859
case IROp::FAdd:
860
mips->f[inst->dest] = mips->f[inst->src1] + mips->f[inst->src2];
861
break;
862
case IROp::FSub:
863
mips->f[inst->dest] = mips->f[inst->src1] - mips->f[inst->src2];
864
break;
865
case IROp::FMul:
866
#if 1
867
{
868
float a = mips->f[inst->src1];
869
float b = mips->f[inst->src2];
870
if ((b == 0.0f && my_isinf(a)) || (a == 0.0f && my_isinf(b))) {
871
mips->fi[inst->dest] = 0x7fc00000;
872
} else {
873
mips->f[inst->dest] = a * b;
874
}
875
}
876
break;
877
#else
878
// Not sure if faster since it needs to load the operands twice? But the code is simpler.
879
{
880
// Takes care of negative zero by masking away the top bit, which also makes the inf check shorter.
881
u32 a = mips->fi[inst->src1] & 0x7FFFFFFF;
882
u32 b = mips->fi[inst->src2] & 0x7FFFFFFF;
883
if ((a == 0 && b == 0x7F800000) || (b == 0 && a == 0x7F800000)) {
884
mips->fi[inst->dest] = 0x7fc00000;
885
} else {
886
mips->f[inst->dest] = mips->f[inst->src1] * mips->f[inst->src2];
887
}
888
break;
889
}
890
#endif
891
case IROp::FDiv:
892
mips->f[inst->dest] = mips->f[inst->src1] / mips->f[inst->src2];
893
break;
894
case IROp::FMin:
895
if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) {
896
// See interpreter for this logic: this is for vmin, we're comparing mantissa+exp.
897
if (mips->fs[inst->src1] < 0 && mips->fs[inst->src2] < 0) {
898
mips->fs[inst->dest] = std::max(mips->fs[inst->src1], mips->fs[inst->src2]);
899
} else {
900
mips->fs[inst->dest] = std::min(mips->fs[inst->src1], mips->fs[inst->src2]);
901
}
902
} else {
903
mips->f[inst->dest] = std::min(mips->f[inst->src1], mips->f[inst->src2]);
904
}
905
break;
906
case IROp::FMax:
907
if (my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2])) {
908
// See interpreter for this logic: this is for vmax, we're comparing mantissa+exp.
909
if (mips->fs[inst->src1] < 0 && mips->fs[inst->src2] < 0) {
910
mips->fs[inst->dest] = std::min(mips->fs[inst->src1], mips->fs[inst->src2]);
911
} else {
912
mips->fs[inst->dest] = std::max(mips->fs[inst->src1], mips->fs[inst->src2]);
913
}
914
} else {
915
mips->f[inst->dest] = std::max(mips->f[inst->src1], mips->f[inst->src2]);
916
}
917
break;
918
919
case IROp::FMov:
920
mips->f[inst->dest] = mips->f[inst->src1];
921
break;
922
case IROp::FAbs:
923
mips->f[inst->dest] = fabsf(mips->f[inst->src1]);
924
break;
925
case IROp::FSqrt:
926
mips->f[inst->dest] = sqrtf(mips->f[inst->src1]);
927
break;
928
case IROp::FNeg:
929
mips->f[inst->dest] = -mips->f[inst->src1];
930
break;
931
case IROp::FSat0_1:
932
// We have to do this carefully to handle NAN and -0.0f.
933
mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], 0.0f, 1.0f);
934
break;
935
case IROp::FSatMinus1_1:
936
mips->f[inst->dest] = vfpu_clamp(mips->f[inst->src1], -1.0f, 1.0f);
937
break;
938
939
case IROp::FSign:
940
{
941
// Bitwise trickery
942
u32 val;
943
memcpy(&val, &mips->f[inst->src1], sizeof(u32));
944
if (val == 0 || val == 0x80000000)
945
mips->f[inst->dest] = 0.0f;
946
else if ((val >> 31) == 0)
947
mips->f[inst->dest] = 1.0f;
948
else
949
mips->f[inst->dest] = -1.0f;
950
break;
951
}
952
953
case IROp::FpCondFromReg:
954
mips->fpcond = mips->r[inst->dest];
955
break;
956
case IROp::FpCondToReg:
957
mips->r[inst->dest] = mips->fpcond;
958
break;
959
case IROp::FpCtrlFromReg:
960
mips->fcr31 = mips->r[inst->src1] & 0x0181FFFF;
961
// Extract the new fpcond value.
962
// TODO: Is it really helping us to keep it separate?
963
mips->fpcond = (mips->fcr31 >> 23) & 1;
964
break;
965
case IROp::FpCtrlToReg:
966
// Update the fpcond bit first.
967
mips->fcr31 = (mips->fcr31 & ~(1 << 23)) | ((mips->fpcond & 1) << 23);
968
mips->r[inst->dest] = mips->fcr31;
969
break;
970
case IROp::VfpuCtrlToReg:
971
mips->r[inst->dest] = mips->vfpuCtrl[inst->src1];
972
break;
973
case IROp::FRound:
974
{
975
float value = mips->f[inst->src1];
976
if (my_isnanorinf(value)) {
977
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
978
break;
979
} else {
980
mips->fs[inst->dest] = (int)round_ieee_754(value);
981
}
982
break;
983
}
984
case IROp::FTrunc:
985
{
986
float value = mips->f[inst->src1];
987
if (my_isnanorinf(value)) {
988
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
989
break;
990
} else {
991
if (value >= 0.0f) {
992
mips->fs[inst->dest] = (int)floorf(value);
993
// Overflow, but it was positive.
994
if (mips->fs[inst->dest] == -2147483648LL) {
995
mips->fs[inst->dest] = 2147483647LL;
996
}
997
} else {
998
// Overflow happens to be the right value anyway.
999
mips->fs[inst->dest] = (int)ceilf(value);
1000
}
1001
break;
1002
}
1003
}
1004
case IROp::FCeil:
1005
{
1006
float value = mips->f[inst->src1];
1007
if (my_isnanorinf(value)) {
1008
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
1009
break;
1010
} else {
1011
mips->fs[inst->dest] = (int)ceilf(value);
1012
}
1013
break;
1014
}
1015
case IROp::FFloor:
1016
{
1017
float value = mips->f[inst->src1];
1018
if (my_isnanorinf(value)) {
1019
mips->fi[inst->dest] = my_isinf(value) && value < 0.0f ? -2147483648LL : 2147483647LL;
1020
break;
1021
} else {
1022
mips->fs[inst->dest] = (int)floorf(value);
1023
}
1024
break;
1025
}
1026
case IROp::FCmp:
1027
switch (inst->dest) {
1028
case IRFpCompareMode::False:
1029
mips->fpcond = 0;
1030
break;
1031
case IRFpCompareMode::EitherUnordered:
1032
{
1033
float a = mips->f[inst->src1];
1034
float b = mips->f[inst->src2];
1035
mips->fpcond = !(a > b || a < b || a == b);
1036
break;
1037
}
1038
case IRFpCompareMode::EqualOrdered:
1039
mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2];
1040
break;
1041
case IRFpCompareMode::EqualUnordered:
1042
mips->fpcond = mips->f[inst->src1] == mips->f[inst->src2] || my_isnan(mips->f[inst->src1]) || my_isnan(mips->f[inst->src2]);
1043
break;
1044
case IRFpCompareMode::LessEqualOrdered:
1045
mips->fpcond = mips->f[inst->src1] <= mips->f[inst->src2];
1046
break;
1047
case IRFpCompareMode::LessEqualUnordered:
1048
mips->fpcond = !(mips->f[inst->src1] > mips->f[inst->src2]);
1049
break;
1050
case IRFpCompareMode::LessOrdered:
1051
mips->fpcond = mips->f[inst->src1] < mips->f[inst->src2];
1052
break;
1053
case IRFpCompareMode::LessUnordered:
1054
mips->fpcond = !(mips->f[inst->src1] >= mips->f[inst->src2]);
1055
break;
1056
}
1057
break;
1058
1059
case IROp::FCvtSW:
1060
mips->f[inst->dest] = (float)mips->fs[inst->src1];
1061
break;
1062
case IROp::FCvtWS:
1063
{
1064
float src = mips->f[inst->src1];
1065
if (my_isnanorinf(src)) {
1066
mips->fs[inst->dest] = my_isinf(src) && src < 0.0f ? -2147483648LL : 2147483647LL;
1067
break;
1068
}
1069
// TODO: Inline assembly to use here would be better.
1070
switch (IRRoundMode(mips->fcr31 & 3)) {
1071
case IRRoundMode::RINT_0: mips->fs[inst->dest] = (int)round_ieee_754(src); break;
1072
case IRRoundMode::CAST_1: mips->fs[inst->dest] = (int)src; break;
1073
case IRRoundMode::CEIL_2: mips->fs[inst->dest] = (int)ceilf(src); break;
1074
case IRRoundMode::FLOOR_3: mips->fs[inst->dest] = (int)floorf(src); break;
1075
}
1076
break; //cvt.w.s
1077
}
1078
case IROp::FCvtScaledSW:
1079
mips->f[inst->dest] = (float)mips->fs[inst->src1] * (1.0f / (1UL << (inst->src2 & 0x1F)));
1080
break;
1081
case IROp::FCvtScaledWS:
1082
{
1083
float src = mips->f[inst->src1];
1084
if (my_isnan(src)) {
1085
// TODO: True for negatives too?
1086
mips->fs[inst->dest] = 2147483647L;
1087
break;
1088
}
1089
1090
float mult = (float)(1UL << (inst->src2 & 0x1F));
1091
double sv = src * mult; // (float)0x7fffffff == (float)0x80000000
1092
// Cap/floor it to 0x7fffffff / 0x80000000
1093
if (sv > (double)0x7fffffff) {
1094
mips->fs[inst->dest] = 0x7fffffff;
1095
} else if (sv <= (double)(int)0x80000000) {
1096
mips->fs[inst->dest] = 0x80000000;
1097
} else {
1098
switch (IRRoundMode(inst->src2 >> 6)) {
1099
case IRRoundMode::RINT_0: mips->fs[inst->dest] = (int)round_ieee_754(sv); break;
1100
case IRRoundMode::CAST_1: mips->fs[inst->dest] = src >= 0 ? (int)floor(sv) : (int)ceil(sv); break;
1101
case IRRoundMode::CEIL_2: mips->fs[inst->dest] = (int)ceil(sv); break;
1102
case IRRoundMode::FLOOR_3: mips->fs[inst->dest] = (int)floor(sv); break;
1103
}
1104
}
1105
break;
1106
}
1107
1108
case IROp::FMovFromGPR:
1109
memcpy(&mips->f[inst->dest], &mips->r[inst->src1], 4);
1110
break;
1111
case IROp::OptFCvtSWFromGPR:
1112
mips->f[inst->dest] = (float)(int)mips->r[inst->src1];
1113
break;
1114
case IROp::FMovToGPR:
1115
memcpy(&mips->r[inst->dest], &mips->f[inst->src1], 4);
1116
break;
1117
case IROp::OptFMovToGPRShr8:
1118
{
1119
u32 temp;
1120
memcpy(&temp, &mips->f[inst->src1], 4);
1121
mips->r[inst->dest] = temp >> 8;
1122
break;
1123
}
1124
1125
case IROp::ExitToConst:
1126
return inst->constant;
1127
1128
case IROp::ExitToReg:
1129
return mips->r[inst->src1];
1130
1131
case IROp::ExitToConstIfEq:
1132
if (mips->r[inst->src1] == mips->r[inst->src2])
1133
return inst->constant;
1134
break;
1135
case IROp::ExitToConstIfNeq:
1136
if (mips->r[inst->src1] != mips->r[inst->src2])
1137
return inst->constant;
1138
break;
1139
case IROp::ExitToConstIfGtZ:
1140
if ((s32)mips->r[inst->src1] > 0)
1141
return inst->constant;
1142
break;
1143
case IROp::ExitToConstIfGeZ:
1144
if ((s32)mips->r[inst->src1] >= 0)
1145
return inst->constant;
1146
break;
1147
case IROp::ExitToConstIfLtZ:
1148
if ((s32)mips->r[inst->src1] < 0)
1149
return inst->constant;
1150
break;
1151
case IROp::ExitToConstIfLeZ:
1152
if ((s32)mips->r[inst->src1] <= 0)
1153
return inst->constant;
1154
break;
1155
1156
case IROp::Downcount:
1157
mips->downcount -= (int)inst->constant;
1158
break;
1159
1160
case IROp::SetPC:
1161
mips->pc = mips->r[inst->src1];
1162
break;
1163
1164
case IROp::SetPCConst:
1165
mips->pc = inst->constant;
1166
break;
1167
1168
case IROp::Syscall:
1169
// IROp::SetPC was (hopefully) executed before.
1170
{
1171
MIPSOpcode op(inst->constant);
1172
CallSyscall(op);
1173
if (coreState != CORE_RUNNING_CPU)
1174
CoreTiming::ForceCheck();
1175
break;
1176
}
1177
1178
case IROp::ExitToPC:
1179
return mips->pc;
1180
1181
case IROp::Interpret: // SLOW fallback. Can be made faster. Ideally should be removed but may be useful for debugging.
1182
{
1183
MIPSOpcode op(inst->constant);
1184
MIPSInterpret(op);
1185
break;
1186
}
1187
1188
case IROp::CallReplacement:
1189
{
1190
int funcIndex = inst->constant;
1191
const ReplacementTableEntry *f = GetReplacementFunc(funcIndex);
1192
int cycles = f->replaceFunc();
1193
mips->r[inst->dest] = cycles < 0 ? -1 : 0;
1194
mips->downcount -= cycles < 0 ? -cycles : cycles;
1195
break;
1196
}
1197
1198
case IROp::SetCtrlVFPU:
1199
mips->vfpuCtrl[inst->dest] = inst->constant;
1200
break;
1201
1202
case IROp::SetCtrlVFPUReg:
1203
mips->vfpuCtrl[inst->dest] = mips->r[inst->src1];
1204
break;
1205
1206
case IROp::SetCtrlVFPUFReg:
1207
memcpy(&mips->vfpuCtrl[inst->dest], &mips->f[inst->src1], 4);
1208
break;
1209
1210
case IROp::ApplyRoundingMode:
1211
IRApplyRounding(mips);
1212
break;
1213
case IROp::RestoreRoundingMode:
1214
IRRestoreRounding();
1215
break;
1216
case IROp::UpdateRoundingMode:
1217
// TODO: Implement
1218
break;
1219
1220
case IROp::Break:
1221
Core_BreakException(mips->pc);
1222
return mips->pc + 4;
1223
1224
case IROp::Breakpoint:
1225
if (IRRunBreakpoint(inst->constant)) {
1226
CoreTiming::ForceCheck();
1227
return mips->pc;
1228
}
1229
break;
1230
1231
case IROp::MemoryCheck:
1232
if (IRRunMemCheck(mips->pc + inst->dest, mips->r[inst->src1] + inst->constant)) {
1233
CoreTiming::ForceCheck();
1234
return mips->pc;
1235
}
1236
break;
1237
1238
case IROp::ValidateAddress8:
1239
if (RunValidateAddress<1>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1240
CoreTiming::ForceCheck();
1241
return mips->pc;
1242
}
1243
break;
1244
case IROp::ValidateAddress16:
1245
if (RunValidateAddress<2>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1246
CoreTiming::ForceCheck();
1247
return mips->pc;
1248
}
1249
break;
1250
case IROp::ValidateAddress32:
1251
if (RunValidateAddress<4>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1252
CoreTiming::ForceCheck();
1253
return mips->pc;
1254
}
1255
break;
1256
case IROp::ValidateAddress128:
1257
if (RunValidateAddress<16>(mips->pc, mips->r[inst->src1] + inst->constant, inst->src2)) {
1258
CoreTiming::ForceCheck();
1259
return mips->pc;
1260
}
1261
break;
1262
case IROp::LogIRBlock:
1263
if (mipsTracer.tracing_enabled) {
1264
mipsTracer.executed_blocks.push_back(inst->constant);
1265
}
1266
break;
1267
1268
case IROp::Nop: // TODO: This shouldn't crash, but for now we should not emit nops, so...
1269
case IROp::Bad:
1270
default:
1271
// Unimplemented IR op. Bad. We define it as unreachable so the compiler can optimize better (remove the range check).
1272
UNREACHABLE();
1273
break;
1274
}
1275
1276
#ifdef _DEBUG
1277
if (mips->r[0] != 0)
1278
Crash();
1279
#endif
1280
inst++;
1281
}
1282
1283
// We should not reach here anymore.
1284
return 0;
1285
}
1286
1287