GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/ARM64/Arm64CompLoadStore.cpp

// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM64)

#include "Core/MemMap.h"
#include "Core/Config.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"

#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)

// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.

//#define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
#define DISABLE { Comp_Generic(op); return; }

namespace MIPSComp {
	using namespace Arm64Gen;
	using namespace Arm64JitConstants;

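	// Computes SCRATCH1 = gpr[rs] + offset (the guest effective address). With MASKED_PSP_MEMORY
	// defined, the result is additionally masked down to the PSP address space.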
	// Destroys SCRATCH2
	void Arm64Jit::SetScratch1ToEffectiveAddress(MIPSGPReg rs, s16 offset) {
		if (offset) {
			ADDI2R(SCRATCH1, gpr.R(rs), offset, SCRATCH2);
		} else {
			MOV(SCRATCH1, gpr.R(rs));
		}
#ifdef MASKED_PSP_MEMORY
		ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF);
#endif
	}

	std::vector<FixupBranch> Arm64Jit::SetScratch1ForSafeAddress(MIPSGPReg rs, s16 offset, ARM64Reg tempReg) {
		std::vector<FixupBranch> skips;

		SetScratch1ToEffectiveAddress(rs, offset);

		// We can do this a little smarter by shifting out the lower 8 bits, since blocks are 0x100 aligned.
		// PSP_GetUserMemoryEnd() is dynamic, but the others encode to imms just fine.
		// So we only need to safety check the one value.
		// This is because ARM64 immediates for many instructions like CMP can only encode
		// immediates up to 12 bits, shifted by 12 or not.

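		// Illustrative example (value assumed, not taken from this file): with the default user memory
		// end of 0x0A000000, the low 20 bits are zero, so (0x0A000000 >> 8) == 0x0A0000 encodes as a
		// 12-bit immediate shifted by 12, while the full 28-bit constant would not fit.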
		if ((PSP_GetUserMemoryEnd() & 0x000FFFFF) == 0) {
			// In other words, shift right 8, and kill off the top 4 bits as we don't want them involved in the compares.
			UBFX(tempReg, SCRATCH1, 8, 24 - 4);
			// Now check if we're higher than that.
			CMPI2R(tempReg, PSP_GetUserMemoryEnd() >> 8);
		} else {
			// Compare first using the tempReg (need it because we have a full 28-bit value), then shift into it.
			ANDI2R(SCRATCH1, SCRATCH1, 0x0FFFFFFF);
			CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd(), tempReg);
			UBFX(tempReg, SCRATCH1, 8, 24);
		}
		skips.push_back(B(CC_HS));

		// If it's at or above the kernel memory base and we didn't skip yet, it must be good. Hurray.
		CMPI2R(tempReg, PSP_GetKernelMemoryBase() >> 8);
		FixupBranch inRAM = B(CC_HS);

		// If we got here and it's higher, then it's between VRAM and RAM - skip.
		CMPI2R(tempReg, PSP_GetVidMemEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And if it's at or above the VRAM base and we're still here, it's in VRAM.
		CMPI2R(tempReg, PSP_GetVidMemBase() >> 8);
		FixupBranch inVRAM = B(CC_HS);

		// Last gap, this is between SRAM and VRAM. Skip it.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And for lower than SRAM, we just skip again.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryBase() >> 8);
		skips.push_back(B(CC_LO));

		// At this point, we're either in SRAM (above) or in RAM/VRAM.
		SetJumpTarget(inRAM);
		SetJumpTarget(inVRAM);

		return skips;
	}

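	// Compiles the MIPS unaligned load/store instructions (lwl/lwr/swl/swr), each of which reads or
	// writes only part of a 32-bit word so that an lwl/lwr (or swl/swr) pair can access a misaligned
	// word. 'load' is true for lwl/lwr.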
	void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();
		int offset = SignExtend16ToS32(op & 0xFFFF);
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;

		if (!js.inDelaySlot && !jo.Disabled(JitDisable::LSU_UNALIGNED)) {
			// Optimisation: Combine to single unaligned load/store
			bool isLeft = (o == 34 || o == 42);
			CheckMemoryBreakpoint(1);
			MIPSOpcode nextOp = GetOffsetInstruction(1);
			// Find a matching shift in opposite direction with opposite offset.
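			// In the primary opcode field (bits 31:26), lwr = lwl + 4 and swr = swl + 4, and the usual
			// little-endian pairing addresses the same word with offsets differing by 3
			// (e.g. lwl rt, 3(rs) / lwr rt, 0(rs)). So the matching partner is the same encoding with
			// the opcode bumped by 4 and the 16-bit offset reduced by 3, or the reverse.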
			if (nextOp == (isLeft ? (op.encoding + (4 << 26) - 3) : (op.encoding - (4 << 26) + 3))) {
				EatInstruction(nextOp);
				nextOp = MIPSOpcode(((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x03FFFFFF)); //lw, sw
				Comp_ITypeMem(nextOp);
				return;
			}
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		std::vector<FixupBranch> skips;

		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
			u32 addr = iaddr & 0x3FFFFFFF;
#else
			u32 addr = iaddr;
#endif
			// Need to initialize since this only loads part of the register.
			// But rs no longer matters (even if rs == rt) since we have the address.
			gpr.MapReg(rt, load ? MAP_DIRTY : 0);
			gpr.SetRegImm(SCRATCH1, addr & ~3);

			u8 shift = (addr & 3) * 8;

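			// For example: for lwl with (addr & 3) == 1, shift == 8, so rt keeps its low 16 bits
			// (mask 0x0000ffff) and the low 16 bits of the aligned word are ORed into rt's upper half.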
			switch (o) {
			case 34: // lwl
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0x00ffffff >> shift, INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSL, 24 - shift));
				break;

			case 38: // lwr
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0xffffff00 << (24 - shift), INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSR, shift));
				break;

			case 42: // swl
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0xffffff00 << shift, INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSR, 24 - shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;

			case 46: // swr
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0x00ffffff >> (24 - shift), INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSL, shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;
			}
			return;
		}

		_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x? CPU bug?", iaddr);
		if (load) {
			gpr.MapDirtyIn(rt, rs, false);
		} else {
			gpr.MapInIn(rt, rs);
		}
		gpr.SpillLock(rt);
		gpr.SpillLock(rs);
		// Need to get temps before skipping safe mem.
		ARM64Reg LR_SCRATCH3 = gpr.GetAndLockTempR();
		ARM64Reg LR_SCRATCH4 = o == 42 || o == 46 ? gpr.GetAndLockTempR() : INVALID_REG;

		if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
			skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
		} else {
			SetScratch1ToEffectiveAddress(rs, offset);
		}

		// Here's our shift amount.
		ANDI2R(SCRATCH2, SCRATCH1, 3);
		LSL(SCRATCH2, SCRATCH2, 3);

		// Now align the address for the actual read.
		ANDI2R(SCRATCH1, SCRATCH1, ~3U);

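		// Same mask-and-merge as the constant-address path above, but the shift amount lives in
		// SCRATCH2, and NEG followed by ADDI2R(reg, reg, 24) forms (24 - shift) at runtime.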
		switch (o) {
		case 34: // lwl
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(SCRATCH1, SCRATCH1, SCRATCH2);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 38: // lwr
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(SCRATCH1, SCRATCH1, SCRATCH2);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 42: // swl
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);

			LSRV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;

		case 46: // swr
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;
		}

		for (auto skip : skips) {
			SetJumpTarget(skip);
		}

		gpr.ReleaseSpillLocksAndDiscardTemps();
	}

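	// Compiles the aligned I-type loads and stores (lb/lbu/lh/lhu/lw, sb/sh/sw); the unaligned
	// lwl/lwr/swl/swr forms are dispatched to Comp_ITypeMemLR above.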
	void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();

		int offset = SignExtend16ToS32(op & 0xFFFF);
		bool load = false;
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;
		if (((op >> 29) & 1) == 0 && rt == MIPS_REG_ZERO) {
			// Don't load anything into $zr
			return;
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		std::vector<FixupBranch> skips;
		ARM64Reg targetReg = INVALID_REG;
		ARM64Reg addrReg = INVALID_REG;

		int dataSize = 4;
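		// Primary opcodes seen here: 32=lb, 33=lh, 35=lw, 36=lbu, 37=lhu, 40=sb, 41=sh, 43=sw
		// (34/38/42/46 are the unaligned lwl/lwr/swl/swr). Halfword ops access 2 bytes, byte ops 1.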
		switch (o) {
		case 37:
		case 33:
			dataSize = 2;
			break;
		case 36:
		case 32:
			dataSize = 1;
			break;
		// Store
		case 41:
			dataSize = 2;
			break;
		case 40:
			dataSize = 1;
			break;
		}

		switch (o) {
		case 32: //lb
		case 33: //lh
		case 35: //lw
		case 36: //lbu
		case 37: //lhu
			load = true;
		case 40: //sb
		case 41: //sh
		case 43: //sw
#ifndef MASKED_PSP_MEMORY
			if (jo.cachePointers && g_Config.bFastMemory) {
				// ARM has smaller load/store immediate displacements than MIPS, 12 bits - and some memory ops only have 8 bits.
				int offsetRange = 0x3ff;
				if (o == 41 || o == 33 || o == 37 || o == 32)
					offsetRange = 0xff; // 8 bit offset only
				if (!gpr.IsImm(rs) && rs != rt && (offset <= offsetRange) && offset >= 0 &&
					(dataSize == 1 || (offset & (dataSize - 1)) == 0)) { // Check that the offset is aligned to the access size, as that's required for INDEX_UNSIGNED encodings. We can get here through fallback from lwl/lwr.
					gpr.SpillLock(rs, rt);
					gpr.MapRegAsPointer(rs);

					// For a store, try to avoid mapping a reg if not needed.
					targetReg = load ? INVALID_REG : gpr.TryMapTempImm(rt);
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}

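					// With rs mapped as a host pointer into PSP memory (gpr.RPtr), these emit direct
					// loads/stores using the unsigned scaled-immediate form (INDEX_UNSIGNED); the
					// alignment check above keeps the offset encodable.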
					switch (o) {
					case 35: LDR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 37: LDRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 33: LDRSH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 36: LDRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 32: LDRSB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					// Store
					case 43: STR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 41: STRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 40: STRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					}
					gpr.ReleaseSpillLocksAndDiscardTemps();
					break;
				}
			}
#endif

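			// If rt currently holds a known immediate and this is a store, TryMapTempImm below may
			// hand back a register that already contains that value, so rt itself never has to be mapped.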
			if (!load && gpr.IsImm(rt) && gpr.TryMapTempImm(rt) != INVALID_REG) {
				// We're storing an immediate value, let's see if we can optimize rt.
				if (!gpr.IsImm(rs) || !Memory::IsValidAddress(iaddr) || offset == 0) {
					// In this case, we're always going to need rs mapped, which may flush the temp imm.
					// We handle that in the cases below since targetReg is INVALID_REG.
					gpr.MapIn(rs);
				}

				targetReg = gpr.TryMapTempImm(rt);
			}

			if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
				u32 addr = iaddr & 0x3FFFFFFF;
#else
				u32 addr = iaddr;
#endif
				if (addr == iaddr && offset == 0) {
					// It was already safe. Let's shove it into a reg and use it directly.
					if (targetReg == INVALID_REG) {
						load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
						targetReg = gpr.R(rt);
					}
					addrReg = gpr.R(rs);
				} else {
					// In this case, only map rt. rs+offset will be in SCRATCH1.
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}
					gpr.SetRegImm(SCRATCH1, addr);
					addrReg = SCRATCH1;
				}
			} else {
				// This gets hit in a few games, as a result of never-taken delay slots (some branch types
				// conditionally execute the delay slot instructions). Ignore in those cases.
				if (!js.inDelaySlot) {
					_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x? CPU bug?", iaddr);
				}

				// If we already have a targetReg, we optimized an imm, and rs is already mapped.
				if (targetReg == INVALID_REG) {
					if (load) {
						gpr.MapDirtyIn(rt, rs);
					} else {
						gpr.MapInIn(rt, rs);
					}
					targetReg = gpr.R(rt);
				}

				if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
					skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
				} else {
					SetScratch1ToEffectiveAddress(rs, offset);
				}
				addrReg = SCRATCH1;
			}

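			// At this point addrReg is either gpr.R(rs) (known-good address, zero offset) or SCRATCH1
			// holding the effective, possibly range-checked, address; MEMBASEREG is the host base of
			// emulated PSP memory.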
			switch (o) {
			// Load
			case 35: LDR(targetReg, MEMBASEREG, addrReg); break;
			case 37: LDRH(targetReg, MEMBASEREG, addrReg); break;
			case 33: LDRSH(targetReg, MEMBASEREG, addrReg); break;
			case 36: LDRB(targetReg, MEMBASEREG, addrReg); break;
			case 32: LDRSB(targetReg, MEMBASEREG, addrReg); break;
			// Store
			case 43: STR(targetReg, MEMBASEREG, addrReg); break;
			case 41: STRH(targetReg, MEMBASEREG, addrReg); break;
			case 40: STRB(targetReg, MEMBASEREG, addrReg); break;
			}
			for (auto skip : skips) {
				SetJumpTarget(skip);
				// TODO: Could clear to zero here on load, if skipping this for good reads.
			}
			break;
		case 34: //lwl
		case 38: //lwr
			load = true;
		case 42: //swl
		case 46: //swr
			Comp_ITypeMemLR(op, load);
			break;
		default:
			Comp_Generic(op);
			return;
		}
	}

	void Arm64Jit::Comp_StoreSync(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		DISABLE;
	}

	void Arm64Jit::Comp_Cache(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		int func = (op >> 16) & 0x1F;

		// See Int_Cache for the definitions.
		switch (func) {
		case 24: break;
		case 25: break;
		case 27: break;
		case 30: break;
		default:
			// Fall back to the interpreter.
			DISABLE;
		}
	}
}

#endif // PPSSPP_ARCH(ARM64)