Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/ARM64/Arm64RegCacheFPU.cpp
5669 views
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#if PPSSPP_ARCH(ARM64)
20
21
#include <cstring>
22
23
#include "Common/CPUDetect.h"
24
#include "Common/Log.h"
25
#include "Core/Reporting.h"
26
#include "Core/MIPS/MIPS.h"
27
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"
28
#include "Core/MIPS/ARM64/Arm64Jit.h"
29
#include "Core/MIPS/MIPSTables.h"
30
31
using namespace Arm64Gen;
32
using namespace Arm64JitConstants;
33
34
// Constructor: wires up the MIPS state and JIT state/option pointers.
// vr is a convenience view into mr, offset by 32 (the VFPU portion).
Arm64RegCacheFPU::Arm64RegCacheFPU(MIPSState *mipsState, MIPSComp::JitState *js, MIPSComp::JitOptions *jo)
	: mips_(mipsState), vr(mr + 32), js_(js), jo_(jo) {
	// All 32 ARM64 FPU registers are managed by this cache.
	numARMFpuReg_ = 32;
}
37
38
// Stores the emitters used later to generate the load/store code.
void Arm64RegCacheFPU::Init(Arm64Gen::ARM64XEmitter *emit, Arm64Gen::ARM64FloatEmitter *fp) {
	fp_ = fp;
	emit_ = emit;
}
42
43
// Resets the cache to a pristine "everything in memory" state at the
// start of a block. stats is currently unused.
void Arm64RegCacheFPU::Start(MIPSAnalyst::AnalysisResults &stats) {
	// Build the pristine templates lazily, on first use only.
	if (!initialReady) {
		SetupInitialRegs();
		initialReady = true;
	}

	// Copy both pristine tables over the live state.
	memcpy(mr, mrInitial, sizeof(mr));
	memcpy(ar, arInitial, sizeof(ar));
	pendingFlush = false;
}
53
54
void Arm64RegCacheFPU::SetupInitialRegs() {
55
for (int i = 0; i < numARMFpuReg_; i++) {
56
arInitial[i].mipsReg = -1;
57
arInitial[i].isDirty = false;
58
}
59
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
60
mrInitial[i].loc = ML_MEM;
61
mrInitial[i].reg = INVALID_REG;
62
mrInitial[i].spillLock = false;
63
mrInitial[i].tempLock = false;
64
}
65
}
66
67
// Returns the preferred allocation order for S-registers and writes the
// number of entries into count.
const ARM64Reg *Arm64RegCacheFPU::GetMIPSAllocationOrder(int &count) {
	// On ARM64, all 32 registers are fully 128-bit and fully interchangable so we don't
	// have to care about upper or lower registers. However, only S8-S15 are callee-save, and
	// only the bottom 64 bits of those. So we should allocate into these when we call
	// C functions, although we don't currently do so...

	// S0-S3 are kept out of the order as full 128-bit temporaries, which should be plenty.
	static const ARM64Reg allocationOrder[] = {
		S4, S5, S6, S7,
		S8, S9, S10, S11,   // Partially callee-save (bottom 64 bits)
		S12, S13, S14, S15, // Partially callee-save (bottom 64 bits)
		S16, S17, S18, S19,
		S20, S21, S22, S23,
		S24, S25, S26, S27,
		S28, S29, S30, S31,
	};

	count = (int)(sizeof(allocationOrder) / sizeof(allocationOrder[0]));
	return allocationOrder;
}
87
88
// True if the MIPS FPU register currently lives in an ARM register.
bool Arm64RegCacheFPU::IsMapped(MIPSReg r) {
	return ML_ARMREG == mr[r].loc;
}
91
92
// True if the MIPS FPU register currently lives only in the context memory.
bool Arm64RegCacheFPU::IsInRAM(MIPSReg r) {
	return ML_MEM == mr[r].loc;
}
95
96
// Maps a MIPS FPU register into an ARM64 S-register, allocating (and if
// necessary spilling) one. mapFlags: MAP_DIRTY marks the register as
// needing writeback; MAP_NOINIT additionally skips loading its current
// value. Returns the ARM64 register, or INVALID_REG when every candidate
// is spill/temp-locked.
ARM64Reg Arm64RegCacheFPU::MapReg(MIPSReg mipsReg, int mapFlags) {
	pendingFlush = true;
	// Let's see if it's already mapped. If so we just need to update the dirty flag.
	// We don't need to check for ML_NOINIT because we assume that anyone who maps
	// with that flag immediately writes a "known" value to the register.
	if (mr[mipsReg].loc == ML_ARMREG) {
		if (ar[mr[mipsReg].reg].mipsReg != mipsReg) {
			ERROR_LOG(Log::JIT, "Reg mapping out of sync! MR %i", mipsReg);
		}
		if (mapFlags & MAP_DIRTY) {
			ar[mr[mipsReg].reg].isDirty = true;
		}
		//INFO_LOG(Log::JIT, "Already mapped %i to %i", mipsReg, mr[mipsReg].reg);
		return (ARM64Reg)(mr[mipsReg].reg + S0);
	}

	// Okay, not mapped, so we need to allocate an ARM register.

	int allocCount;
	const ARM64Reg *allocOrder = GetMIPSAllocationOrder(allocCount);

allocate:
	// First pass: look for a free ARM register in allocation order.
	for (int i = 0; i < allocCount; i++) {
		int reg = DecodeReg(allocOrder[i]);

		if (ar[reg].mipsReg == -1) {
			// That means it's free. Grab it, and load the value into it (if requested).
			ar[reg].isDirty = (mapFlags & MAP_DIRTY) ? true : false;
			if ((mapFlags & MAP_NOINIT) != MAP_NOINIT) {
				// Only non-temp regs (below TEMP0) currently in memory get reloaded.
				if (mr[mipsReg].loc == ML_MEM && mipsReg < TEMP0) {
					fp_->LDR(32, INDEX_UNSIGNED, (ARM64Reg)(reg + S0), CTXREG, GetMipsRegOffset(mipsReg));
				}
			}
			ar[reg].mipsReg = mipsReg;
			mr[mipsReg].loc = ML_ARMREG;
			mr[mipsReg].reg = reg;
			//INFO_LOG(Log::JIT, "Mapped %i to %i", mipsReg, mr[mipsReg].reg);
			return (ARM64Reg)(reg + S0);
		}
	}

	// Still nothing. Let's spill a reg and goto 10.
	// TODO: Use age or something to choose which register to spill?
	// TODO: Spill dirty regs first? or opposite?
	int bestToSpill = -1;
	for (int i = 0; i < allocCount; i++) {
		int reg = allocOrder[i] - S0;
		// Skip registers whose MIPS reg is spill-locked or temp-locked.
		if (ar[reg].mipsReg != -1 && (mr[ar[reg].mipsReg].spillLock || mr[ar[reg].mipsReg].tempLock))
			continue;
		bestToSpill = reg;
		break;
	}

	if (bestToSpill != -1) {
		// Write the victim back (if dirty), then retry the allocation pass.
		FlushArmReg((ARM64Reg)(S0 + bestToSpill));
		goto allocate;
	}

	// Uh oh, we have all them spilllocked....
	ERROR_LOG(Log::JIT, "Out of spillable registers at PC %08x!!!", js_->compilerPC);
	return INVALID_REG;
}
159
160
// Maps two source registers, protecting each from being spilled while
// the other is mapped.
void Arm64RegCacheFPU::MapInIn(MIPSReg rd, MIPSReg rs) {
	SpillLock(rd, rs);
	MapReg(rd);
	MapReg(rs);
	ReleaseSpillLock(rs);
	ReleaseSpillLock(rd);
}
167
168
// Maps a destination (dirty) and a source register. When avoidLoad is set
// and rd does not alias rs, rd's old value is not loaded (MAP_NOINIT).
void Arm64RegCacheFPU::MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad) {
	SpillLock(rd, rs);
	// Skipping the load is only safe when rd will be fully overwritten
	// and isn't also an input.
	const bool skipLoad = avoidLoad && rd != rs;
	MapReg(rd, skipLoad ? MAP_NOINIT : MAP_DIRTY);
	MapReg(rs);
	ReleaseSpillLock(rs);
	ReleaseSpillLock(rd);
}
176
177
// Maps a destination (dirty) and two source registers. When avoidLoad is
// set and rd aliases neither source, rd's old value is not loaded.
void Arm64RegCacheFPU::MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad) {
	SpillLock(rd, rs, rt);
	const bool skipLoad = avoidLoad && rd != rs && rd != rt;
	MapReg(rd, skipLoad ? MAP_NOINIT : MAP_DIRTY);
	// Note: rt is mapped before rs, matching the established order here.
	MapReg(rt);
	MapReg(rs);
	ReleaseSpillLock(rt);
	ReleaseSpillLock(rs);
	ReleaseSpillLock(rd);
}
187
188
void Arm64RegCacheFPU::SpillLockV(const u8 *v, VectorSize sz) {
189
for (int i = 0; i < GetNumVectorElements(sz); i++) {
190
vr[v[i]].spillLock = true;
191
}
192
}
193
194
void Arm64RegCacheFPU::SpillLockV(int vec, VectorSize sz) {
195
u8 v[4];
196
GetVectorRegs(v, sz, vec);
197
SpillLockV(v, sz);
198
}
199
200
void Arm64RegCacheFPU::MapRegV(int vreg, int flags) {
201
MapReg(vreg + 32, flags);
202
}
203
204
// Copies the value of VFPU register vreg into armReg, mapping vreg first
// if it is not already resident in an ARM register.
void Arm64RegCacheFPU::LoadToRegV(ARM64Reg armReg, int vreg) {
	if (vr[vreg].loc != ML_ARMREG) {
		// Not resident - map it in, then copy out of its home register.
		MapRegV(vreg);
		fp_->FMOV(armReg, V(vreg));
	} else {
		fp_->FMOV(armReg, (ARM64Reg)(S0 + vr[vreg].reg));
	}
}
212
213
void Arm64RegCacheFPU::MapRegsAndSpillLockV(int vec, VectorSize sz, int flags) {
214
u8 v[4];
215
GetVectorRegs(v, sz, vec);
216
SpillLockV(v, sz);
217
for (int i = 0; i < GetNumVectorElements(sz); i++) {
218
MapRegV(v[i], flags);
219
}
220
}
221
222
void Arm64RegCacheFPU::MapRegsAndSpillLockV(const u8 *v, VectorSize sz, int flags) {
223
SpillLockV(v, sz);
224
for (int i = 0; i < GetNumVectorElements(sz); i++) {
225
MapRegV(v[i], flags);
226
}
227
}
228
229
void Arm64RegCacheFPU::MapInInV(int vs, int vt) {
230
SpillLockV(vs);
231
SpillLockV(vt);
232
MapRegV(vs);
233
MapRegV(vt);
234
ReleaseSpillLockV(vs);
235
ReleaseSpillLockV(vt);
236
}
237
238
void Arm64RegCacheFPU::MapDirtyInV(int vd, int vs, bool avoidLoad) {
239
bool load = !avoidLoad || (vd == vs);
240
SpillLockV(vd);
241
SpillLockV(vs);
242
MapRegV(vd, load ? MAP_DIRTY : MAP_NOINIT);
243
MapRegV(vs);
244
ReleaseSpillLockV(vd);
245
ReleaseSpillLockV(vs);
246
}
247
248
void Arm64RegCacheFPU::MapDirtyInInV(int vd, int vs, int vt, bool avoidLoad) {
249
bool load = !avoidLoad || (vd == vs || vd == vt);
250
SpillLockV(vd);
251
SpillLockV(vs);
252
SpillLockV(vt);
253
MapRegV(vd, load ? MAP_DIRTY : MAP_NOINIT);
254
MapRegV(vs);
255
MapRegV(vt);
256
ReleaseSpillLockV(vd);
257
ReleaseSpillLockV(vs);
258
ReleaseSpillLockV(vt);
259
}
260
261
// Writes a dirty ARM S-register back to the MIPS context (if needed) and
// unmaps it. No-op for registers outside S0-S31 or with no mapping.
// Fix: the original re-checked ar[reg].mipsReg != -1 right after an early
// return guaranteed it, making the "Dirty but no mipsreg?" else branch
// unreachable dead code; the redundant check and dead branch are removed.
void Arm64RegCacheFPU::FlushArmReg(ARM64Reg r) {
	if (r < S0 || r > S31) {
		// Not an S-register we track; nothing to do (matches old behavior).
		return;
	}
	int reg = r - S0;
	if (ar[reg].mipsReg == -1) {
		// Nothing to do, reg not mapped.
		return;
	}
	if (ar[reg].isDirty && mr[ar[reg].mipsReg].loc == ML_ARMREG) {
		//INFO_LOG(Log::JIT, "Flushing ARM reg %i", reg);
		fp_->STR(32, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(ar[reg].mipsReg));
	}
	// IMMs won't be in an ARM reg.
	mr[ar[reg].mipsReg].loc = ML_MEM;
	mr[ar[reg].mipsReg].reg = INVALID_REG;
	ar[reg].mipsReg = -1;
	ar[reg].isDirty = false;
}
283
284
// Flushes a VFPU register; VFPU regs live at offset 32 in the mr table.
void Arm64RegCacheFPU::FlushV(MIPSReg r) {
	FlushR(32 + r);
}
287
288
// Writes the given MIPS FPU register back to memory (via FlushArmReg) if
// it currently lives in an ARM register. No-op when already in memory.
void Arm64RegCacheFPU::FlushR(MIPSReg r) {
	const auto loc = mr[r].loc;
	if (loc == ML_IMM) {
		// IMM is always "dirty", but IMM is not allowed for FP (yet).
		ERROR_LOG(Log::JIT, "Imm in FP register?");
	} else if (loc == ML_ARMREG) {
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG(Log::JIT, "FlushR: MipsReg had bad ArmReg");
		}
		FlushArmReg((ARM64Reg)(S0 + mr[r].reg));
	} else if (loc == ML_MEM) {
		// Already there, nothing to do.
	}
	// Any other location: BAD, silently ignored (same as before).
}
312
313
// Returns the ARM register holding MIPS reg r if (and only if) it needs a
// writeback; INVALID_REG for clean, unmapped, or bogus registers.
Arm64Gen::ARM64Reg Arm64RegCacheFPU::ARM64RegForFlush(int r) {
	const auto loc = mr[r].loc;
	if (loc == ML_IMM) {
		// IMM is always "dirty", but IMM is not allowed for FP (yet).
		ERROR_LOG(Log::JIT, "Imm in FP register?");
		return INVALID_REG;
	}
	if (loc == ML_MEM) {
		return INVALID_REG;
	}
	if (loc == ML_ARMREG) {
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG_REPORT(Log::JIT, "ARM64RegForFlush: MipsReg %d had bad ArmReg", r);
			return INVALID_REG;
		}
		// No need to flush if it's not dirty.
		if (!ar[mr[r].reg].isDirty) {
			return INVALID_REG;
		}
		return (ARM64Reg)(S0 + mr[r].reg);
	}
	ERROR_LOG_REPORT(Log::JIT, "ARM64RegForFlush: MipsReg %d with invalid location %d", r, loc);
	return INVALID_REG;
}
340
341
// Writes every dirty FPU register back to the MIPSState context and
// unmaps everything, leaving the cache empty. Cheap no-op when nothing
// has been mapped since the last flush (pendingFlush is false).
void Arm64RegCacheFPU::FlushAll() {
	if (!pendingFlush) {
		// Nothing allocated. FPU regs are not nearly as common as GPR.
		return;
	}

	// Discard temps! They are never written back (see DiscardR).
	for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; i++) {
		DiscardR(i);
	}

	int numArmRegs = 0;

	const ARM64Reg *order = GetMIPSAllocationOrder(numArmRegs);

	// Flush pairs first when possible. Note that STP's offset can't reach more than 256 bytes so
	// most VFPU registers cannot be flushed this way, unless we are willing to generate another offset pointer
	// (which we could actually do right here, point right in the middle of the VFPU stuff and would reach it all)... TODO
	// Only the basic FPRs (indices 0..31) are considered for pairing here.
	for (int i = 0; i < 31; i++) {
		int mr1 = i;
		int mr2 = i + 1;
		// ARM64RegForFlush returns INVALID_REG unless the reg is mapped AND dirty.
		ARM64Reg ar1 = ARM64RegForFlush(mr1);
		ARM64Reg ar2 = ARM64RegForFlush(mr2);

		if (ar1 != INVALID_REG && ar2 != INVALID_REG) {
			// Store both values with one STP, then unmap; the DiscardR calls
			// also make the next iteration see mr2 as already flushed.
			fp_->STP(32, INDEX_SIGNED, ar1, ar2, CTXREG, GetMipsRegOffset(mr1));
			DiscardR(mr1);
			DiscardR(mr2);
		}
	}

	// Then flush one by one.

	for (int i = 0; i < numArmRegs; i++) {
		int a = DecodeReg(order[i]);
		int m = ar[a].mipsReg;

		if (ar[a].isDirty) {
			if (m == -1) {
				INFO_LOG(Log::JIT, "ARM reg %d is dirty but has no mipsreg", a);
				continue;
			}

			// Write the value back to the context, then unmap both sides.
			fp_->STR(32, INDEX_UNSIGNED, (ARM64Reg)(a + S0), CTXREG, GetMipsRegOffset(m));

			mr[m].loc = ML_MEM;
			mr[m].reg = (int)INVALID_REG;
			ar[a].mipsReg = -1;
			ar[a].isDirty = false;
		} else {
			// Clean: just drop the mapping, no store needed.
			if (m != -1) {
				mr[m].loc = ML_MEM;
				mr[m].reg = (int)INVALID_REG;
			}
			ar[a].mipsReg = -1;
			// already not dirty
		}
	}

	// Sanity check: every ARM register should now be unmapped.
	for (int i = 0; i < numARMFpuReg_; i++) {
		if (ar[i].mipsReg != -1) {
			ERROR_LOG(Log::JIT, "Flush fail: ar[%d].mipsReg=%d", i, ar[i].mipsReg);
		}
	}
	pendingFlush = false;
}
408
409
// Drops the mapping for MIPS reg r WITHOUT writing it back, and clears
// its locks. The in-memory value is left as-is.
void Arm64RegCacheFPU::DiscardR(MIPSReg r) {
	const auto loc = mr[r].loc;
	if (loc == ML_IMM) {
		// IMM is always "dirty", but IMM is not allowed for FP (yet).
		ERROR_LOG(Log::JIT, "Imm in FP register?");
	} else if (loc == ML_ARMREG) {
		if (mr[r].reg == INVALID_REG) {
			ERROR_LOG(Log::JIT, "DiscardR: MipsReg had bad ArmReg");
		} else {
			// Note that we DO NOT write it back here. That's the whole point of Discard.
			ar[mr[r].reg].isDirty = false;
			ar[mr[r].reg].mipsReg = -1;
		}
	}
	// ML_MEM (or anything else): nothing to undo on the ARM side.

	// Unconditionally reset the MIPS-side bookkeeping.
	mr[r].loc = ML_MEM;
	mr[r].reg = (int)INVALID_REG;
	mr[r].tempLock = false;
	mr[r].spillLock = false;
}
440
441
// True if the given ARM register currently holds one of our temp regs.
bool Arm64RegCacheFPU::IsTempX(ARM64Reg r) const {
	const int idx = r - S0;
	return ar[idx].mipsReg >= TEMP0;
}
444
445
int Arm64RegCacheFPU::GetTempR() {
446
pendingFlush = true;
447
for (int r = TEMP0; r < TEMP0 + NUM_TEMPS; ++r) {
448
if (mr[r].loc == ML_MEM && !mr[r].tempLock) {
449
mr[r].tempLock = true;
450
return r;
451
}
452
}
453
454
ERROR_LOG(Log::CPU, "Out of temp regs! Might need to DiscardR() some");
455
_assert_msg_(false, "Regcache ran out of temp regs, might need to DiscardR() some.");
456
return -1;
457
}
458
459
// Computes the byte offset of a MIPS FPU/VFPU/temp register within the
// MIPSState structure. Returns 0 (and logs an error) when out of range.
int Arm64RegCacheFPU::GetMipsRegOffset(MIPSReg r) {
	// These are offsets within the MIPSState structure. First there are the GPRS, then FPRS, then the "VFPURs", then the VFPU ctrls.
	// Fix: use >= here - valid indices end at 32 + 128 + NUM_TEMPS - 1, so the
	// old '>' guard let the first out-of-range index slip through.
	if (r < 0 || r >= 32 + 128 + NUM_TEMPS) {
		ERROR_LOG(Log::JIT, "bad mips register %i, out of range", r);
		return 0; // or what?
	}

	if (r < 32 || r >= 32 + 128) {
		// FPRs (0-31) and temps lie linearly after the 32 GPRs.
		return (32 + r) << 2;
	} else {
		// r is between 32 and 128 + 32: a VFPU register, remapped via voffset.
		return (32 + 32 + voffset[r - 32]) << 2;
	}
}
473
474
// Spill-locks up to four MIPS FPU registers. r1 is mandatory; the
// remaining parameters default to -1 meaning "absent".
void Arm64RegCacheFPU::SpillLock(MIPSReg r1, MIPSReg r2, MIPSReg r3, MIPSReg r4) {
	mr[r1].spillLock = true;
	const MIPSReg optional[] = { r2, r3, r4 };
	for (MIPSReg r : optional) {
		if (r != -1)
			mr[r].spillLock = true;
	}
}
480
481
// This is actually pretty slow with all the 160 regs...
482
void Arm64RegCacheFPU::ReleaseSpillLocksAndDiscardTemps() {
483
for (int i = 0; i < NUM_MIPSFPUREG; i++) {
484
mr[i].spillLock = false;
485
}
486
for (int i = TEMP0; i < TEMP0 + NUM_TEMPS; ++i) {
487
DiscardR(i);
488
}
489
}
490
491
// Returns the ARM register currently holding MIPS reg mipsReg. Logs an
// error and returns INVALID_REG if it isn't mapped - callers must have
// mapped it first.
ARM64Reg Arm64RegCacheFPU::R(int mipsReg) {
	if (mr[mipsReg].loc != ML_ARMREG) {
		// Report which register class the bad index falls into.
		if (mipsReg < 32) {
			ERROR_LOG(Log::JIT, "FReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg, js_->compilerPC, MIPSDisasmAt(js_->compilerPC).c_str());
		} else if (mipsReg < 32 + 128) {
			ERROR_LOG(Log::JIT, "VReg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC).c_str());
		} else {
			ERROR_LOG(Log::JIT, "Tempreg %i not in ARM reg. compilerPC = %08x : %s", mipsReg - 128 - 32, js_->compilerPC, MIPSDisasmAt(js_->compilerPC).c_str());
		}
		return INVALID_REG; // BAAAD
	}
	return (ARM64Reg)(S0 + mr[mipsReg].reg);
}
505
506
#endif // PPSSPP_ARCH(ARM64)
507
508