
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/ARM64/Arm64IRRegCache.cpp
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL.
#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__))

#ifndef offsetof
#include <cstddef>
#endif

#include "Common/CPUDetect.h"
#include "Common/LogReporting.h"
#include "Core/MemMap.h"
#include "Core/MIPS/IR/IRInst.h"
#include "Core/MIPS/IR/IRAnalysis.h"
#include "Core/MIPS/ARM64/Arm64IRRegCache.h"
#include "Core/MIPS/JitCommon/JitState.h"

using namespace Arm64Gen;
using namespace Arm64IRJitConstants;

Arm64IRRegCache::Arm64IRRegCache(MIPSComp::JitOptions *jo)
	: IRNativeRegCacheBase(jo) {
	// The S/D/Q regs overlap, so we just use one slot. The numbers don't match ARM64Reg.
	config_.totalNativeRegs = NUM_X_REGS + NUM_X_FREGS;
	config_.mapFPUSIMD = true;
	// The S/D/Q regs are used for both FPU and Vec, so we don't need VREGs.
	config_.mapUseVRegs = false;
}

void Arm64IRRegCache::Init(ARM64XEmitter *emitter, ARM64FloatEmitter *fp) {
	emit_ = emitter;
	fp_ = fp;
}

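// Allocation order note: callee-saved registers (W19 and up) are listed first so that
// dynamically allocated values can survive C calls without being flushed; the
// caller-saved W0-W15 follow. When static allocation is enabled, W19-W24 are claimed
// by GetStaticAllocations() below, so the static-alloc order only contains W0-W15.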
const int *Arm64IRRegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const {
	if (type == MIPSLoc::REG) {
		// See register alloc remarks in Arm64Asm.cpp.
		base = W0;

		// W19-W23 are most suitable for static allocation. Those that are chosen for static allocation
		// should be omitted here and added in GetStaticAllocations.
		static const int allocationOrder[] = {
			W19, W20, W21, W22, W23, W24, W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15,
		};
		static const int allocationOrderStaticAlloc[] = {
			W0, W1, W2, W3, W4, W5, W6, W7, W8, W9, W10, W11, W12, W13, W14, W15,
		};

		if (jo_->useStaticAlloc) {
			count = ARRAY_SIZE(allocationOrderStaticAlloc);
			return allocationOrderStaticAlloc;
		}
		count = ARRAY_SIZE(allocationOrder);
		return allocationOrder;
	} else if (type == MIPSLoc::FREG) {
		base = S0 - NUM_X_REGS;

		// We don't really need four temps, probably.
		// We start with S8 for call flushes.
		static const int allocationOrder[] = {
			// Reserve four full 128-bit temp registers, should be plenty.
			S8, S9, S10, S11,   // Partially callee-save (bottom 64 bits)
			S12, S13, S14, S15, // Partially callee-save (bottom 64 bits)
			S16, S17, S18, S19,
			S20, S21, S22, S23,
			S24, S25, S26, S27,
			S28, S29, S30, S31,
			S4, S5, S6, S7,
		};

		count = ARRAY_SIZE(allocationOrder);
		return allocationOrder;
	} else {
		_assert_msg_(false, "Allocation order not yet implemented");
		count = 0;
		return nullptr;
	}
}

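// Static allocations used when jo->useStaticAlloc is enabled: a handful of frequently
// used MIPS registers are pinned to callee-saved host registers while jitted code runs
// (loaded/saved by EmitLoadStaticRegisters/EmitSaveStaticRegisters). The trailing
// "true" on SP marks it pointerified, i.e. the host register can also carry the high
// bits of Memory::base so it doubles as a direct host pointer.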
const Arm64IRRegCache::StaticAllocation *Arm64IRRegCache::GetStaticAllocations(int &count) const {
	static const StaticAllocation allocs[] = {
		{ MIPS_REG_SP, W19, MIPSLoc::REG, true },
		{ MIPS_REG_V0, W20, MIPSLoc::REG },
		{ MIPS_REG_V1, W21, MIPSLoc::REG },
		{ MIPS_REG_A0, W22, MIPSLoc::REG },
		{ MIPS_REG_A1, W23, MIPSLoc::REG },
		{ MIPS_REG_RA, W24, MIPSLoc::REG },
	};

	if (jo_->useStaticAlloc) {
		count = ARRAY_SIZE(allocs);
		return allocs;
	}
	return IRNativeRegCacheBase::GetStaticAllocations(count);
}

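// Loads the statically allocated guest registers from the MIPS context on entry to
// jitted code. Consecutive MIPS regs are loaded pairwise with LDP; a pointerified reg
// gets the high 32 bits of Memory::base merged in with MOVK, which assumes the
// emulated memory base has zero low 32 bits so base + address needs no ADD.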
void Arm64IRRegCache::EmitLoadStaticRegisters() {
	int count = 0;
	const StaticAllocation *allocs = GetStaticAllocations(count);
	for (int i = 0; i < count; ++i) {
		int offset = GetMipsRegOffset(allocs[i].mr);
		if (i + 1 < count && allocs[i].mr == allocs[i + 1].mr - 1) {
			_assert_(!allocs[i].pointerified && !allocs[i + 1].pointerified);
			emit_->LDP(INDEX_SIGNED, FromNativeReg(allocs[i].nr), FromNativeReg(allocs[i + 1].nr), CTXREG, offset);
			++i;
		} else {
			emit_->LDR(INDEX_UNSIGNED, FromNativeReg(allocs[i].nr), CTXREG, offset);
			if (allocs[i].pointerified && jo_->enablePointerify) {
				ARM64Reg r64 = FromNativeReg64(allocs[i].nr);
				uint32_t membaseHigh = (uint32_t)((uint64_t)Memory::base >> 32);
				emit_->MOVK(r64, membaseHigh & 0xFFFF, SHIFT_32);
				if (membaseHigh & 0xFFFF0000)
					emit_->MOVK(r64, membaseHigh >> 16, SHIFT_48);
			}
		}
	}
}

void Arm64IRRegCache::EmitSaveStaticRegisters() {
	int count = 0;
	const StaticAllocation *allocs = GetStaticAllocations(count);
	// This only needs to run once (by Asm) so checks don't need to be fast.
	for (int i = 0; i < count; ++i) {
		int offset = GetMipsRegOffset(allocs[i].mr);
		if (i + 1 < count && allocs[i].mr == allocs[i + 1].mr - 1) {
			emit_->STP(INDEX_SIGNED, FromNativeReg(allocs[i].nr), FromNativeReg(allocs[i + 1].nr), CTXREG, offset);
			++i;
		} else {
			emit_->STR(INDEX_UNSIGNED, FromNativeReg(allocs[i].nr), CTXREG, offset);
		}
	}
}

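// Flushes everything a C function call could clobber. Values living in callee-saved
// host regs (W19-W29, and the low 64 bits of S8-S15) are left alone; the rest are
// written back to the context. Adjacent dirty guest regs are stored pairwise with STP
// when the context offset fits the signed-immediate form (multiple of 4, at most 252
// for 32-bit registers); whatever is left is then flushed individually.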
void Arm64IRRegCache::FlushBeforeCall() {
	// These registers are not preserved by function calls.
	auto isGPRSaved = [&](IRNativeReg nreg) {
		ARM64Reg ar = FromNativeReg(nreg);
		return ar >= W19 && ar <= W29;
	};
	auto isFPRSaved = [&](IRNativeReg nreg) {
		ARM64Reg ar = FromNativeReg(nreg);
		return ar >= S8 && ar <= S15;
	};

	// Go through by IR index first, to use STP where we can.
	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
		if (mr[i].nReg == -1 || mr[i + 1].nReg == -1 || mr[i].isStatic || mr[i + 1].isStatic)
			continue;
		// Ignore multilane regs.
		if (mr[i].lane != -1 || mr[i + 1].lane != -1)
			continue;
		if (!nr[mr[i].nReg].isDirty || !nr[mr[i + 1].nReg].isDirty)
			continue;
		// Make sure not to try to pair a GPR and FPR.
		if (IsValidGPR(i) != IsValidGPR(i + 1))
			continue;

		int offset = GetMipsRegOffset(i);

		// Okay, it's a maybe. Are we flushing both as GPRs?
		if (!isGPRSaved(mr[i].nReg) && !isGPRSaved(mr[i + 1].nReg) && IsValidGPR(i) && offset <= 252) {
			// If either is mapped as a pointer, fix it.
			if (mr[i].loc == MIPSLoc::REG_AS_PTR)
				AdjustNativeRegAsPtr(mr[i].nReg, false);
			if (mr[i + 1].loc == MIPSLoc::REG_AS_PTR)
				AdjustNativeRegAsPtr(mr[i + 1].nReg, false);

			// That means we should use STP.
			emit_->STP(INDEX_SIGNED, FromNativeReg(mr[i].nReg), FromNativeReg(mr[i + 1].nReg), CTXREG, offset);

			DiscardNativeReg(mr[i].nReg);
			DiscardNativeReg(mr[i + 1].nReg);

			++i;
			continue;
		}

		// Perhaps as FPRs? Note: these must be single lane at this point.
		// TODO: Could use STP on quads etc. too, i.e. i & i + 4.
		if (!isFPRSaved(mr[i].nReg) && !isFPRSaved(mr[i + 1].nReg) && !IsValidGPR(i) && offset <= 252) {
			fp_->STP(32, INDEX_SIGNED, FromNativeReg(mr[i].nReg), FromNativeReg(mr[i + 1].nReg), CTXREG, offset);

			DiscardNativeReg(mr[i].nReg);
			DiscardNativeReg(mr[i + 1].nReg);

			++i;
			continue;
		}
	}

	// Alright, now go through any that didn't get flushed with STP.
	for (int i = 0; i < 19; ++i) {
		FlushNativeReg(GPRToNativeReg(ARM64Reg(W0 + i)));
	}
	FlushNativeReg(GPRToNativeReg(W30));

	for (int i = 0; i < 8; ++i) {
		FlushNativeReg(VFPToNativeReg(ARM64Reg(S0 + i)));
	}
	for (int i = 8; i < 16; ++i) {
		// These are preserved but only the low 64 bits.
		IRNativeReg nreg = VFPToNativeReg(ARM64Reg(S0 + i));
		if (nr[nreg].mipsReg != IRREG_INVALID && GetFPRLaneCount(nr[nreg].mipsReg - 32) > 2)
			FlushNativeReg(nreg);
	}
	for (int i = 16; i < 32; ++i) {
		FlushNativeReg(VFPToNativeReg(ARM64Reg(S0 + i)));
	}
}

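// If the guest reg holds a known immediate, try to satisfy it without allocating:
// zero can use WZR directly, and an equal immediate already materialized in another
// host register can simply be reused.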
ARM64Reg Arm64IRRegCache::TryMapTempImm(IRReg r) {
	_dbg_assert_(IsValidGPR(r));

	// If already mapped, no need for a temporary.
	if (IsGPRMapped(r)) {
		return R(r);
	}

	if (mr[r].loc == MIPSLoc::IMM) {
		// Can we just use zero?
		if (mr[r].imm == 0)
			return WZR;

		// Try our luck - check for an exact match in another xreg.
		for (int i = 1; i < TOTAL_MAPPABLE_IRREGS; ++i) {
			if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == mr[r].imm) {
				// Awesome, let's just use this reg.
				return FromNativeReg(mr[i].nReg);
			}
		}
	}

	return INVALID_REG;
}

ARM64Reg Arm64IRRegCache::GetAndLockTempGPR() {
	IRNativeReg reg = AllocateReg(MIPSLoc::REG, MIPSMap::INIT);
	if (reg != -1) {
		nr[reg].tempLockIRIndex = irIndex_;
	}
	return FromNativeReg(reg);
}

ARM64Reg Arm64IRRegCache::GetAndLockTempFPR() {
	IRNativeReg reg = AllocateReg(MIPSLoc::FREG, MIPSMap::INIT);
	if (reg != -1) {
		nr[reg].tempLockIRIndex = irIndex_;
	}
	return FromNativeReg(reg);
}

ARM64Reg Arm64IRRegCache::MapWithFPRTemp(const IRInst &inst) {
	return FromNativeReg(MapWithTemp(inst, MIPSLoc::FREG));
}

ARM64Reg Arm64IRRegCache::MapGPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidGPR(mipsReg));

	// Okay, not mapped, so we need to allocate an arm64 register.
	IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 1, mapFlags);
	return FromNativeReg(nreg);
}

ARM64Reg Arm64IRRegCache::MapGPR2(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidGPR(mipsReg) && IsValidGPR(mipsReg + 1));

	// Okay, not mapped, so we need to allocate an arm64 register.
	IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 2, mapFlags);
	return FromNativeReg64(nreg);
}

ARM64Reg Arm64IRRegCache::MapGPRAsPointer(IRReg reg) {
	return FromNativeReg64(MapNativeRegAsPointer(reg));
}

ARM64Reg Arm64IRRegCache::MapFPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(mipsReg));
	_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::MEM || mr[mipsReg + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, mipsReg + 32, 1, mapFlags);
	if (nreg != -1)
		return FromNativeReg(nreg);
	return INVALID_REG;
}

ARM64Reg Arm64IRRegCache::MapVec2(IRReg first, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(first));
	_dbg_assert_((first & 1) == 0);
	_dbg_assert_(mr[first + 32].loc == MIPSLoc::MEM || mr[first + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, first + 32, 2, mapFlags);
	if (nreg != -1)
		return EncodeRegToDouble(FromNativeReg(nreg));
	return INVALID_REG;
}

ARM64Reg Arm64IRRegCache::MapVec4(IRReg first, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(first));
	_dbg_assert_((first & 3) == 0);
	_dbg_assert_(mr[first + 32].loc == MIPSLoc::MEM || mr[first + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, first + 32, 4, mapFlags);
	if (nreg != -1)
		return EncodeRegToQuad(FromNativeReg(nreg));
	return INVALID_REG;
}

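// Converts a mapped register between "value" and "pointer" form. With pointerify, the
// high 32 bits of Memory::base are merged in (or simply ignored again on the way back);
// otherwise the membase register is added or subtracted. MASKED_PSP_MEMORY builds mask
// the address first, which is destructive, hence the isDirty asserts.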
void Arm64IRRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
	_assert_(nreg >= 0 && nreg < (IRNativeReg)WZR);
	ARM64Reg r = FromNativeReg64(nreg);
	if (state) {
		if (!jo_->enablePointerify) {
#if defined(MASKED_PSP_MEMORY)
			// This destroys the value...
			_dbg_assert_(!nr[nreg].isDirty);
			emit_->ANDI2R(r, r, Memory::MEMVIEW32_MASK);
#endif
			emit_->ADD(r, r, MEMBASEREG);
		} else {
			uint32_t membaseHigh = (uint32_t)((uint64_t)Memory::base >> 32);
			emit_->MOVK(r, membaseHigh & 0xFFFF, SHIFT_32);
			if (membaseHigh & 0xFFFF0000)
				emit_->MOVK(r, membaseHigh >> 16, SHIFT_48);
		}
	} else {
		if (!jo_->enablePointerify) {
#if defined(MASKED_PSP_MEMORY)
			_dbg_assert_(!nr[nreg].isDirty);
#endif
			emit_->SUB(r, r, MEMBASEREG);
		} else {
			// Nothing to do, just ignore the high 32 bits.
		}
	}
}

bool Arm64IRRegCache::IsNativeRegCompatible(IRNativeReg nreg, MIPSLoc type, MIPSMap flags, int lanes) {
	// No special flags, skip the check for a little speed.
	return true;
}

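// Load/store between the MIPS context and a host register. GPRs use 32-bit accesses
// (or a single 64-bit access for the LO/HI pair), while FPRs pick 32/64/128-bit
// accesses based on how many lanes are mapped.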
void Arm64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	ARM64Reg r = FromNativeReg(nreg);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (nreg < NUM_X_REGS) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		if (lanes == 1)
			emit_->LDR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->LDR(INDEX_UNSIGNED, EncodeRegTo64(r), CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	} else {
		_dbg_assert_(nreg < NUM_X_REGS + NUM_X_FREGS);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot load this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			fp_->LDR(32, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			fp_->LDR(64, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 4)
			fp_->LDR(128, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	}
}

void Arm64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	ARM64Reg r = FromNativeReg(nreg);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (nreg < NUM_X_REGS) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		_assert_(mr[first].loc == MIPSLoc::REG || mr[first].loc == MIPSLoc::REG_IMM);
		if (lanes == 1)
			emit_->STR(INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			emit_->STR(INDEX_UNSIGNED, EncodeRegTo64(r), CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	} else {
		_dbg_assert_(nreg < NUM_X_REGS + NUM_X_FREGS);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot store this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			fp_->STR(32, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 2)
			fp_->STR(64, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else if (lanes == 4)
			fp_->STR(128, INDEX_UNSIGNED, r, CTXREG, GetMipsRegOffset(first));
		else
			_assert_(false);
	}
}

void Arm64IRRegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) {
	ARM64Reg r = FromNativeReg(nreg);
	_dbg_assert_(nreg >= 0 && nreg < (IRNativeReg)WZR);
	// On ARM64, MOVZ/MOVK is really fast.
	emit_->MOVI2R(r, imm);
}

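// Stores a known immediate straight to the context without mapping the target:
// zero uses WZR, an equal immediate already in some host register is reused, and
// otherwise the value is materialized into SCRATCH1 first.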
void Arm64IRRegCache::StoreRegValue(IRReg mreg, uint32_t imm) {
	_assert_(IsValidGPRNoZero(mreg));
	// Try to optimize using a different reg.
	ARM64Reg storeReg = INVALID_REG;
	if (imm == 0)
		storeReg = WZR;

	// Could we get lucky? Check for an exact match in another xreg.
	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS; ++i) {
		if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == imm) {
			// Awesome, let's just store this reg.
			storeReg = (ARM64Reg)mr[i].nReg;
			break;
		}
	}

	if (storeReg == INVALID_REG) {
		emit_->MOVI2R(SCRATCH1, imm);
		storeReg = SCRATCH1;
	}
	emit_->STR(INDEX_UNSIGNED, storeReg, CTXREG, GetMipsRegOffset(mreg));
}

bool Arm64IRRegCache::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
	bool allowed = !mr[nr[nreg].mipsReg].isStatic;
	// There's currently no support for non-FREGs here.
	allowed = allowed && type == MIPSLoc::FREG;

	if (dest == -1)
		dest = nreg;

	if (allowed && (flags == MIPSMap::INIT || flags == MIPSMap::DIRTY)) {
		// Alright, changing lane count (possibly including lane position.)
		IRReg oldfirst = nr[nreg].mipsReg;
		int oldlanes = 0;
		while (mr[oldfirst + oldlanes].nReg == nreg)
			oldlanes++;
		_assert_msg_(oldlanes != 0, "TransferNativeReg encountered nreg mismatch");
		_assert_msg_(oldlanes != lanes, "TransferNativeReg transfer to same lanecount, misaligned?");

		if (lanes == 1 && TransferVecTo1(nreg, dest, first, oldlanes))
			return true;
		if (oldlanes == 1 && Transfer1ToVec(nreg, dest, first, lanes))
			return true;
	}

	return IRNativeRegCacheBase::TransferNativeReg(nreg, dest, type, first, lanes, flags);
}

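// Splits a single lane out of a vector-mapped register. Other still-needed lanes are
// DUPed into free registers where possible; if not everything could be kept in
// registers, the dirty vector is stored back first, and the bookkeeping in mr/nr is
// updated so the extracted lane ends up as a normal single-lane mapping in 'dest'.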
bool Arm64IRRegCache::TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes) {
	IRReg oldfirst = nr[nreg].mipsReg;

	// Is it worth preserving any of the old regs?
	int numKept = 0;
	for (int i = 0; i < oldlanes; ++i) {
		// Skip whichever one this is extracting.
		if (oldfirst + i == first)
			continue;
		// If 0 isn't being transferred, easy to keep in its original reg.
		if (i == 0 && dest != nreg) {
			numKept++;
			continue;
		}

		IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
		if (freeReg != -1 && IsRegRead(MIPSLoc::FREG, oldfirst + i)) {
			// If there's one free, use it. Don't modify nreg, though.
			fp_->DUP(32, FromNativeReg(freeReg), FromNativeReg(nreg), i);

			// Update accounting.
			nr[freeReg].isDirty = nr[nreg].isDirty;
			nr[freeReg].mipsReg = oldfirst + i;
			mr[oldfirst + i].lane = -1;
			mr[oldfirst + i].nReg = freeReg;
			numKept++;
		}
	}

	// Unless all other lanes were kept, store.
	if (nr[nreg].isDirty && numKept < oldlanes - 1) {
		StoreNativeReg(nreg, oldfirst, oldlanes);
		// Set false even for regs that were split out, since they were flushed too.
		for (int i = 0; i < oldlanes; ++i) {
			if (mr[oldfirst + i].nReg != -1)
				nr[mr[oldfirst + i].nReg].isDirty = false;
		}
	}

	// Next, move the desired element into first place.
	if (mr[first].lane > 0) {
		fp_->DUP(32, FromNativeReg(dest), FromNativeReg(nreg), mr[first].lane);
	} else if (mr[first].lane <= 0 && dest != nreg) {
		fp_->DUP(32, FromNativeReg(dest), FromNativeReg(nreg), 0);
	}

	// Now update accounting.
	for (int i = 0; i < oldlanes; ++i) {
		auto &mreg = mr[oldfirst + i];
		if (oldfirst + i == first) {
			mreg.lane = -1;
			mreg.nReg = dest;
		} else if (mreg.nReg == nreg && i == 0 && nreg != dest) {
			// Still in the same register, but no longer a vec.
			mreg.lane = -1;
		} else if (mreg.nReg == nreg) {
			// No longer in a register.
			mreg.nReg = -1;
			mreg.lane = -1;
			mreg.loc = MIPSLoc::MEM;
		}
	}

	if (dest != nreg) {
		nr[dest].isDirty = nr[nreg].isDirty;
		if (oldfirst == first) {
			nr[nreg].mipsReg = -1;
			nr[nreg].isDirty = false;
		}
	}
	nr[dest].mipsReg = first;

	return true;
}

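// The inverse: widens a single mapped lane into a vec2/vec4 register. blendMask has a
// bit set for each lane that is not currently in a host register and must be loaded
// from the context; the lanes==4 path handles the various masks with a short
// ZIP1/UZP/INS/LDR sequence where it can, and returns false for unsupported cases so
// the generic fallback takes over.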
bool Arm64IRRegCache::Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes) {
	ARM64Reg destReg = FromNativeReg(dest);
	ARM64Reg cur[4]{};
	int numInRegs = 0;
	u8 blendMask = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].lane != -1 || (i != 0 && mr[first + i].spillLockIRIndex >= irIndex_)) {
			// Can't do it, either double mapped or overlapping vec.
			return false;
		}

		if (mr[first + i].nReg == -1) {
			cur[i] = INVALID_REG;
			blendMask |= 1 << i;
		} else {
			cur[i] = FromNativeReg(mr[first + i].nReg);
			numInRegs++;
		}
	}

	// Shouldn't happen, this should only get called to transfer one in a reg.
	if (numInRegs == 0)
		return false;

	// If everything's currently in a reg, move it into this reg.
	if (lanes == 4) {
		// Go with an exhaustive approach, only 15 possibilities...
		if (blendMask == 0) {
			// y = yw##, x = xz##, dest = xyzw.
			fp_->ZIP1(32, EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[3]));
			fp_->ZIP1(32, EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[2]));
			fp_->ZIP1(32, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[1]));
		} else if (blendMask == 0b0001) {
			// y = yw##, w = x###, w = xz##, dest = xyzw.
			fp_->ZIP1(32, EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[3]));
			fp_->LDR(32, INDEX_UNSIGNED, cur[3], CTXREG, GetMipsRegOffset(first + 0));
			fp_->ZIP1(32, EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[2]));
			fp_->ZIP1(32, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[1]));
		} else if (blendMask == 0b0010) {
			// x = xz##, z = y###, z = yw##, dest = xyzw.
			fp_->ZIP1(32, EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[2]));
			fp_->LDR(32, INDEX_UNSIGNED, cur[2], CTXREG, GetMipsRegOffset(first + 1));
			fp_->ZIP1(32, EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[3]));
			fp_->ZIP1(32, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[2]));
		} else if (blendMask == 0b0011 && (first & 1) == 0) {
			// z = zw##, w = xy##, dest = xyzw. Mixed lane sizes.
			fp_->ZIP1(32, EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[3]));
			fp_->LDR(64, INDEX_UNSIGNED, EncodeRegToDouble(cur[3]), CTXREG, GetMipsRegOffset(first + 0));
			fp_->ZIP1(64, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[2]));
		} else if (blendMask == 0b0100) {
			// y = yw##, w = z###, x = xz##, dest = xyzw.
			fp_->ZIP1(32, EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[3]));
			fp_->LDR(32, INDEX_UNSIGNED, cur[3], CTXREG, GetMipsRegOffset(first + 2));
			fp_->ZIP1(32, EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[3]));
			fp_->ZIP1(32, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[1]));
		} else if (blendMask == 0b0101 && (first & 3) == 0) {
			// y = yw##, w=x#z#, w = xz##, dest = xyzw.
			fp_->ZIP1(32, EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[3]));
			fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(cur[3]), CTXREG, GetMipsRegOffset(first));
			fp_->UZP1(32, EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[3]));
			fp_->ZIP1(32, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[1]));
		} else if (blendMask == 0b0110 && (first & 3) == 0) {
			if (destReg == cur[0]) {
				// w = wx##, dest = #yz#, dest = xyz#, dest = xyzw.
				fp_->ZIP1(32, EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[3]), EncodeRegToQuad(cur[0]));
				fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
				fp_->INS(32, EncodeRegToQuad(destReg), 0, EncodeRegToQuad(cur[3]), 1);
				fp_->INS(32, EncodeRegToQuad(destReg), 3, EncodeRegToQuad(cur[3]), 0);
			} else {
				// Assumes destReg may equal cur[3].
				// x = xw##, dest = #yz#, dest = xyz#, dest = xyzw.
				fp_->ZIP1(32, EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[3]));
				fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
				fp_->INS(32, EncodeRegToQuad(destReg), 0, EncodeRegToQuad(cur[0]), 0);
				fp_->INS(32, EncodeRegToQuad(destReg), 3, EncodeRegToQuad(cur[0]), 1);
			}
		} else if (blendMask == 0b0111 && (first & 3) == 0 && destReg != cur[3]) {
			// dest = xyz#, dest = xyzw.
			fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
			fp_->INS(32, EncodeRegToQuad(destReg), 3, EncodeRegToQuad(cur[3]), 0);
		} else if (blendMask == 0b1000) {
			// x = xz##, z = w###, y = yw##, dest = xyzw.
			fp_->ZIP1(32, EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[2]));
			fp_->LDR(32, INDEX_UNSIGNED, cur[2], CTXREG, GetMipsRegOffset(first + 3));
			fp_->ZIP1(32, EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[2]));
			fp_->ZIP1(32, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[1]));
		} else if (blendMask == 0b1001 && (first & 3) == 0) {
			if (destReg == cur[1]) {
				// w = zy##, dest = x##w, dest = xy#w, dest = xyzw.
				fp_->ZIP1(32, EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[1]));
				fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
				fp_->INS(32, EncodeRegToQuad(destReg), 1, EncodeRegToQuad(cur[2]), 1);
				fp_->INS(32, EncodeRegToQuad(destReg), 2, EncodeRegToQuad(cur[2]), 0);
			} else {
				// Assumes destReg may equal cur[2].
				// y = yz##, dest = x##w, dest = xy#w, dest = xyzw.
				fp_->ZIP1(32, EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[1]), EncodeRegToQuad(cur[2]));
				fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
				fp_->INS(32, EncodeRegToQuad(destReg), 1, EncodeRegToQuad(cur[1]), 0);
				fp_->INS(32, EncodeRegToQuad(destReg), 2, EncodeRegToQuad(cur[1]), 1);
			}
		} else if (blendMask == 0b1010 && (first & 3) == 0) {
			// x = xz##, z = #y#w, z=yw##, dest = xyzw.
			fp_->ZIP1(32, EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[2]));
			fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(cur[2]), CTXREG, GetMipsRegOffset(first));
			fp_->UZP2(32, EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[2]), EncodeRegToQuad(cur[2]));
			fp_->ZIP1(32, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[2]));
		} else if (blendMask == 0b1011 && (first & 3) == 0 && destReg != cur[2]) {
			// dest = xy#w, dest = xyzw.
			fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
			fp_->INS(32, EncodeRegToQuad(destReg), 2, EncodeRegToQuad(cur[2]), 0);
		} else if (blendMask == 0b1100 && (first & 1) == 0) {
			// x = xy##, y = zw##, dest = xyzw. Mixed lane sizes.
			fp_->ZIP1(32, EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[1]));
			fp_->LDR(64, INDEX_UNSIGNED, EncodeRegToDouble(cur[1]), CTXREG, GetMipsRegOffset(first + 2));
			fp_->ZIP1(64, EncodeRegToQuad(destReg), EncodeRegToQuad(cur[0]), EncodeRegToQuad(cur[1]));
		} else if (blendMask == 0b1101 && (first & 3) == 0 && destReg != cur[1]) {
			// dest = x#zw, dest = xyzw.
			fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
			fp_->INS(32, EncodeRegToQuad(destReg), 1, EncodeRegToQuad(cur[1]), 0);
		} else if (blendMask == 0b1110 && (first & 3) == 0 && destReg != cur[0]) {
			// dest = #yzw, dest = xyzw.
			fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
			fp_->INS(32, EncodeRegToQuad(destReg), 0, EncodeRegToQuad(cur[0]), 0);
		} else if (blendMask == 0b1110 && (first & 3) == 0) {
			// If dest == cur[0] (which may be common), we need a temp...
			IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
			// Very unfortunate.
			if (freeReg == INVALID_REG)
				return false;

			// free = x###, dest = #yzw, dest = xyzw.
			fp_->DUP(32, EncodeRegToQuad(FromNativeReg(freeReg)), EncodeRegToQuad(cur[0]), 0);
			fp_->LDR(128, INDEX_UNSIGNED, EncodeRegToQuad(destReg), CTXREG, GetMipsRegOffset(first));
			fp_->INS(32, EncodeRegToQuad(destReg), 0, EncodeRegToQuad(FromNativeReg(freeReg)), 0);
		} else {
			return false;
		}
	} else if (lanes == 2) {
		if (cur[0] != INVALID_REG && cur[1] != INVALID_REG) {
			fp_->ZIP1(32, EncodeRegToDouble(destReg), EncodeRegToDouble(cur[0]), EncodeRegToDouble(cur[1]));
		} else if (cur[0] == INVALID_REG && dest != nreg) {
			fp_->LDR(32, INDEX_UNSIGNED, destReg, CTXREG, GetMipsRegOffset(first + 0));
			fp_->INS(32, EncodeRegToDouble(destReg), 1, EncodeRegToDouble(cur[1]), 0);
		} else {
			IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
			if (freeReg == INVALID_REG)
				return false;

			if (cur[0] == INVALID_REG) {
				fp_->LDR(32, INDEX_UNSIGNED, FromNativeReg(freeReg), CTXREG, GetMipsRegOffset(first + 0));
				fp_->ZIP1(32, EncodeRegToDouble(destReg), EncodeRegToDouble(FromNativeReg(freeReg)), EncodeRegToDouble(cur[1]));
			} else {
				fp_->LDR(32, INDEX_UNSIGNED, FromNativeReg(freeReg), CTXREG, GetMipsRegOffset(first + 1));
				fp_->ZIP1(32, EncodeRegToDouble(destReg), EncodeRegToDouble(cur[0]), EncodeRegToDouble(FromNativeReg(freeReg)));
			}
		}
	} else {
		return false;
	}

	mr[first].lane = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].nReg != -1) {
			// If this was dirty, the combined reg is now dirty.
			if (nr[mr[first + i].nReg].isDirty)
				nr[dest].isDirty = true;

			// Throw away the other register we're no longer using.
			if (i != 0)
				DiscardNativeReg(mr[first + i].nReg);
		}

		// And set it as using the new one.
		mr[first + i].lane = i;
		mr[first + i].loc = MIPSLoc::FREG;
		mr[first + i].nReg = dest;
	}

	if (dest != nreg) {
		nr[dest].mipsReg = first;
		nr[nreg].mipsReg = -1;
		nr[nreg].isDirty = false;
	}

	return true;
}

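// Writes dirty, non-static guest regs back to the context. Neighboring regs are
// combined where possible: two immediates become one 64-bit store, and two dirty GPRs
// or FPRs with a context offset that fits are stored with a single STP. Anything that
// couldn't be paired is handled by the generic IRNativeRegCacheBase::FlushAll().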
void Arm64IRRegCache::FlushAll(bool gprs, bool fprs) {
	// Note: make sure not to change the registers when flushing:
	// Branching code may expect the armreg to retain its value.

	auto needsFlush = [&](IRReg i) {
		if (mr[i].loc != MIPSLoc::MEM || mr[i].isStatic)
			return false;
		if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
			return false;
		return true;
	};

	// Try to flush in pairs when possible.
	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
		if (!needsFlush(i) || !needsFlush(i + 1))
			continue;
		// Ignore multilane regs. Could handle with more smartness...
		if (mr[i].lane != -1 || mr[i + 1].lane != -1)
			continue;

		int offset = GetMipsRegOffset(i);

		// If both are imms, let's materialize a single reg and store.
		if (mr[i].loc == MIPSLoc::IMM && mr[i + 1].loc == MIPSLoc::IMM) {
			if ((i & 1) == 0) {
				uint64_t fullImm = ((uint64_t) mr[i + 1].imm << 32) | mr[i].imm;
				emit_->MOVI2R(SCRATCH1_64, fullImm);
				emit_->STR(INDEX_UNSIGNED, SCRATCH1_64, CTXREG, offset);
				DiscardReg(i);
				DiscardReg(i + 1);
				++i;
			}
			continue;
		}

		// Okay, two dirty regs in a row, in need of flushing. Both GPRs?
		if (IsValidGPR(i) && IsValidGPR(i + 1) && offset <= 252) {
			auto setupForFlush = [&](ARM64Reg &ar, IRReg r) {
				if (mr[r].loc == MIPSLoc::IMM) {
					ar = TryMapTempImm(r);
					if (ar == INVALID_REG) {
						// Both cannot be imms, so this is safe.
						ar = SCRATCH1;
						emit_->MOVI2R(ar, mr[r].imm);
					}
				} else if (mr[r].loc == MIPSLoc::REG_AS_PTR) {
					AdjustNativeRegAsPtr(r, false);
					ar = FromNativeReg(mr[r].nReg);
				} else {
					_dbg_assert_(mr[r].loc == MIPSLoc::REG || mr[r].loc == MIPSLoc::REG_IMM);
					ar = FromNativeReg(mr[r].nReg);
				}
			};

			ARM64Reg armRegs[2]{ INVALID_REG, INVALID_REG };
			setupForFlush(armRegs[0], i);
			setupForFlush(armRegs[1], i + 1);

			emit_->STP(INDEX_SIGNED, armRegs[0], armRegs[1], CTXREG, offset);
			DiscardReg(i);
			DiscardReg(i + 1);
			++i;
			continue;
		}

		// Perhaps as FPRs? Note: these must be single lane at this point.
		// TODO: Could use STP on quads etc. too, i.e. i & i + 4.
		if (i >= 32 && IsValidFPR(i - 32) && IsValidFPR(i + 1 - 32) && offset <= 252) {
			_dbg_assert_(mr[i].loc == MIPSLoc::FREG && mr[i + 1].loc == MIPSLoc::FREG);
			fp_->STP(32, INDEX_SIGNED, FromNativeReg(mr[i].nReg), FromNativeReg(mr[i + 1].nReg), CTXREG, offset);

			DiscardNativeReg(mr[i].nReg);
			DiscardNativeReg(mr[i + 1].nReg);

			++i;
			continue;
		}
	}

	// Flush all the rest that weren't done via STP.
	IRNativeRegCacheBase::FlushAll(gprs, fprs);
}

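// Accessors used by the emitter: R/R64/RPtr return the host GPR a guest reg is mapped
// to (RPtr requires it to be usable as a pointer), and F/FD/FQ return the FPR in
// single/double/quad form. These assume the register has already been mapped; failure
// is reported and INVALID_REG returned.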
ARM64Reg Arm64IRRegCache::R(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM);
	if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		return FromNativeReg(mr[mipsReg].nReg);
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in arm64 reg", mipsReg);
		return INVALID_REG;  // BAAAD
	}
}

ARM64Reg Arm64IRRegCache::R64(IRReg mipsReg) {
	return EncodeRegTo64(R(mipsReg));
}

ARM64Reg Arm64IRRegCache::RPtr(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM || mr[mipsReg].loc == MIPSLoc::REG_AS_PTR);
	if (mr[mipsReg].loc == MIPSLoc::REG_AS_PTR) {
		return FromNativeReg64(mr[mipsReg].nReg);
	} else if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		int r = mr[mipsReg].nReg;
		_dbg_assert_(nr[r].pointerified);
		if (nr[r].pointerified) {
			return FromNativeReg64(mr[mipsReg].nReg);
		} else {
			ERROR_LOG(Log::JIT, "Tried to use a non-pointer register as a pointer");
			return INVALID_REG;
		}
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in arm64 reg", mipsReg);
		return INVALID_REG;  // BAAAD
	}
}

ARM64Reg Arm64IRRegCache::F(IRReg mipsReg) {
	_dbg_assert_(IsValidFPR(mipsReg));
	_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::FREG);
	if (mr[mipsReg + 32].loc == MIPSLoc::FREG) {
		return FromNativeReg(mr[mipsReg + 32].nReg);
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in arm64 reg", mipsReg);
		return INVALID_REG;  // BAAAD
	}
}

ARM64Reg Arm64IRRegCache::FD(IRReg mipsReg) {
	return EncodeRegToDouble(F(mipsReg));
}

ARM64Reg Arm64IRRegCache::FQ(IRReg mipsReg) {
	return EncodeRegToQuad(F(mipsReg));
}

IRNativeReg Arm64IRRegCache::GPRToNativeReg(ARM64Reg r) {
	_dbg_assert_msg_(r >= 0 && r < 0x40, "Not a GPR?");
	return (IRNativeReg)DecodeReg(r);
}

IRNativeReg Arm64IRRegCache::VFPToNativeReg(ARM64Reg r) {
	_dbg_assert_msg_(r >= 0x40 && r < 0xE0, "Not VFP?");
	return (IRNativeReg)(NUM_X_REGS + (int)DecodeReg(r));
}

ARM64Reg Arm64IRRegCache::FromNativeReg(IRNativeReg r) {
	if (r >= NUM_X_REGS)
		return EncodeRegToSingle((Arm64Gen::ARM64Reg)r);
	return (Arm64Gen::ARM64Reg)r;
}

ARM64Reg Arm64IRRegCache::FromNativeReg64(IRNativeReg r) {
	_dbg_assert_msg_(r >= 0 && r < NUM_X_REGS, "Not a GPR?");
	return EncodeRegTo64((Arm64Gen::ARM64Reg)r);
}

#endif