GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/x86/X64IRRegCache.cpp
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)

#ifndef offsetof
#include <cstddef>
#endif

#include "Common/CPUDetect.h"
#include "Core/MemMap.h"
#include "Core/MIPS/IR/IRInst.h"
#include "Core/MIPS/IR/IRAnalysis.h"
#include "Core/MIPS/x86/X64IRRegCache.h"
#include "Core/MIPS/JitCommon/JitState.h"
#include "Core/Reporting.h"

using namespace Gen;
using namespace X64IRJitConstants;

X64IRRegCache::X64IRRegCache(MIPSComp::JitOptions *jo)
	: IRNativeRegCacheBase(jo) {
	config_.totalNativeRegs = NUM_X_REGS + NUM_X_FREGS;
	config_.mapFPUSIMD = true;
	// XMM regs are used for both FPU and Vec, so we don't need VREGs.
	config_.mapUseVRegs = false;
}

void X64IRRegCache::Init(XEmitter *emitter) {
	emit_ = emitter;
}

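// Returns the preferred allocation order for the requested register class. `count`
// receives the number of entries and `base` the offset used to translate entries back
// to IRNativeReg indices. Some X64Map flags restrict the choice to a single fixed reg.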
const int *X64IRRegCache::GetAllocationOrder(MIPSLoc type, MIPSMap flags, int &count, int &base) const {
	if (type == MIPSLoc::REG) {
		base = RAX;

		static const int allocationOrder[] = {
#if PPSSPP_ARCH(AMD64)
#ifdef _WIN32
			RSI, RDI, R8, R9, R10, R11, R12, R13, RDX, RCX,
#else
			RBP, R8, R9, R10, R11, R12, R13, RDX, RCX,
#endif
			// Intentionally last.
			R15,
#elif PPSSPP_ARCH(X86)
			ESI, EDI, EDX, EBX, ECX,
#endif
		};

		if ((flags & X64Map::MASK) == X64Map::SHIFT) {
			// It's a single option for shifts.
			static const int shiftReg[] = { ECX };
			count = 1;
			return shiftReg;
		}
		if ((flags & X64Map::MASK) == X64Map::HIGH_DATA) {
			// It's a single option for high data: EDX (e.g. the high half of a multiply).
			static const int highDataReg[] = { EDX };
			count = 1;
			return highDataReg;
		}
#if PPSSPP_ARCH(X86)
		if ((flags & X64Map::MASK) == X64Map::LOW_SUBREG) {
			static const int lowSubRegAllocationOrder[] = {
				EDX, EBX, ECX,
			};
			count = ARRAY_SIZE(lowSubRegAllocationOrder);
			return lowSubRegAllocationOrder;
		}
#else
		if (jo_->reserveR15ForAsm) {
			count = ARRAY_SIZE(allocationOrder) - 1;
			return allocationOrder;
		}
#endif
		count = ARRAY_SIZE(allocationOrder);
		return allocationOrder;
	} else if (type == MIPSLoc::FREG) {
		base = -NUM_X_REGS;

		// TODO: Might have to change this if we can't live without dedicated temps.
		static const int allocationOrder[] = {
#if PPSSPP_ARCH(AMD64)
			XMM6, XMM7, XMM8, XMM9, XMM10, XMM11, XMM12, XMM13, XMM14, XMM15, XMM1, XMM2, XMM3, XMM4, XMM5, XMM0,
#elif PPSSPP_ARCH(X86)
			XMM1, XMM2, XMM3, XMM4, XMM5, XMM6, XMM7, XMM0,
#endif
		};

		if ((flags & X64Map::MASK) == X64Map::XMM0) {
			// Certain cases require this reg.
			static const int blendReg[] = { XMM0 };
			count = 1;
			return blendReg;
		}

		count = ARRAY_SIZE(allocationOrder);
		return allocationOrder;
	} else {
		_assert_msg_(false, "Allocation order not yet implemented");
		count = 0;
		return nullptr;
	}
}

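// Spill anything living in ABI volatile (caller-saved) registers, since a call out to
// C++ code may clobber them.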
void X64IRRegCache::FlushBeforeCall() {
	// These registers are not preserved by function calls.
#if PPSSPP_ARCH(AMD64)
#ifdef _WIN32
	FlushNativeReg(GPRToNativeReg(RCX));
	FlushNativeReg(GPRToNativeReg(RDX));
	FlushNativeReg(GPRToNativeReg(R8));
	FlushNativeReg(GPRToNativeReg(R9));
	FlushNativeReg(GPRToNativeReg(R10));
	FlushNativeReg(GPRToNativeReg(R11));
	for (int i = 0; i < 6; ++i)
		FlushNativeReg(NUM_X_REGS + i);
#else
	FlushNativeReg(GPRToNativeReg(R8));
	FlushNativeReg(GPRToNativeReg(R9));
	FlushNativeReg(GPRToNativeReg(R10));
	FlushNativeReg(GPRToNativeReg(R11));
	for (int i = 0; i < NUM_X_FREGS; ++i)
		FlushNativeReg(NUM_X_REGS + i);
#endif
#elif PPSSPP_ARCH(X86)
	FlushNativeReg(GPRToNativeReg(ECX));
	FlushNativeReg(GPRToNativeReg(EDX));
	for (int i = 0; i < NUM_X_FREGS; ++i)
		FlushNativeReg(NUM_X_REGS + i);
#endif
}

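// Flushes all dirty registers back to the MIPS context. Adjacent dirty single floats
// are packed into one XMM first so they can be stored with a single MOVLPS/MOVAPS.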
void X64IRRegCache::FlushAll(bool gprs, bool fprs) {
	// Note: make sure not to change the registers when flushing:
	// Branching code may expect the x64reg to retain its value.

	auto needsFlush = [&](IRReg i) {
		if (mr[i].loc != MIPSLoc::FREG || mr[i].isStatic)
			return false;
		if (mr[i].nReg == -1 || !nr[mr[i].nReg].isDirty)
			return false;
		return true;
	};

	auto isSingleFloat = [&](IRReg i) {
		if (mr[i].lane != -1 || mr[i].loc != MIPSLoc::FREG)
			return false;
		return true;
	};

	// Sometimes, float/vector regs may be in separate regs in a sequence.
	// It's worth combining and flushing together.
	for (int i = 1; i < TOTAL_MAPPABLE_IRREGS - 1; ++i) {
		if (!needsFlush(i) || !needsFlush(i + 1))
			continue;
		// GPRs are probably not worth it. Merging Vec2s might be, but pretty uncommon.
		if (!isSingleFloat(i) || !isSingleFloat(i + 1))
			continue;

		X64Reg regs[4]{ INVALID_REG, INVALID_REG, INVALID_REG, INVALID_REG };
		regs[0] = FromNativeReg(mr[i + 0].nReg);
		regs[1] = FromNativeReg(mr[i + 1].nReg);

		bool flushVec4 = i + 3 < TOTAL_MAPPABLE_IRREGS && needsFlush(i + 2) && needsFlush(i + 3);
		if (flushVec4 && isSingleFloat(i + 2) && isSingleFloat(i + 3) && (i & 3) == 0) {
			regs[2] = FromNativeReg(mr[i + 2].nReg);
			regs[3] = FromNativeReg(mr[i + 3].nReg);

			// Note that this doesn't change the low lane of any of these regs.
			emit_->UNPCKLPS(regs[1], ::R(regs[3]));
			emit_->UNPCKLPS(regs[0], ::R(regs[2]));
			emit_->UNPCKLPS(regs[0], ::R(regs[1]));
			emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);

			for (int j = 0; j < 4; ++j)
				DiscardReg(i + j);
			i += 3;
			continue;
		}

		// TODO: Maybe this isn't always worth doing.
		emit_->UNPCKLPS(regs[0], ::R(regs[1]));
		emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(i)), regs[0]);

		DiscardReg(i);
		DiscardReg(i + 1);
		++i;
		continue;
	}

	IRNativeRegCacheBase::FlushAll(gprs, fprs);
}

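// If the value of this GPR is available without a new load, returns a register that
// already holds it (the mapped reg, or any reg caching the same immediate), subject to
// the X64Map constraint. Returns INVALID_REG otherwise.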
X64Reg X64IRRegCache::TryMapTempImm(IRReg r, X64Map flags) {
	_dbg_assert_(IsValidGPR(r));

	auto canUseReg = [flags](X64Reg r) {
		switch (flags & X64Map::MASK) {
		case X64Map::NONE:
			return true;
		case X64Map::LOW_SUBREG:
			return HasLowSubregister(r);
		case X64Map::SHIFT:
			return r == RCX;
		case X64Map::HIGH_DATA:
			return r == RDX;
		default:
			_assert_msg_(false, "Unexpected flags");
		}
		return false;
	};

	// If already mapped, no need for a temporary.
	if (IsGPRMapped(r)) {
		if (canUseReg(RX(r)))
			return RX(r);
	}

	if (mr[r].loc == MIPSLoc::IMM) {
		// Try our luck - check for an exact match in another xreg.
		for (int i = 0; i < TOTAL_MAPPABLE_IRREGS; ++i) {
			if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == mr[r].imm) {
				// Awesome, let's just use this reg.
				if (canUseReg(FromNativeReg(mr[i].nReg)))
					return FromNativeReg(mr[i].nReg);
			}
		}
	}

	return INVALID_REG;
}

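// Allocates a free register of the given class as a temporary and locks it for the
// remainder of the current IR instruction so it can't be reused mid-op.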
X64Reg X64IRRegCache::GetAndLockTempGPR() {
	IRNativeReg reg = AllocateReg(MIPSLoc::REG, MIPSMap::INIT);
	if (reg != -1) {
		nr[reg].tempLockIRIndex = irIndex_;
	}
	return FromNativeReg(reg);
}

X64Reg X64IRRegCache::GetAndLockTempFPR() {
	IRNativeReg reg = AllocateReg(MIPSLoc::FREG, MIPSMap::INIT);
	if (reg != -1) {
		nr[reg].tempLockIRIndex = irIndex_;
	}
	return FromNativeReg(reg);
}

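// Pins a specific host GPR: flushes whatever currently lives in it, then temp-locks it
// for the current IR instruction.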
void X64IRRegCache::ReserveAndLockXGPR(Gen::X64Reg r) {
	IRNativeReg nreg = GPRToNativeReg(r);
	if (nr[nreg].mipsReg != IRREG_INVALID)
		FlushNativeReg(nreg);
	nr[r].tempLockIRIndex = irIndex_;
}

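// Maps the instruction's operands and also allocates an FPR temp, which is returned.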
X64Reg X64IRRegCache::MapWithFPRTemp(const IRInst &inst) {
	return FromNativeReg(MapWithTemp(inst, MIPSLoc::FREG));
}

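// Maps the instruction's operands with per-operand X64Map constraints. If a constraint
// demands a fixed register (CL for shift counts, EDX for high data, XMM0 for blends)
// that currently holds something else, that register is flushed or discarded first.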
void X64IRRegCache::MapWithFlags(IRInst inst, X64Map destFlags, X64Map src1Flags, X64Map src2Flags) {
	Mapping mapping[3];
	MappingFromInst(inst, mapping);

	mapping[0].flags = mapping[0].flags | destFlags;
	mapping[1].flags = mapping[1].flags | src1Flags;
	mapping[2].flags = mapping[2].flags | src2Flags;

	auto flushReg = [&](IRNativeReg nreg) {
		bool mustKeep = false;
		bool canDiscard = false;
		for (int i = 0; i < 3; ++i) {
			if (mapping[i].reg != nr[nreg].mipsReg)
				continue;

			if ((mapping[i].flags & MIPSMap::NOINIT) != MIPSMap::NOINIT) {
				mustKeep = true;
				break;
			} else {
				canDiscard = true;
			}
		}

		if (mustKeep || !canDiscard) {
			FlushNativeReg(nreg);
		} else {
			DiscardNativeReg(nreg);
		}
	};

	// If there are any special rules, we might need to spill.
	for (int i = 0; i < 3; ++i) {
		switch (mapping[i].flags & X64Map::MASK) {
		case X64Map::SHIFT:
			if (nr[RCX].mipsReg != mapping[i].reg)
				flushReg(RCX);
			break;

		case X64Map::HIGH_DATA:
			if (nr[RDX].mipsReg != mapping[i].reg)
				flushReg(RDX);
			break;

		case X64Map::XMM0:
			if (nr[XMMToNativeReg(XMM0)].mipsReg != mapping[i].reg)
				flushReg(XMMToNativeReg(XMM0));
			break;

		default:
			break;
		}
	}

	ApplyMapping(mapping, 3);
	CleanupMapping(mapping, 3);
}

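// Map a guest GPR into a host register: a single 32-bit value, a consecutive 64-bit
// pair (MapGPR2), or a register holding a host pointer into guest memory (MapGPRAsPointer).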
X64Reg X64IRRegCache::MapGPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidGPR(mipsReg));

	// Okay, not mapped, so we need to allocate an x64 register.
	IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 1, mapFlags);
	return FromNativeReg(nreg);
}

X64Reg X64IRRegCache::MapGPR2(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidGPR(mipsReg) && IsValidGPR(mipsReg + 1));

	// Okay, not mapped, so we need to allocate an x64 register.
	IRNativeReg nreg = MapNativeReg(MIPSLoc::REG, mipsReg, 2, mapFlags);
	return FromNativeReg(nreg);
}

X64Reg X64IRRegCache::MapGPRAsPointer(IRReg reg) {
	return FromNativeReg(MapNativeRegAsPointer(reg));
}

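// FPRs live at IR register index 32 and up, hence the +32 below. MapVec4 maps a
// 4-aligned group of floats into a single XMM register.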
X64Reg X64IRRegCache::MapFPR(IRReg mipsReg, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(mipsReg));
	_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::MEM || mr[mipsReg + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, mipsReg + 32, 1, mapFlags);
	if (nreg != -1)
		return FromNativeReg(nreg);
	return INVALID_REG;
}

X64Reg X64IRRegCache::MapVec4(IRReg first, MIPSMap mapFlags) {
	_dbg_assert_(IsValidFPR(first));
	_dbg_assert_((first & 3) == 0);
	_dbg_assert_(mr[first + 32].loc == MIPSLoc::MEM || mr[first + 32].loc == MIPSLoc::FREG);

	IRNativeReg nreg = MapNativeReg(MIPSLoc::FREG, first + 32, 4, mapFlags);
	if (nreg != -1)
		return FromNativeReg(nreg);
	return INVALID_REG;
}

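// Converts a GPR between holding a guest address and holding a host pointer by adding
// or subtracting the memory base (with masking first under MASKED_PSP_MEMORY builds).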
void X64IRRegCache::AdjustNativeRegAsPtr(IRNativeReg nreg, bool state) {
	_assert_(nreg >= 0 && nreg < NUM_X_REGS);
	X64Reg r = FromNativeReg(nreg);
	if (state) {
#if defined(MASKED_PSP_MEMORY)
		// This destroys the value...
		_dbg_assert_(!nr[nreg].isDirty);
		emit_->AND(PTRBITS, ::R(r), Imm32(Memory::MEMVIEW32_MASK));
		emit_->ADD(PTRBITS, ::R(r), ImmPtr(Memory::base));
#else
		emit_->ADD(PTRBITS, ::R(r), ::R(MEMBASEREG));
#endif
	} else {
#if defined(MASKED_PSP_MEMORY)
		_dbg_assert_(!nr[nreg].isDirty);
		emit_->SUB(PTRBITS, ::R(r), ImmPtr(Memory::base));
#else
		emit_->SUB(PTRBITS, ::R(r), ::R(MEMBASEREG));
#endif
	}
}

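// Load/store guest registers between the MIPS context and a host GPR or XMM.
// CTXREG is biased 128 bytes into the context (note the -128 in every displacement),
// which keeps common offsets within a signed 8-bit disp.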
void X64IRRegCache::LoadNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	X64Reg r = FromNativeReg(nreg);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (nreg < NUM_X_REGS) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		if (lanes == 1)
			emit_->MOV(32, ::R(r), MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
#if PPSSPP_ARCH(AMD64)
		else if (lanes == 2)
			emit_->MOV(64, ::R(r), MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
#endif
		else
			_assert_(false);
	} else {
		_dbg_assert_(nreg < NUM_X_REGS + NUM_X_FREGS);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot load this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			emit_->MOVSS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
		else if (lanes == 2)
			emit_->MOVLPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
		else if (lanes == 4 && (first & 3) == 0)
			emit_->MOVAPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
		else if (lanes == 4)
			emit_->MOVUPS(r, MDisp(CTXREG, -128 + GetMipsRegOffset(first)));
		else
			_assert_(false);
	}
}

void X64IRRegCache::StoreNativeReg(IRNativeReg nreg, IRReg first, int lanes) {
	X64Reg r = FromNativeReg(nreg);
	_dbg_assert_(first != MIPS_REG_ZERO);
	if (nreg < NUM_X_REGS) {
		_assert_(lanes == 1 || (lanes == 2 && first == IRREG_LO));
		_assert_(mr[first].loc == MIPSLoc::REG || mr[first].loc == MIPSLoc::REG_IMM);
		if (lanes == 1)
			emit_->MOV(32, MDisp(CTXREG, -128 + GetMipsRegOffset(first)), ::R(r));
#if PPSSPP_ARCH(AMD64)
		else if (lanes == 2)
			emit_->MOV(64, MDisp(CTXREG, -128 + GetMipsRegOffset(first)), ::R(r));
#endif
		else
			_assert_(false);
	} else {
		_dbg_assert_(nreg < NUM_X_REGS + NUM_X_FREGS);
		_assert_msg_(mr[first].loc == MIPSLoc::FREG, "Cannot store this type: %d", (int)mr[first].loc);
		if (lanes == 1)
			emit_->MOVSS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
		else if (lanes == 2)
			emit_->MOVLPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
		else if (lanes == 4 && (first & 3) == 0)
			emit_->MOVAPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
		else if (lanes == 4)
			emit_->MOVUPS(MDisp(CTXREG, -128 + GetMipsRegOffset(first)), r);
		else
			_assert_(false);
	}
}

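// Remaps a value between lane counts within XMM registers (e.g. splitting a lane out
// of a Vec4, or gathering singles into a Vec4) without a round trip through memory
// when possible; otherwise defers to the base implementation.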
bool X64IRRegCache::TransferNativeReg(IRNativeReg nreg, IRNativeReg dest, MIPSLoc type, IRReg first, int lanes, MIPSMap flags) {
	bool allowed = !mr[nr[nreg].mipsReg].isStatic;
	// There's currently no support for non-XMMs here.
	allowed = allowed && type == MIPSLoc::FREG;

	if (dest == -1)
		dest = nreg;

	if (allowed && (flags == MIPSMap::INIT || flags == MIPSMap::DIRTY)) {
		// Alright, changing lane count (possibly including lane position.)
		IRReg oldfirst = nr[nreg].mipsReg;
		int oldlanes = 0;
		while (mr[oldfirst + oldlanes].nReg == nreg)
			oldlanes++;
		_assert_msg_(oldlanes != 0, "TransferNativeReg encountered nreg mismatch");
		_assert_msg_(oldlanes != lanes, "TransferNativeReg transfer to same lanecount, misaligned?");

		if (lanes == 1 && TransferVecTo1(nreg, dest, first, oldlanes))
			return true;
		if (oldlanes == 1 && Transfer1ToVec(nreg, dest, first, lanes))
			return true;
	}

	return IRNativeRegCacheBase::TransferNativeReg(nreg, dest, type, first, lanes, flags);
}

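// Extracts a single lane of a vector currently in `nreg` into `dest`, keeping other
// lanes alive in spare registers when that's cheap, and storing the vector first if
// dirty lanes would otherwise be lost.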
bool X64IRRegCache::TransferVecTo1(IRNativeReg nreg, IRNativeReg dest, IRReg first, int oldlanes) {
	IRReg oldfirst = nr[nreg].mipsReg;

	// Is it worth preserving any of the old regs?
	int numKept = 0;
	for (int i = 0; i < oldlanes; ++i) {
		// Skip whichever one this is extracting.
		if (oldfirst + i == first)
			continue;
		// If 0 isn't being transferred, easy to keep in its original reg.
		if (i == 0 && dest != nreg) {
			numKept++;
			continue;
		}

		IRNativeReg freeReg = FindFreeReg(MIPSLoc::FREG, MIPSMap::INIT);
		if (freeReg != -1 && IsRegRead(MIPSLoc::FREG, oldfirst + i)) {
			// If there's one free, use it. Don't modify nreg, though.
			u8 shuf = VFPU_SWIZZLE(i, i, i, i);
			if (i == 0) {
				emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg)));
			} else if (cpu_info.bAVX) {
				emit_->VPERMILPS(128, FromNativeReg(freeReg), ::R(FromNativeReg(nreg)), shuf);
			} else if (i == 2) {
				emit_->MOVHLPS(FromNativeReg(freeReg), FromNativeReg(nreg));
			} else {
				emit_->MOVAPS(FromNativeReg(freeReg), ::R(FromNativeReg(nreg)));
				emit_->SHUFPS(FromNativeReg(freeReg), ::R(FromNativeReg(freeReg)), shuf);
			}

			// Update accounting.
			nr[freeReg].isDirty = nr[nreg].isDirty;
			nr[freeReg].mipsReg = oldfirst + i;
			mr[oldfirst + i].lane = -1;
			mr[oldfirst + i].nReg = freeReg;
			numKept++;
		}
	}

	// Unless all other lanes were kept, store.
	if (nr[nreg].isDirty && numKept < oldlanes - 1) {
		StoreNativeReg(nreg, oldfirst, oldlanes);
		// Set false even for regs that were split out, since they were flushed too.
		for (int i = 0; i < oldlanes; ++i) {
			if (mr[oldfirst + i].nReg != -1)
				nr[mr[oldfirst + i].nReg].isDirty = false;
		}
	}

	// Next, shuffle the desired element into first place.
	u8 shuf = VFPU_SWIZZLE(mr[first].lane, mr[first].lane, mr[first].lane, mr[first].lane);
	if (mr[first].lane > 0 && cpu_info.bAVX && dest != nreg) {
		emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), shuf);
	} else if (mr[first].lane <= 0 && dest != nreg) {
		emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg)));
	} else if (mr[first].lane == 2) {
		emit_->MOVHLPS(FromNativeReg(dest), FromNativeReg(nreg));
	} else if (mr[first].lane > 0) {
		if (dest != nreg)
			emit_->MOVAPS(FromNativeReg(dest), ::R(FromNativeReg(nreg)));
		emit_->SHUFPS(FromNativeReg(dest), ::R(FromNativeReg(dest)), shuf);
	}

	// Now update accounting.
	for (int i = 0; i < oldlanes; ++i) {
		auto &mreg = mr[oldfirst + i];
		if (oldfirst + i == first) {
			mreg.lane = -1;
			mreg.nReg = dest;
		} else if (mreg.nReg == nreg && i == 0 && nreg != dest) {
			// Still in the same register, but no longer a vec.
			mreg.lane = -1;
		} else if (mreg.nReg == nreg) {
			// No longer in a register.
			mreg.nReg = -1;
			mreg.lane = -1;
			mreg.loc = MIPSLoc::MEM;
		}
	}

	if (dest != nreg) {
		nr[dest].isDirty = nr[nreg].isDirty;
		if (oldfirst == first) {
			nr[nreg].mipsReg = -1;
			nr[nreg].isDirty = false;
		}
	}
	nr[dest].mipsReg = first;

	return true;
}

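// The reverse: builds a full vector in `dest` from one mapped single float plus the
// remaining lanes, gathered from other registers or loaded straight from the context,
// choosing blends/shuffles based on available SSE4.1/AVX support.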
bool X64IRRegCache::Transfer1ToVec(IRNativeReg nreg, IRNativeReg dest, IRReg first, int lanes) {
	X64Reg cur[4]{};
	int numInRegs = 0;
	u8 blendMask = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].lane != -1 || (i != 0 && mr[first + i].spillLockIRIndex >= irIndex_)) {
			// Can't do it, either double mapped or overlapping vec.
			return false;
		}

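		// Lanes not currently in a register are marked in blendMask and get filled
		// straight from the MIPS context below.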
		if (mr[first + i].nReg == -1) {
			cur[i] = INVALID_REG;
			blendMask |= 1 << i;
		} else {
			cur[i] = FromNativeReg(mr[first + i].nReg);
			numInRegs++;
		}
	}

	// Shouldn't happen, this should only get called to transfer one in a reg.
	if (numInRegs == 0)
		return false;

	// Move things together into a reg.
	if (lanes == 4 && cpu_info.bSSE4_1 && numInRegs == 1 && (first & 3) == 0) {
		// Use a blend to grab the rest. BLENDPS is pretty good.
		if (cpu_info.bAVX && nreg != dest) {
			if (cur[0] == INVALID_REG) {
				// Broadcast to all lanes, then blend from memory to replace.
				emit_->VPERMILPS(128, FromNativeReg(dest), ::R(FromNativeReg(nreg)), 0);
				emit_->BLENDPS(FromNativeReg(dest), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			} else {
				emit_->VBLENDPS(128, FromNativeReg(dest), FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			}
			cur[0] = FromNativeReg(dest);
		} else {
			if (cur[0] == INVALID_REG)
				emit_->SHUFPS(FromNativeReg(nreg), ::R(FromNativeReg(nreg)), 0);
			emit_->BLENDPS(FromNativeReg(nreg), MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			// If this is not dest, it'll get moved there later.
			cur[0] = FromNativeReg(nreg);
		}
	} else if (lanes == 4) {
		if (blendMask == 0) {
			// y = yw##, x = xz##, x = xyzw.
			emit_->UNPCKLPS(cur[1], ::R(cur[3]));
			emit_->UNPCKLPS(cur[0], ::R(cur[2]));
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (blendMask == 0b1100) {
			// x = xy##, then load zw.
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
			emit_->MOVHPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2)));
		} else if (blendMask == 0b1010 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// x = x#z#, x = xyzw.
			emit_->SHUFPS(cur[0], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
		} else if (blendMask == 0b0110 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// x = x##w, x = xyzw.
			emit_->SHUFPS(cur[0], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
		} else if (blendMask == 0b1001 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// y = #yz#, y = xyzw.
			emit_->SHUFPS(cur[1], ::R(cur[2]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			// Will be moved to dest as needed.
			cur[0] = cur[1];
		} else if (blendMask == 0b0101 && cpu_info.bSSE4_1 && (first & 3) == 0) {
			// y = #y#w, y = xyzw.
			emit_->SHUFPS(cur[1], ::R(cur[3]), VFPU_SWIZZLE(0, 0, 0, 0));
			emit_->BLENDPS(cur[1], MDisp(CTXREG, -128 + GetMipsRegOffset(first)), blendMask);
			// Will be moved to dest as needed.
			cur[0] = cur[1];
		} else if (blendMask == 0b1000) {
			// x = xz##, z = w###, y = yw##, x = xyzw.
			emit_->UNPCKLPS(cur[0], ::R(cur[2]));
			emit_->MOVSS(cur[2], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 3)));
			emit_->UNPCKLPS(cur[1], ::R(cur[2]));
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (blendMask == 0b0100) {
			// y = yw##, w = z###, x = xz##, x = xyzw.
			emit_->UNPCKLPS(cur[1], ::R(cur[3]));
			emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 2)));
			emit_->UNPCKLPS(cur[0], ::R(cur[3]));
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (blendMask == 0b0010) {
			// z = zw##, w = y###, x = xy##, x = xyzw.
			emit_->UNPCKLPS(cur[2], ::R(cur[3]));
			emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1)));
			emit_->UNPCKLPS(cur[0], ::R(cur[3]));
			emit_->MOVLHPS(cur[0], cur[2]);
		} else if (blendMask == 0b0001) {
			// y = yw##, w = x###, w = xz##, w = xyzw.
			emit_->UNPCKLPS(cur[1], ::R(cur[3]));
			emit_->MOVSS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0)));
			emit_->UNPCKLPS(cur[3], ::R(cur[2]));
			emit_->UNPCKLPS(cur[3], ::R(cur[1]));
			// Will be moved to dest as needed.
			cur[0] = cur[3];
		} else if (blendMask == 0b0011) {
			// z = zw##, w = xy##, w = xyzw.
			emit_->UNPCKLPS(cur[2], ::R(cur[3]));
			emit_->MOVLPS(cur[3], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 0)));
			emit_->MOVLHPS(cur[3], cur[2]);
			// Will be moved to dest as needed.
			cur[0] = cur[3];
		} else {
			// This must mean no SSE4, and numInRegs <= 2 in trickier cases.
			return false;
		}
	} else if (lanes == 2) {
		if (cur[0] != INVALID_REG && cur[1] != INVALID_REG) {
			emit_->UNPCKLPS(cur[0], ::R(cur[1]));
		} else if (cur[0] != INVALID_REG && cpu_info.bSSE4_1) {
			emit_->INSERTPS(cur[0], MDisp(CTXREG, -128 + GetMipsRegOffset(first + 1)), 1);
		} else {
			return false;
		}
	} else {
		return false;
	}

	mr[first].lane = 0;
	for (int i = 0; i < lanes; ++i) {
		if (mr[first + i].nReg != -1) {
			// If this was dirty, the combined reg is now dirty.
			if (nr[mr[first + i].nReg].isDirty)
				nr[dest].isDirty = true;

			// Throw away the other register we're no longer using.
			if (i != 0)
				DiscardNativeReg(mr[first + i].nReg);
		}

		// And set it as using the new one.
		mr[first + i].lane = i;
		mr[first + i].loc = MIPSLoc::FREG;
		mr[first + i].nReg = dest;
	}

	if (cur[0] != FromNativeReg(dest))
		emit_->MOVAPS(FromNativeReg(dest), ::R(cur[0]));

	if (dest != nreg) {
		nr[dest].mipsReg = first;
		nr[nreg].mipsReg = -1;
		nr[nreg].isDirty = false;
	}

	return true;
}

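// Immediates: SetNativeRegValue materializes a constant in a host GPR; StoreRegValue
// writes a constant straight to the context, reusing a register already known to hold
// the same value when one exists.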
void X64IRRegCache::SetNativeRegValue(IRNativeReg nreg, uint32_t imm) {
	X64Reg r = FromNativeReg(nreg);
	_dbg_assert_(nreg >= 0 && nreg < NUM_X_REGS);
	emit_->MOV(32, ::R(r), Imm32(imm));
}

void X64IRRegCache::StoreRegValue(IRReg mreg, uint32_t imm) {
	_assert_(IsValidGPRNoZero(mreg));
	// Try to optimize using a different reg.
	X64Reg storeReg = INVALID_REG;

	// Could we get lucky? Check for an exact match in another xreg.
	for (int i = 0; i < TOTAL_MAPPABLE_IRREGS; ++i) {
		if (mr[i].loc == MIPSLoc::REG_IMM && mr[i].imm == imm) {
			// Awesome, let's just store this reg.
			storeReg = (X64Reg)mr[i].nReg;
			break;
		}
	}

	if (storeReg == INVALID_REG)
		emit_->MOV(32, MDisp(CTXREG, -128 + GetMipsRegOffset(mreg)), Imm32(imm));
	else
		emit_->MOV(32, MDisp(CTXREG, -128 + GetMipsRegOffset(mreg)), ::R(storeReg));
}

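// The accessors below return the operand / host register currently backing a guest
// register; they assert (and log) if it isn't mapped the way the caller expects.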
OpArg X64IRRegCache::R(IRReg mipsReg) {
	return ::R(RX(mipsReg));
}

OpArg X64IRRegCache::RPtr(IRReg mipsReg) {
	return ::R(RXPtr(mipsReg));
}

OpArg X64IRRegCache::F(IRReg mipsReg) {
	return ::R(FX(mipsReg));
}

X64Reg X64IRRegCache::RX(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM);
	if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		return FromNativeReg(mr[mipsReg].nReg);
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in x64 reg", mipsReg);
		return INVALID_REG; // BAAAD
	}
}

X64Reg X64IRRegCache::RXPtr(IRReg mipsReg) {
	_dbg_assert_(IsValidGPR(mipsReg));
	_dbg_assert_(mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM || mr[mipsReg].loc == MIPSLoc::REG_AS_PTR);
	if (mr[mipsReg].loc == MIPSLoc::REG_AS_PTR) {
		return FromNativeReg(mr[mipsReg].nReg);
	} else if (mr[mipsReg].loc == MIPSLoc::REG || mr[mipsReg].loc == MIPSLoc::REG_IMM) {
		int r = mr[mipsReg].nReg;
		_dbg_assert_(nr[r].pointerified);
		if (nr[r].pointerified) {
			return FromNativeReg(mr[mipsReg].nReg);
		} else {
			ERROR_LOG(Log::JIT, "Tried to use a non-pointer register as a pointer");
			return INVALID_REG;
		}
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in x64 reg", mipsReg);
		return INVALID_REG; // BAAAD
	}
}

X64Reg X64IRRegCache::FX(IRReg mipsReg) {
	_dbg_assert_(IsValidFPR(mipsReg));
	_dbg_assert_(mr[mipsReg + 32].loc == MIPSLoc::FREG);
	if (mr[mipsReg + 32].loc == MIPSLoc::FREG) {
		return FromNativeReg(mr[mipsReg + 32].nReg);
	} else {
		ERROR_LOG_REPORT(Log::JIT, "Reg %i not in x64 reg", mipsReg);
		return INVALID_REG; // BAAAD
	}
}

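// On 32-bit x86, only EAX/EBX/ECX/EDX have addressable 8-bit low halves; on x86-64,
// every GPR does.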
bool X64IRRegCache::HasLowSubregister(Gen::X64Reg reg) {
#if !PPSSPP_ARCH(AMD64)
	// Can't use ESI or EDI (which we use), no 8-bit versions. Only these.
	return reg == EAX || reg == EBX || reg == ECX || reg == EDX;
#else
	return true;
#endif
}

#endif