Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/CodeGen/src/IrRegAllocX64.cpp
2725 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#include "Luau/IrRegAllocX64.h"
3
4
#include "Luau/IrUtils.h"
5
#include "Luau/LoweringStats.h"
6
7
#include "EmitCommonX64.h"
8
9
#include "lstate.h"
10
11
namespace Luau
12
{
13
namespace CodeGen
14
{
15
namespace X64
16
{
17
18
// Spill footprint of each IrValueKind, measured in 32-bit dwords ('half-slots'); indexed by int(IrValueKind)
// NOTE(review): assumes the 8-entry order matches the IrValueKind declaration (Tvalue last, 4 dwords = 16 bytes) — confirm against the IR headers
static constexpr unsigned kValueDwordSize[] = {0, 0, 1, 1, 2, 1, 2, 4};
static_assert(sizeof(kValueDwordSize) / sizeof(kValueDwordSize[0]) == size_t(IrValueKind::Count), "all kinds have to be covered");

// Preferred gpr allocation order; r10/r11 are listed last since preserve() borrows them as emergency temporaries
static const RegisterX64 kGprAllocOrder[] = {rax, rdx, rcx, rbx, rsi, rdi, r8, r9, r10, r11};
23
// Sets up the allocator: all registers start free and unowned by any instruction.
// The number of usable xmm registers depends on the target ABI.
IrRegAllocX64::IrRegAllocX64(AssemblyBuilderX64& build, IrFunction& function, LoweringStats* stats)
    : build(build)
    , function(function)
    , stats(stats)
    , usableXmmRegCount(getXmmRegisterCount(build.abi))
{
    // Every xmm register is available and has no instruction using it
    freeXmmMap.fill(true);
    xmmInstUsers.fill(kInvalidInstIdx);

    // Same for the general-purpose registers
    freeGprMap.fill(true);
    gprInstUsers.fill(kInvalidInstIdx);
}
34
35
// Allocates a register of the requested size for instruction 'instIdx'.
// When no register of the needed class is free, the live value with the furthest
// next use is preserved (spilled) and its register is taken over.
RegisterX64 IrRegAllocX64::allocReg(SizeX64 size, uint32_t instIdx)
{
    bool isXmm = size == SizeX64::xmmword;

    if (isXmm)
    {
        // Scan the usable xmm registers in index order
        for (size_t idx = 0; idx < usableXmmRegCount; ++idx)
        {
            if (!freeXmmMap[idx])
                continue;

            freeXmmMap[idx] = false;
            xmmInstUsers[idx] = instIdx;
            return RegisterX64{size, uint8_t(idx)};
        }
    }
    else
    {
        // Scan general-purpose registers in the preferred allocation order
        for (RegisterX64 candidate : kGprAllocOrder)
        {
            if (!freeGprMap[candidate.index])
                continue;

            freeGprMap[candidate.index] = false;
            gprInstUsers[candidate.index] = instIdx;
            return RegisterX64{size, candidate.index};
        }
    }

    // Out of registers, spill the value with the furthest next use
    const std::array<uint32_t, 16>& regInstUsers = isXmm ? xmmInstUsers : gprInstUsers;

    uint32_t furthestUseTarget = findInstructionWithFurthestNextUse(regInstUsers);

    if (furthestUseTarget != kInvalidInstIdx)
    {
        RegisterX64 reg = function.instructions[furthestUseTarget].regX64;
        reg.size = size; // Adjust size to the requested

        // takeReg will preserve the evicted value before handing the register over
        return takeReg(reg, instIdx);
    }

    CODEGEN_ASSERT(!"Out of registers to allocate");
    return noreg;
}
75
76
// Allocates a register for 'instIdx', preferring to reuse the register of one of the
// listed operand instructions when this is its last use. Falls back to allocReg.
RegisterX64 IrRegAllocX64::allocRegOrReuse(SizeX64 size, uint32_t instIdx, std::initializer_list<IrOp> oprefs)
{
    for (IrOp op : oprefs)
    {
        if (op.kind != IrOpKind::Inst)
            continue;

        IrInst& source = function.instructions[op.index];

        // Only a value that dies at this instruction and still lives in a register can donate it
        bool canReuse = source.lastUse == instIdx && !source.reusedReg && !source.spilled && !source.needsReload;

        if (!canReuse)
            continue;

        // Not comparing size directly because we only need matching register set
        bool wantXmm = size == SizeX64::xmmword;
        bool haveXmm = source.regX64.size == SizeX64::xmmword;

        if (wantXmm != haveXmm)
            continue;

        CODEGEN_ASSERT(source.regX64 != noreg);

        source.reusedReg = true;

        // Transfer the register's ownership to the new instruction
        if (wantXmm)
            xmmInstUsers[source.regX64.index] = instIdx;
        else
            gprInstUsers[source.regX64.index] = instIdx;

        return RegisterX64{size, source.regX64.index};
    }

    return allocReg(size, instIdx);
}
106
107
// Claims a specific register for 'instIdx'. If the register currently holds another
// instruction's value, that value is preserved (spilled or marked for reload) first.
RegisterX64 IrRegAllocX64::takeReg(RegisterX64 reg, uint32_t instIdx)
{
    if (reg.size == SizeX64::xmmword)
    {
        if (!freeXmmMap[reg.index])
        {
            // Evict the current occupant; preserve() frees the register as a side effect
            CODEGEN_ASSERT(xmmInstUsers[reg.index] != kInvalidInstIdx);
            preserve(function.instructions[xmmInstUsers[reg.index]]);
        }

        CODEGEN_ASSERT(freeXmmMap[reg.index]);
        freeXmmMap[reg.index] = false;
        xmmInstUsers[reg.index] = instIdx;
    }
    else
    {
        if (!freeGprMap[reg.index])
        {
            // Same eviction path for general-purpose registers
            CODEGEN_ASSERT(gprInstUsers[reg.index] != kInvalidInstIdx);
            preserve(function.instructions[gprInstUsers[reg.index]]);
        }

        CODEGEN_ASSERT(freeGprMap[reg.index]);
        freeGprMap[reg.index] = false;
        gprInstUsers[reg.index] = instIdx;
    }

    return reg;
}
136
137
// Checks whether takeReg can succeed for this register: either it's free,
// or it's occupied by an instruction value that can be preserved first.
bool IrRegAllocX64::canTakeReg(RegisterX64 reg) const
{
    if (reg.size == SizeX64::xmmword)
        return freeXmmMap[reg.index] || xmmInstUsers[reg.index] != kInvalidInstIdx;

    return freeGprMap[reg.index] || gprInstUsers[reg.index] != kInvalidInstIdx;
}
144
145
// Returns a register to the free pool and clears its instruction association.
// The register must currently be allocated.
void IrRegAllocX64::freeReg(RegisterX64 reg)
{
    bool isXmm = reg.size == SizeX64::xmmword;

    std::array<bool, 16>& freeMap = isXmm ? freeXmmMap : freeGprMap;
    std::array<uint32_t, 16>& instUsers = isXmm ? xmmInstUsers : gprInstUsers;

    CODEGEN_ASSERT(!freeMap[reg.index]);

    freeMap[reg.index] = true;
    instUsers[reg.index] = kInvalidInstIdx;
}
160
161
// Frees the register of 'target' when 'instIdx' is its final use and the register
// wasn't already handed over via reuse.
void IrRegAllocX64::freeLastUseReg(IrInst& target, uint32_t instIdx)
{
    if (!isLastUseReg(target, instIdx))
        return;

    CODEGEN_ASSERT(!target.spilled && !target.needsReload);

    // Register might have already been freed if it had multiple uses inside a single instruction
    if (target.regX64 == noreg)
        return;

    freeReg(target.regX64);
    target.regX64 = noreg;
}
175
176
void IrRegAllocX64::freeLastUseRegs(const IrInst& inst, uint32_t instIdx)
177
{
178
auto checkOp = [this, instIdx](IrOp op)
179
{
180
if (op.kind == IrOpKind::Inst)
181
freeLastUseReg(function.instructions[op.index], instIdx);
182
};
183
184
for (const IrOp& op : inst.ops)
185
checkOp(op);
186
}
187
188
bool IrRegAllocX64::isLastUseReg(const IrInst& target, uint32_t instIdx) const
189
{
190
return target.lastUse == instIdx && !target.reusedReg;
191
}
192
193
// Evicts the value produced by 'inst' from its register so the register can be reused.
// The value is either written to a spill stack slot, or - when it can be cheaply
// re-read from its VM register/constant location - just marked as needing a reload.
// In both cases a spill record is pushed and the register is freed.
void IrRegAllocX64::preserve(IrInst& inst)
{
    IrSpillX64 spill;
    spill.instIdx = function.getInstIndex(inst);
    spill.valueKind = getCmdValueKind(inst.cmd);
    spill.spillId = nextSpillId++;
    spill.originalLoc = inst.regX64;

    // Loads from VmReg/VmConst don't have to be spilled, they can be restored from a register later
    // When checking if value has a restore operation to spill it, we only allow it in the same block
    if (!function.hasRestoreLocation(inst, /*limitToCurrentBlock*/ true))
    {
        // 'i' is a dword-granular (half-slot) position inside the spill area
        unsigned i = findSpillStackSlot(spill.valueKind);

        if (isExtraSpillSlot(i))
        {
            int extraOffset = getExtraSpillAddressOffset(i);

            // Tricky situation, no registers left, but need a register to calculate an address
            // We will try to take r11 unless it's actually the register being spilled
            RegisterX64 emergencyTemp = inst.regX64.size == SizeX64::xmmword || inst.regX64.index != 11 ? r11 : r10;

            // Save the borrowed register so it can be restored after the store
            build.mov(qword[sTemporarySlot + 0], emergencyTemp);

            // Address the extra spill storage reached through lua_State -> global_State::ecbdata
            build.mov(emergencyTemp, qword[rState + offsetof(lua_State, global)]);
            build.lea(emergencyTemp, addr[emergencyTemp + offsetof(global_State, ecbdata) + extraOffset]);

            // Store with the operand width matching the spilled value kind
            if (spill.valueKind == IrValueKind::Tvalue)
                build.vmovups(xmmword[emergencyTemp], inst.regX64);
            else if (spill.valueKind == IrValueKind::Double)
                build.vmovsd(qword[emergencyTemp], inst.regX64);
            else if (spill.valueKind == IrValueKind::Pointer)
                build.mov(qword[emergencyTemp], inst.regX64);
            else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int)
                build.mov(dword[emergencyTemp], inst.regX64);
            else if (spill.valueKind == IrValueKind::Float)
                build.vmovss(dword[emergencyTemp], inst.regX64);
            else
                CODEGEN_ASSERT(!"Unsupported value kind");

            // Put the borrowed register's original value back
            build.mov(emergencyTemp, qword[sTemporarySlot + 0]);
        }
        else
        {
            // Regular spill into the in-stack spill area; each half-slot is 4 bytes
            if (spill.valueKind == IrValueKind::Tvalue)
                build.vmovups(xmmword[sSpillArea + i * 4], inst.regX64);
            else if (spill.valueKind == IrValueKind::Double)
                build.vmovsd(qword[sSpillArea + i * 4], inst.regX64);
            else if (spill.valueKind == IrValueKind::Pointer)
                build.mov(qword[sSpillArea + i * 4], inst.regX64);
            else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int)
                build.mov(dword[sSpillArea + i * 4], inst.regX64);
            else if (spill.valueKind == IrValueKind::Float)
                build.vmovss(dword[sSpillArea + i * 4], inst.regX64);
            else
                CODEGEN_ASSERT(!"Unsupported value kind");
        }

        // Mark every half-slot this value occupies as used
        unsigned end = i + kValueDwordSize[int(spill.valueKind)];

        for (unsigned pos = i; pos < end; pos++)
            usedSpillSlotHalfs.set(pos);

        // Track the high watermark of full (8 byte) slots in use
        if ((end + 1) / 2 > maxUsedSlot)
            maxUsedSlot = (end + 1) / 2;

        spill.stackSlot = uint8_t(i);
        inst.spilled = true;

        if (stats)
            stats->spillsToSlot++;
    }
    else
    {
        // Value can be re-materialized from its VM location; no stack slot is consumed
        inst.needsReload = true;

        if (stats)
            stats->spillsToRestore++;
    }

    spills.push_back(spill);

    freeReg(inst.regX64);
    inst.regX64 = noreg;
}
278
279
// Brings a previously preserved value back into a register: either into its original
// register ('intoOriginalLocation') or into a freshly allocated one of the same class.
// Removes the matching entry from 'spills'; does nothing if no entry matches 'inst'.
void IrRegAllocX64::restore(IrInst& inst, bool intoOriginalLocation)
{
    uint32_t instIdx = function.getInstIndex(inst);

    for (size_t i = 0; i < spills.size(); i++)
    {
        if (spills[i].instIdx == instIdx)
        {
            RegisterX64 reg = intoOriginalLocation ? takeReg(spills[i].originalLoc, instIdx) : allocReg(spills[i].originalLoc.size, instIdx);

            // When restoring the value, we allow cross-block restore because we have committed to the target location at spill time
            ValueRestoreLocation restoreLocation = function.findRestoreLocation(inst, /*limitToCurrentBlock*/ false);

            OperandX64 restoreAddr = noreg;

            // For xmm loads a gpr scratch (r11) is needed; for gpr loads the destination itself serves as the temp
            RegisterX64 emergencyTemp = reg.size == SizeX64::xmmword ? r11 : qwordReg(reg);

            // Previous call might have relocated the spill vector, so this reference can't be taken earlier
            const IrSpillX64& spill = spills[i];

            if (spill.stackSlot != kNoStackSlot)
            {
                if (isExtraSpillSlot(spill.stackSlot))
                {
                    int extraOffset = getExtraSpillAddressOffset(spill.stackSlot);

                    // Need to calculate an address, but everything might be taken
                    if (reg.size == SizeX64::xmmword)
                        build.mov(qword[sTemporarySlot + 0], emergencyTemp);

                    // Address the extra spill storage reached through lua_State -> global_State::ecbdata
                    build.mov(emergencyTemp, qword[rState + offsetof(lua_State, global)]);
                    build.lea(emergencyTemp, addr[emergencyTemp + offsetof(global_State, ecbdata) + extraOffset]);

                    restoreAddr = addr[emergencyTemp];
                    restoreAddr.memSize = reg.size;
                }
                else
                {
                    // In-stack spill area; half-slots are 4 bytes apart
                    restoreAddr = addr[sSpillArea + spill.stackSlot * 4];
                    restoreAddr.memSize = reg.size;
                }

                // Memory operand width follows the stored value kind, not the destination register size
                if (spill.valueKind == IrValueKind::Double)
                    restoreAddr.memSize = SizeX64::qword;
                else if (spill.valueKind == IrValueKind::Float)
                    restoreAddr.memSize = SizeX64::dword;

                // Release the half-slots this value occupied
                unsigned end = spill.stackSlot + kValueDwordSize[int(spill.valueKind)];

                for (unsigned pos = spill.stackSlot; pos < end; pos++)
                    usedSpillSlotHalfs.set(pos, false);
            }
            else
            {
                // Value was never written to a slot; reload it from its VM register/constant location
                restoreAddr = getRestoreAddress(inst, restoreLocation);
            }

            if (spill.valueKind == IrValueKind::Tvalue)
            {
                build.vmovups(reg, restoreAddr);
            }
            else if (spill.valueKind == IrValueKind::Double)
            {
                build.vmovsd(reg, restoreAddr);
            }
            else if (spill.valueKind == IrValueKind::Int && restoreLocation.kind == IrValueKind::Double)
            {
                // Handle restore of an int/uint value from a location storing a double number
                if (restoreLocation.conversionCmd == IrCmd::INT_TO_NUM)
                    build.vcvttsd2si(reg, restoreAddr);
                else if (restoreLocation.conversionCmd == IrCmd::UINT_TO_NUM)
                    build.vcvttsd2si(qwordReg(reg), restoreAddr); // Note: we perform 'uint64_t = (long long)double' for consistency with C++ code
                else
                    CODEGEN_ASSERT(!"re-materialization not supported for this conversion command");
            }
            else if (spill.valueKind == IrValueKind::Tag || spill.valueKind == IrValueKind::Int || spill.valueKind == IrValueKind::Pointer)
            {
                build.mov(reg, restoreAddr);
            }
            else if (spill.valueKind == IrValueKind::Float)
            {
                build.vmovss(reg, restoreAddr);
            }
            else
            {
                CODEGEN_ASSERT(!"value kind not supported for restore");
            }

            // If r11 was borrowed for extra-slot addressing, put its saved value back
            if (spill.stackSlot != kNoStackSlot && isExtraSpillSlot(spill.stackSlot))
            {
                if (reg.size == SizeX64::xmmword)
                    build.mov(emergencyTemp, qword[sTemporarySlot + 0]);
            }

            inst.regX64 = reg;
            inst.spilled = false;
            inst.needsReload = false;

            // Swap-and-pop removal of the restored spill entry
            spills[i] = spills.back();
            spills.pop_back();
            return;
        }
    }
}
383
384
void IrRegAllocX64::preserveAndFreeInstValues()
385
{
386
for (uint32_t instIdx : gprInstUsers)
387
{
388
if (instIdx != kInvalidInstIdx)
389
preserve(function.instructions[instIdx]);
390
}
391
392
for (uint32_t instIdx : xmmInstUsers)
393
{
394
if (instIdx != kInvalidInstIdx)
395
preserve(function.instructions[instIdx]);
396
}
397
}
398
399
// True when 'reg' is a general-purpose register managed by this allocator
// (i.e. it appears in kGprAllocOrder); noreg and unmanaged registers return false.
bool IrRegAllocX64::shouldFreeGpr(RegisterX64 reg) const
{
    if (reg == noreg)
        return false;

    CODEGEN_ASSERT(reg.size != SizeX64::xmmword);

    for (RegisterX64 candidate : kGprAllocOrder)
    {
        if (candidate.index == reg.index)
            return true;
    }

    return false;
}
414
415
// Finds a free spill position for a value of the given kind.
// Positions are measured in dword (4 byte) 'halves'; 8/16 byte values get positions aligned by 2.
// Asserts (and returns ~0u) when nothing fits.
unsigned IrRegAllocX64::findSpillStackSlot(IrValueKind valueKind)
{
    // 4 byte values (Float/Int) fit into any single free half-slot
    if (valueKind == IrValueKind::Float || valueKind == IrValueKind::Int)
    {
        for (unsigned i = 0; i < unsigned(usedSpillSlotHalfs.size()); ++i)
        {
            if (usedSpillSlotHalfs.test(i))
                continue;

            return i;
        }
    }
    else
    {
        // Find a free stack slot. Four consecutive slots might be required for 16 byte TValues, so '- 3' is used
        // For 8 and 16 byte types we search in steps of 2 to return slot indices aligned by 2
        for (unsigned i = 0; i < unsigned(usedSpillSlotHalfs.size() - 3); i += 2)
        {
            // First aligned pair must be free for any 8/16 byte value
            if (usedSpillSlotHalfs.test(i) || usedSpillSlotHalfs.test(i + 1))
                continue;

            if (valueKind == IrValueKind::Tvalue)
            {
                // 16 byte TValue additionally needs the following pair
                if (usedSpillSlotHalfs.test(i + 2) || usedSpillSlotHalfs.test(i + 3))
                {
                    i += 2; // No need to retest this double position
                    continue;
                }
            }

            return i;
        }
    }

    CODEGEN_ASSERT(!"Nowhere to spill");
    return ~0u;
}
452
453
// Builds the memory operand for reloading a value from its VM register or constant
// location, based on the kind of value stored there.
OperandX64 IrRegAllocX64::getRestoreAddress(const IrInst& inst, ValueRestoreLocation restoreLocation)
{
    IrOp op = restoreLocation.op;
    CODEGEN_ASSERT(op.kind != IrOpKind::None);

    [[maybe_unused]] IrValueKind instKind = getCmdValueKind(inst.cmd);

    bool fromVmReg = op.kind == IrOpKind::VmReg;

    switch (restoreLocation.kind)
    {
    case IrValueKind::Tag:
        return fromVmReg ? luauRegTag(vmRegOp(op)) : luauConstantTag(vmConstOp(op));
    case IrValueKind::Int:
        CODEGEN_ASSERT(fromVmReg);
        return luauRegValueInt(vmRegOp(op));
    case IrValueKind::Pointer:
    case IrValueKind::Double:
        // Pointer and Double both live in the value payload of the location
        return fromVmReg ? luauRegValue(vmRegOp(op)) : luauConstantValue(vmConstOp(op));
    case IrValueKind::Tvalue:
        return fromVmReg ? luauReg(vmRegOp(op)) : luauConstant(vmConstOp(op));
    case IrValueKind::Unknown:
    case IrValueKind::None:
    case IrValueKind::Float:
    case IrValueKind::Count:
        CODEGEN_ASSERT(!"Invalid operand restore value kind");
        break;
    }

    CODEGEN_ASSERT(!"Failed to find restore operand location");
    return noreg;
}
484
485
uint32_t IrRegAllocX64::findInstructionWithFurthestNextUse(const std::array<uint32_t, 16>& regInstUsers) const
486
{
487
if (currInstIdx == kInvalidInstIdx)
488
return kInvalidInstIdx;
489
490
uint32_t furthestUseTarget = kInvalidInstIdx;
491
uint32_t furthestUseLocation = 0;
492
493
for (uint32_t regInstUser : regInstUsers)
494
{
495
// Cannot spill temporary registers or the register of the value that's defined in the current instruction
496
if (regInstUser == kInvalidInstIdx || regInstUser == currInstIdx)
497
continue;
498
499
uint32_t nextUse = getNextInstUse(function, regInstUser, currInstIdx);
500
501
// Cannot spill value that is about to be used in the current instruction
502
if (nextUse == currInstIdx)
503
continue;
504
505
if (furthestUseTarget == kInvalidInstIdx || nextUse > furthestUseLocation)
506
{
507
furthestUseLocation = nextUse;
508
furthestUseTarget = regInstUser;
509
}
510
}
511
512
return furthestUseTarget;
513
}
514
515
bool IrRegAllocX64::isExtraSpillSlot(unsigned slot) const
516
{
517
CODEGEN_ASSERT(slot != kNoStackSlot);
518
519
return slot >= kSpillSlots_NEW * 2;
520
}
521
522
int IrRegAllocX64::getExtraSpillAddressOffset(unsigned slot) const
523
{
524
CODEGEN_ASSERT(isExtraSpillSlot(slot));
525
526
return (slot - kSpillSlots_NEW * 2) * 4;
527
}
528
529
// Debug check that the given register is not currently allocated
void IrRegAllocX64::assertFree(RegisterX64 reg) const
{
    [[maybe_unused]] const std::array<bool, 16>& freeMap = reg.size == SizeX64::xmmword ? freeXmmMap : freeGprMap;

    CODEGEN_ASSERT(freeMap[reg.index]);
}
536
537
void IrRegAllocX64::assertAllFree() const
538
{
539
for (RegisterX64 reg : kGprAllocOrder)
540
CODEGEN_ASSERT(freeGprMap[reg.index]);
541
542
for (bool free : freeXmmMap)
543
CODEGEN_ASSERT(free);
544
}
545
546
void IrRegAllocX64::assertNoSpills() const
547
{
548
CODEGEN_ASSERT(spills.empty());
549
}
550
551
// Creates an empty scope that doesn't hold a register yet; use take()/alloc() later
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner)
    : owner(owner)
    , reg(noreg)
{
}
556
557
// Creates a scope that immediately allocates a register of the requested size
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, SizeX64 size)
    : owner(owner)
    , reg(noreg)
{
    alloc(size);
}
563
564
// Adopts an already-allocated register; it will be freed when the scope ends
ScopedRegX64::ScopedRegX64(IrRegAllocX64& owner, RegisterX64 reg)
    : owner(owner)
    , reg(reg)
{
}
569
570
// Returns the held register to the allocator, unless it was released or never taken
ScopedRegX64::~ScopedRegX64()
{
    if (reg == noreg)
        return;

    owner.freeReg(reg);
}
575
576
// Claims a specific register for this scope; asserts that none is held yet.
// kInvalidInstIdx marks the register as a temporary not owned by any instruction.
void ScopedRegX64::take(RegisterX64 reg)
{
    CODEGEN_ASSERT(this->reg == noreg);
    this->reg = owner.takeReg(reg, kInvalidInstIdx);
}
581
582
// Allocates a register of the requested size for this scope; asserts that none is held yet
void ScopedRegX64::alloc(SizeX64 size)
{
    CODEGEN_ASSERT(reg == noreg);
    reg = owner.allocReg(size, kInvalidInstIdx);
}
587
588
// Explicitly releases the held register ahead of scope exit; asserts that one is held
void ScopedRegX64::free()
{
    CODEGEN_ASSERT(reg != noreg);
    owner.freeReg(reg);
    reg = noreg;
}
594
595
// Transfers ownership of the register to the caller; the destructor will no longer free it
RegisterX64 ScopedRegX64::release()
{
    RegisterX64 released = reg;

    reg = noreg;

    return released;
}
601
602
// Remembers the spill id at scope entry; the destructor restores every spill created after it
ScopedSpills::ScopedSpills(IrRegAllocX64& owner)
    : owner(owner)
{
    startSpillId = owner.nextSpillId;
}
607
608
// Restores every spill that was created inside this scope back into its original
// register location, leaving spills from outer scopes untouched.
ScopedSpills::~ScopedSpills()
{
    unsigned endSpillId = owner.nextSpillId;

    // restore() removes entries with swap-and-pop, so iterate without always advancing 'i'
    for (size_t i = 0; i < owner.spills.size();)
    {
        IrSpillX64& spill = owner.spills[i];

        // Restoring spills inside this scope cannot create new spills
        CODEGEN_ASSERT(spill.spillId < endSpillId);

        // If spill was created inside current scope, it has to be restored
        if (spill.spillId >= startSpillId)
        {
            IrInst& inst = owner.function.instructions[spill.instIdx];

            owner.restore(inst, /*intoOriginalLocation*/ true);

            // Spill restore removes the spill entry, so loop is repeated at the same 'i'
        }
        else
        {
            i++;
        }
    }
}
634
635
} // namespace X64
636
} // namespace CodeGen
637
} // namespace Luau
638
639