Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/CodeGen/src/IrRegAllocA64.cpp
2725 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#include "IrRegAllocA64.h"
3
4
#include "Luau/AssemblyBuilderA64.h"
5
#include "Luau/IrUtils.h"
6
#include "Luau/LoweringStats.h"
7
8
#include "BitUtils.h"
9
#include "EmitCommonA64.h"
10
11
#include <string.h>
12
13
LUAU_FASTFLAGVARIABLE(DebugCodegenChaosA64)
14
15
namespace Luau
16
{
17
namespace CodeGen
18
{
19
namespace A64
20
{
21
22
// Sentinel slot index recorded when spill-slot allocation fails (error path);
// must be outside the valid 0..62 slot range guaranteed by the assert below.
static const int8_t kInvalidSpill = 64;
static_assert(kSpillSlots + kExtraSpillSlots < 64, "arm64 lowering can only handle 63 spill slots");
24
25
static int allocSpill(uint64_t& free, KindA64 kind)
26
{
27
CODEGEN_ASSERT(kStackSize <= 256); // to support larger stack frames, we need to ensure qN is allocated at 16b boundary to fit in ldr/str encoding
28
29
// qN registers use two consecutive slots
30
int slot = countrz(kind == KindA64::q ? free & (free >> 1) : free);
31
if (slot == 64)
32
return -1;
33
34
uint64_t mask = (kind == KindA64::q ? 3ull : 1ull) << (unsigned long long)slot;
35
36
CODEGEN_ASSERT((free & mask) == mask);
37
free &= ~mask;
38
39
return slot;
40
}
41
42
static void freeSpill(uint64_t& free, KindA64 kind, uint8_t slot)
43
{
44
// qN registers use two consecutive slots
45
uint64_t mask = (kind == KindA64::q ? 3ull : 1ull) << (unsigned long long)slot;
46
47
CODEGEN_ASSERT((free & mask) == 0);
48
free |= mask;
49
}
50
51
// Maps an IR value kind to the byte offset of the corresponding field inside a
// TValue, used when reloading a spilled value from the VM stack or constants.
static int getReloadOffset(IrValueKind kind)
{
    switch (kind)
    {
    case IrValueKind::Tag:
        return offsetof(TValue, tt);
    case IrValueKind::Int:
        return offsetof(TValue, value);
    case IrValueKind::Pointer:
        return offsetof(TValue, value.gc);
    case IrValueKind::Double:
        return offsetof(TValue, value.n);
    case IrValueKind::Tvalue:
        return 0; // whole TValue is reloaded from offset 0
    case IrValueKind::Unknown:
    case IrValueKind::None:
    case IrValueKind::Float:
    case IrValueKind::Count:
        CODEGEN_ASSERT(!"Invalid operand restore value kind");
        break;
    }

    CODEGEN_ASSERT(!"Invalid operand restore value kind");
    LUAU_UNREACHABLE();
}
76
77
// Computes the address to reload a spilled value from, based on its VM restore
// location; returns a dummy xzr-based address when no addressable location exists.
static AddressA64 getReloadAddress(ValueRestoreLocation location)
{
    IrOp op = location.op;

    if (op.kind == IrOpKind::VmReg)
        return mem(rBase, vmRegOp(op) * sizeof(TValue) + getReloadOffset(location.kind));

    if (op.kind == IrOpKind::VmConst)
    {
        // loads are 4/8/16 bytes; we conservatively limit the offset to fit assuming a 4b index
        if (vmConstOp(op) * sizeof(TValue) <= AddressA64::kMaxOffset * 4)
            return mem(rConstants, vmConstOp(op) * sizeof(TValue) + getReloadOffset(location.kind));
    }

    return AddressA64(xzr); // dummy
}
90
91
// Builds the allocator state: populates the base register masks from the given
// inclusive register ranges, marks everything free, and sets up spill slots.
IrRegAllocA64::IrRegAllocA64(
    AssemblyBuilderA64& build,
    IrFunction& function,
    LoweringStats* stats,
    std::initializer_list<std::pair<RegisterA64, RegisterA64>> regs
)
    : build(build)
    , function(function)
    , stats(stats)
{
    // each pair describes an inclusive range of allocatable registers of a single kind
    for (const auto& range : regs)
    {
        CODEGEN_ASSERT(range.first.kind == range.second.kind && range.first.index <= range.second.index);

        Set& set = getSet(range.first.kind);

        for (int i = range.first.index; i <= range.second.index; ++i)
            set.base |= 1u << i;
    }

    gpr.free = gpr.base;
    simd.free = simd.base;

    // fill with -1 so every entry reads as kInvalidInstIdx (no defining instruction)
    memset(gpr.defs, -1, sizeof(gpr.defs));
    memset(simd.defs, -1, sizeof(simd.defs));

    CODEGEN_ASSERT(kSpillSlots + kExtraSpillSlots < 64);
    freeSpillSlots = (1ull << (kSpillSlots + kExtraSpillSlots)) - 1ull;
}
120
121
// Allocates a register of the given kind for the value defined by instruction
// `index`. When the set is exhausted, spills the register whose next use is
// furthest away; sets the error flag and returns register 0 if nothing can be
// spilled.
RegisterA64 IrRegAllocA64::allocReg(KindA64 kind, uint32_t index)
{
    Set& set = getSet(kind);

    if (set.free == 0)
    {
        // Try to find and spill a register that is not used in the current instruction and has the furthest next use
        uint32_t victim = findInstructionWithFurthestNextUse(set);

        if (victim == kInvalidInstIdx)
        {
            error = true;
            return RegisterA64{kind, 0};
        }

        spill(set, index, victim);
        CODEGEN_ASSERT(set.free != 0);
    }

    // allocate from the high end by default
    int reg = 31 - countlz(set.free);

    if (FFlag::DebugCodegenChaosA64)
        reg = countrz(set.free); // allocate from low end; this causes extra conflicts for calls

    set.free &= ~(1u << reg);
    set.defs[reg] = index;

    return RegisterA64{kind, uint8_t(reg)};
}
150
151
// Allocates a temporary register of the given kind (released via freeTemp or
// freeTempRegs). Spills the furthest-used value when the set is exhausted;
// sets the error flag and returns register 0 if nothing can be spilled.
RegisterA64 IrRegAllocA64::allocTemp(KindA64 kind)
{
    Set& set = getSet(kind);

    if (set.free == 0)
    {
        // Try to find and spill a register that is not used in the current instruction and has the furthest next use
        uint32_t victim = findInstructionWithFurthestNextUse(set);

        if (victim == kInvalidInstIdx)
        {
            error = true;
            return RegisterA64{kind, 0};
        }

        spill(set, currInstIdx, victim);
        CODEGEN_ASSERT(set.free != 0);
    }

    // allocate from the high end by default
    int reg = 31 - countlz(set.free);

    if (FFlag::DebugCodegenChaosA64)
        reg = countrz(set.free); // allocate from low end; this causes extra conflicts for calls

    set.free &= ~(1u << reg);
    set.temp |= 1u << reg;
    CODEGEN_ASSERT(set.defs[reg] == kInvalidInstIdx);

    return RegisterA64{kind, uint8_t(reg)};
}
181
182
// Tries to take over the register of an operand whose last use is instruction
// `index`; falls back to a fresh allocation when no operand register is
// available for reuse.
RegisterA64 IrRegAllocA64::allocReuse(KindA64 kind, uint32_t index, std::initializer_list<IrOp> oprefs)
{
    for (IrOp op : oprefs)
    {
        if (op.kind != IrOpKind::Inst)
            continue;

        IrInst& source = function.instructions[op.index];

        // only reuse when this is the operand's final use, it still holds a register, and nobody claimed it yet
        if (source.lastUse != index || source.reusedReg || source.regA64 == noreg)
            continue;

        CODEGEN_ASSERT(!source.spilled && !source.needsReload);
        CODEGEN_ASSERT(source.regA64.kind == kind);

        Set& set = getSet(kind);
        CODEGEN_ASSERT(set.defs[source.regA64.index] == op.index);
        set.defs[source.regA64.index] = index;

        source.reusedReg = true;
        return source.regA64;
    }

    return allocReg(kind, index);
}
207
208
// Claims a specific register (which must currently be free) for the value
// defined by instruction `index` and returns it.
RegisterA64 IrRegAllocA64::takeReg(RegisterA64 reg, uint32_t index)
{
    Set& set = getSet(reg.kind);
    uint32_t bit = 1u << reg.index;

    CODEGEN_ASSERT(set.free & bit);
    CODEGEN_ASSERT(set.defs[reg.index] == kInvalidInstIdx);

    set.free &= ~bit;
    set.defs[reg.index] = index;

    return reg;
}
220
221
// Releases a register that was allocated for an instruction (not a temporary).
void IrRegAllocA64::freeReg(RegisterA64 reg)
{
    Set& set = getSet(reg.kind);
    uint32_t bit = 1u << reg.index;

    CODEGEN_ASSERT((set.base & bit) != 0);
    CODEGEN_ASSERT((set.free & bit) == 0);
    CODEGEN_ASSERT((set.temp & bit) == 0);

    set.free |= bit;
    set.defs[reg.index] = kInvalidInstIdx;
}
232
233
// Frees the register held by `target` if instruction `index` is its last use
// and the register wasn't already transferred to another instruction.
void IrRegAllocA64::freeLastUseReg(IrInst& target, uint32_t index)
{
    if (target.lastUse != index || target.reusedReg)
        return;

    CODEGEN_ASSERT(!target.spilled && !target.needsReload);

    // Register might have already been freed if it had multiple uses inside a single instruction
    if (target.regA64 == noreg)
        return;

    freeReg(target.regA64);
    target.regA64 = noreg;
}
247
248
void IrRegAllocA64::freeLastUseRegs(const IrInst& inst, uint32_t index)
249
{
250
auto checkOp = [this, index](IrOp op)
251
{
252
if (op.kind == IrOpKind::Inst)
253
freeLastUseReg(function.instructions[op.index], index);
254
};
255
256
for (const IrOp& op : inst.ops)
257
checkOp(op);
258
}
259
260
// Releases a single temporary register previously handed out by allocTemp.
void IrRegAllocA64::freeTemp(RegisterA64 reg)
{
    Set& set = getSet(reg.kind);
    uint32_t bit = 1u << reg.index;

    CODEGEN_ASSERT((set.base & bit) != 0);
    CODEGEN_ASSERT((set.free & bit) == 0);
    CODEGEN_ASSERT((set.temp & bit) != 0);

    set.free |= bit;
    set.temp &= ~bit;
}
271
272
void IrRegAllocA64::freeTempRegs()
273
{
274
CODEGEN_ASSERT((gpr.free & gpr.temp) == 0);
275
gpr.free |= gpr.temp;
276
gpr.temp = 0;
277
278
CODEGEN_ASSERT((simd.free & simd.temp) == 0);
279
simd.free |= simd.temp;
280
simd.temp = 0;
281
}
282
283
// Spills every allocated register in both register sets (e.g. around a call)
// so that both sets end up fully free, and frees all temporaries. Returns the
// index of the first spill record created so the caller can later pass it to
// restore(start). `live` names registers whose values must survive; it is only
// consulted by the chaos-testing poison pass below.
size_t IrRegAllocA64::spill(uint32_t index, std::initializer_list<RegisterA64> live)
{
    static const KindA64 sets[] = {KindA64::x, KindA64::q};

    size_t start = spills.size();

    uint32_t poisongpr = 0;
    uint32_t poisonsimd = 0;

    if (FFlag::DebugCodegenChaosA64)
    {
        // chaos mode: plan to clobber every currently allocated register except the explicitly live ones
        poisongpr = gpr.base & ~gpr.free;
        poisonsimd = simd.base & ~simd.free;

        for (RegisterA64 reg : live)
        {
            Set& set = getSet(reg.kind);
            (&set == &simd ? poisonsimd : poisongpr) &= ~(1u << reg.index);
        }
    }

    for (KindA64 kind : sets)
    {
        Set& set = getSet(kind);

        // early-out
        if (set.free == set.base)
            continue;

        // free all temp registers
        CODEGEN_ASSERT((set.free & set.temp) == 0);
        set.free |= set.temp;
        set.temp = 0;

        // spill all allocated registers unless they aren't used anymore
        uint32_t regs = set.base & ~set.free;

        while (regs)
        {
            int reg = 31 - countlz(regs);

            uint32_t targetInstIdx = set.defs[reg];

            CODEGEN_ASSERT(targetInstIdx != kInvalidInstIdx);
            CODEGEN_ASSERT(function.instructions[targetInstIdx].regA64.index == reg);

            spill(set, index, targetInstIdx);

            regs &= ~(1u << reg);
        }

        CODEGEN_ASSERT(set.free == set.base);
    }

    if (FFlag::DebugCodegenChaosA64)
    {
        // overwrite poisoned registers with recognizable garbage to flush out
        // code that incorrectly relies on values surviving the spill
        for (int reg = 0; reg < 32; ++reg)
        {
            if (poisongpr & (1u << reg))
                build.mov(RegisterA64{KindA64::x, uint8_t(reg)}, 0xdead);
            if (poisonsimd & (1u << reg))
                build.fmov(RegisterA64{KindA64::d, uint8_t(reg)}, -0.125);
        }
    }

    return start;
}
350
351
void IrRegAllocA64::restore(size_t start)
352
{
353
CODEGEN_ASSERT(start <= spills.size());
354
355
if (start < spills.size())
356
{
357
for (size_t i = start; i < spills.size(); ++i)
358
{
359
Spill s = spills[i]; // copy in case takeReg reallocates spills
360
RegisterA64 reg = takeReg(s.origin, s.inst);
361
362
restore(s, reg);
363
}
364
365
spills.resize(start);
366
}
367
}
368
369
// Restores the spilled value of a single instruction into a freshly allocated
// register of the original kind; asserts if no spill record exists for it.
void IrRegAllocA64::restoreReg(IrInst& inst)
{
    uint32_t index = function.getInstIndex(inst);

    for (size_t i = 0; i < spills.size(); ++i)
    {
        if (spills[i].inst != index)
            continue;

        Spill s = spills[i]; // copy in case allocReg reallocates spills
        RegisterA64 reg = allocReg(s.origin.kind, index);

        restore(s, reg);

        // swap-remove the consumed spill record
        spills[i] = spills.back();
        spills.pop_back();
        return;
    }

    CODEGEN_ASSERT(!"Expected to find a spill record");
}
390
391
// Reloads a previously spilled value into `reg` and re-attaches the register
// to the owning instruction. The value comes either from a spill slot
// (s.slot >= 0) or is re-materialized from its VM restore location
// (s.slot < 0, recorded at spill time when a restore location was available).
void IrRegAllocA64::restore(const IrRegAllocA64::Spill& s, RegisterA64 reg)
{
    IrInst& inst = function.instructions[s.inst];
    CODEGEN_ASSERT(inst.regA64 == noreg);

    if (s.slot >= 0)
    {
        if (isExtraSpillSlot(s.slot))
        {
            int extraOffset = getExtraSpillAddressOffset(s.slot);

            // Need to calculate an address, but everything might be taken
            // If we are restoring an integer register, we can just use it as a temporary
            RegisterA64 emergencyTemp = reg.kind == KindA64::w ? castReg(KindA64::x, reg) : (reg.kind == KindA64::x ? reg : x17);

            // non-integer target: x17 is borrowed, so preserve its value around this sequence
            if (reg.kind != KindA64::w && reg.kind != KindA64::x)
                build.str(emergencyTemp, sTemporary);

            build.ldr(emergencyTemp, mem(rState, offsetof(lua_State, global)));
            build.ldr(emergencyTemp, mem(emergencyTemp, offsetof(global_State, ecbdata)));

            build.ldr(reg, mem(emergencyTemp, extraOffset));

            if (reg.kind != KindA64::w && reg.kind != KindA64::x)
                build.ldr(emergencyTemp, sTemporary);
        }
        else
        {
            build.ldr(reg, mem(sp, sSpillArea.data + s.slot * 8));
        }

        // kInvalidSpill marks a failed slot allocation (error path); no real slot to free
        if (s.slot != kInvalidSpill)
            freeSpill(freeSpillSlots, reg.kind, s.slot);
    }
    else
    {
        CODEGEN_ASSERT(!inst.spilled && inst.needsReload);

        // When restoring the value, we allow cross-block restore because we have commited to the target location at spill time
        ValueRestoreLocation restoreLocation = function.findRestoreLocation(inst, /*limitToCurrentBlock*/ false);

        AddressA64 addr = getReloadAddress(restoreLocation);
        CODEGEN_ASSERT(addr.base != xzr);

        IrValueKind spillValueKind = getCmdValueKind(inst.cmd);

        if (spillValueKind == IrValueKind::Int && restoreLocation.kind == IrValueKind::Double)
        {
            // Handle restore of an int/uint value from a location storing a double number
            RegisterA64 temp = allocTemp(KindA64::d);
            build.ldr(temp, addr);

            if (restoreLocation.conversionCmd == IrCmd::INT_TO_NUM)
                build.fcvtzs(reg, temp);
            else if (restoreLocation.conversionCmd == IrCmd::UINT_TO_NUM)
                build.fcvtzs(castReg(KindA64::x, reg), temp); // note: we don't use fcvtzu for consistency with C++ code
            else
                CODEGEN_ASSERT(!"re-materialization not supported for this conversion command");

            // Temporary might have taken a spot needed for other registers in spill restore process
            freeTemp(temp);
        }
        else
        {
            build.ldr(reg, addr);
        }
    }

    inst.spilled = false;
    inst.needsReload = false;
    inst.regA64 = reg;
}
463
464
// Evicts the register assigned to instruction `targetInstIdx` from `set`,
// making it available at instruction `index`. Depending on the value, it is
// dropped (last use), marked for reload from its VM restore location, or
// stored to a spill slot. The register is freed in the set in all cases.
void IrRegAllocA64::spill(Set& set, uint32_t index, uint32_t targetInstIdx)
{
    IrInst& def = function.instructions[targetInstIdx];
    int reg = def.regA64.index;

    CODEGEN_ASSERT(!def.reusedReg);
    CODEGEN_ASSERT(!def.spilled);
    CODEGEN_ASSERT(!def.needsReload);

    if (def.lastUse == index)
    {
        // instead of spilling the register to never reload it, we assume the register is not needed anymore
    }
    else if (function.hasRestoreLocation(def, /*limitToCurrentBlock*/ true))
    {
        // when checking if value has a restore operation to spill it, we only allow it in the same block
        // instead of spilling the register to stack, we can reload it from VM stack/constants
        // we still need to record the spill for restore(start) to work
        Spill s = {targetInstIdx, def.regA64, -1};
        spills.push_back(s);

        def.needsReload = true;

        if (stats)
            stats->spillsToRestore++;
    }
    else
    {
        int slot = allocSpill(freeSpillSlots, def.regA64.kind);
        if (slot < 0)
        {
            // out of spill slots; record the sentinel slot and flag the allocator error
            slot = kInvalidSpill;
            error = true;
        }

        if (isExtraSpillSlot(slot))
        {
            int extraOffset = getExtraSpillAddressOffset(slot);

            // Tricky situation, no registers left, but need a register to calculate an address
            // We will try to take x17 unless it's actually the register being spilled
            RegisterA64 emergencyTemp = def.regA64 == x17 || def.regA64 == w17 ? x16 : x17;
            build.str(emergencyTemp, sTemporary);

            build.ldr(emergencyTemp, mem(rState, offsetof(lua_State, global)));
            build.ldr(emergencyTemp, mem(emergencyTemp, offsetof(global_State, ecbdata)));

            build.str(def.regA64, mem(emergencyTemp, extraOffset));

            build.ldr(emergencyTemp, sTemporary);
        }
        else
        {
            build.str(def.regA64, mem(sp, sSpillArea.data + slot * 8));
        }

        Spill s = {targetInstIdx, def.regA64, int8_t(slot)};
        spills.push_back(s);

        def.spilled = true;

        if (stats)
        {
            stats->spillsToSlot++;

            if (slot != kInvalidSpill && unsigned(slot + 1) > stats->maxSpillSlotsUsed)
                stats->maxSpillSlotsUsed = slot + 1;
        }
    }

    def.regA64 = noreg;

    set.free |= 1u << reg;
    set.defs[reg] = kInvalidInstIdx;
}
539
540
// Scans the set's register owners and picks the spill candidate whose next use
// is furthest from the current instruction; returns kInvalidInstIdx when no
// register may be spilled.
uint32_t IrRegAllocA64::findInstructionWithFurthestNextUse(Set& set) const
{
    if (currInstIdx == kInvalidInstIdx)
        return kInvalidInstIdx;

    uint32_t bestTarget = kInvalidInstIdx;
    uint32_t bestLocation = 0;

    for (uint32_t regInstUser : set.defs)
    {
        // Cannot spill temporary registers or the register of the value that's defined in the current instruction
        if (regInstUser == kInvalidInstIdx || regInstUser == currInstIdx)
            continue;

        uint32_t nextUse = getNextInstUse(function, regInstUser, currInstIdx);

        // Cannot spill value that is about to be used in the current instruction
        if (nextUse == currInstIdx)
            continue;

        if (bestTarget == kInvalidInstIdx || nextUse > bestLocation)
        {
            bestLocation = nextUse;
            bestTarget = regInstUser;
        }
    }

    return bestTarget;
}
569
570
571
bool IrRegAllocA64::isExtraSpillSlot(unsigned slot) const
572
{
573
return slot >= kSpillSlots;
574
}
575
576
int IrRegAllocA64::getExtraSpillAddressOffset(unsigned slot) const
577
{
578
CODEGEN_ASSERT(isExtraSpillSlot(slot));
579
580
return (slot - kSpillSlots) * 8;
581
}
582
583
// Maps a register kind to the register set that owns it: integer kinds go to
// gpr, floating-point/vector kinds go to simd.
IrRegAllocA64::Set& IrRegAllocA64::getSet(KindA64 kind)
{
    switch (kind)
    {
    case KindA64::w:
    case KindA64::x:
        return gpr;

    case KindA64::s:
    case KindA64::d:
    case KindA64::q:
        return simd;

    default:
        CODEGEN_ASSERT(!"Unexpected register kind");
        LUAU_UNREACHABLE();
    }
}
601
602
} // namespace A64
603
} // namespace CodeGen
604
} // namespace Luau
605
606