Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/CodeGen/src/IrLoweringA64.cpp
2725 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#include "IrLoweringA64.h"
3
4
#include "Luau/DenseHash.h"
5
#include "Luau/IrData.h"
6
#include "Luau/IrUtils.h"
7
#include "Luau/LoweringStats.h"
8
9
#include "EmitCommonA64.h"
10
#include "NativeState.h"
11
12
#include "lstate.h"
13
#include "lgc.h"
14
15
LUAU_FASTFLAG(LuauCodegenBlockSafeEnv)
16
LUAU_FASTFLAG(LuauCodegenBufferRangeMerge4)
17
LUAU_FASTFLAG(LuauCodegenBufNoDefTag)
18
19
namespace Luau
20
{
21
namespace CodeGen
22
{
23
namespace A64
24
{
25
26
// Maps an IR comparison to the A64 condition code to test after a floating-point
// compare (fcmp). FP compares may produce an "unordered" result when either
// operand is NaN (which sets C and V, clears N and Z per the A64 architecture);
// the codes below differ from the plain signed-integer codes precisely so that
// ordered comparisons involving NaN evaluate to false and their negations true.
inline ConditionA64 getConditionFP(IrCondition cond)
{
    switch (cond)
    {
    case IrCondition::Equal:
        return ConditionA64::Equal;

    case IrCondition::NotEqual:
        return ConditionA64::NotEqual;

    case IrCondition::Less:
        // N is set only for an ordered "less than" result, so NaN compares false
        return ConditionA64::Minus;

    case IrCondition::NotLess:
        // true for ordered greater-or-equal results and for unordered (NaN) compares
        return ConditionA64::Plus;

    case IrCondition::LessEqual:
        // LS (C clear or Z set) is false for unordered results, which set C and clear Z
        return ConditionA64::UnsignedLessEqual;

    case IrCondition::NotLessEqual:
        // HI (C set and Z clear) is true for ordered greater and for unordered results
        return ConditionA64::UnsignedGreater;

    case IrCondition::Greater:
        return ConditionA64::Greater;

    case IrCondition::NotGreater:
        return ConditionA64::LessEqual;

    case IrCondition::GreaterEqual:
        return ConditionA64::GreaterEqual;

    case IrCondition::NotGreaterEqual:
        return ConditionA64::Less;

    default:
        CODEGEN_ASSERT(!"Unexpected condition code");
        return ConditionA64::Always;
    }
}
65
66
// Maps an IR comparison to the A64 condition code to test after an integer
// compare; handles both the signed and the explicitly-unsigned IR conditions.
inline ConditionA64 getConditionInt(IrCondition cond)
{
    switch (cond)
    {
    // equality
    case IrCondition::Equal:
        return ConditionA64::Equal;
    case IrCondition::NotEqual:
        return ConditionA64::NotEqual;

    // signed ordering
    case IrCondition::Less:
        return ConditionA64::Minus;
    case IrCondition::NotLess:
        return ConditionA64::Plus;
    case IrCondition::LessEqual:
        return ConditionA64::LessEqual;
    case IrCondition::NotLessEqual:
        return ConditionA64::Greater;
    case IrCondition::Greater:
        return ConditionA64::Greater;
    case IrCondition::NotGreater:
        return ConditionA64::LessEqual;
    case IrCondition::GreaterEqual:
        return ConditionA64::GreaterEqual;
    case IrCondition::NotGreaterEqual:
        return ConditionA64::Less;

    // unsigned ordering
    case IrCondition::UnsignedLess:
        return ConditionA64::CarryClear;
    case IrCondition::UnsignedLessEqual:
        return ConditionA64::UnsignedLessEqual;
    case IrCondition::UnsignedGreater:
        return ConditionA64::UnsignedGreater;
    case IrCondition::UnsignedGreaterEqual:
        return ConditionA64::CarrySet;

    default:
        CODEGEN_ASSERT(!"Unexpected condition code");
        return ConditionA64::Always;
    }
}
117
118
// Computes dst = src + offset, where offset may exceed the immediate range of
// the add instruction; dst must be distinct from src so it can double as a
// scratch register for large offsets.
static void emitAddOffset(AssemblyBuilderA64& build, RegisterA64 dst, RegisterA64 src, size_t offset)
{
    CODEGEN_ASSERT(dst != src);
    CODEGEN_ASSERT(offset <= INT_MAX);

    if (offset > AssemblyBuilderA64::kMaxImmediate)
    {
        // Offset doesn't fit in the add immediate field: materialize it first
        build.mov(dst, int(offset));
        build.add(dst, dst, src);
    }
    else
    {
        build.add(dst, src, uint16_t(offset));
    }
}
133
134
// Emits an out-of-line trap: normal control flow branches over the udf
// instruction, while jumps to 'abort' land directly on it.
static void emitAbort(AssemblyBuilderA64& build, Label& abort)
{
    Label cont;
    build.b(cont);
    build.setLabel(abort);
    build.udf();
    build.setLabel(cont);
}
142
143
// Emits a call to the interpreter fallback handler stored in NativeContext at
// byte 'offset', invoked as fallback(L, instruction, base, k) with
// 'instruction' pointing at the bytecode instruction at 'pcpos'.
// rBase is refreshed afterwards via emitUpdateBase since the handler can move
// the stack.
static void emitFallback(AssemblyBuilderA64& build, int offset, int pcpos)
{
    // fallback(L, instruction, base, k)
    build.mov(x0, rState);
    emitAddOffset(build, x1, rCode, pcpos * sizeof(Instruction));
    build.mov(x2, rBase);
    build.mov(x3, rConstants);
    build.ldr(x4, mem(rNativeContext, offset));
    build.blr(x4);

    emitUpdateBase(build);
}
155
156
// Emits a call to a libm-style helper (stored in NativeContext at byte offset
// 'func') taking one double plus an out-pointer, e.g. frexp/modf:
// d0 receives the number from stack slot 'arg', and x0 points at the
// sTemporary spill slot that receives the secondary result.
static void emitInvokeLibm1P(AssemblyBuilderA64& build, size_t func, int arg)
{
    // sTemporary must exist and be addressable with an add-immediate from sp
    CODEGEN_ASSERT(kTempSlots >= 1);
    CODEGEN_ASSERT(unsigned(sTemporary.data) <= AssemblyBuilderA64::kMaxImmediate);
    build.ldr(d0, mem(rBase, arg * sizeof(TValue) + offsetof(TValue, value.n)));
    build.add(x0, sp, uint16_t(sTemporary.data)); // sp-relative offset
    build.ldr(x1, mem(rNativeContext, uint32_t(func)));
    build.blr(x1);
}
165
166
// Lowers the builtin fastcalls that get manual A64 code instead of the generic
// path; returns false when 'bfid' isn't handled here (which asserts in debug).
// 'res' and 'arg' are VM stack slot indices; 'nresults' is the number of
// results the caller expects (1 or 2 for frexp/modf).
static bool emitBuiltin(AssemblyBuilderA64& build, IrFunction& function, IrRegAllocA64& regs, int bfid, int res, int arg, int nresults)
{
    switch (bfid)
    {
    case LBF_MATH_FREXP:
    {
        CODEGEN_ASSERT(nresults == 1 || nresults == 2);
        // frexp leaves the mantissa in d0 and writes the int exponent through
        // the out-pointer, i.e. into sTemporary
        emitInvokeLibm1P(build, offsetof(NativeContext, libm_frexp), arg);
        build.str(d0, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));

        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, LUA_TNUMBER);
        build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));

        if (nresults == 2)
        {
            // second result: convert the stored integer exponent to a double
            build.ldr(w0, sTemporary);
            build.scvtf(d1, w0);
            build.str(d1, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
            build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
        }
        return true;
    }
    case LBF_MATH_MODF:
    {
        CODEGEN_ASSERT(nresults == 1 || nresults == 2);
        // modf leaves the fractional part in d0 and writes the integral part
        // (a double) through the out-pointer into sTemporary; the integral part
        // is the first result
        emitInvokeLibm1P(build, offsetof(NativeContext, libm_modf), arg);
        build.ldr(d1, sTemporary);
        build.str(d1, mem(rBase, res * sizeof(TValue) + offsetof(TValue, value.n)));

        RegisterA64 temp = regs.allocTemp(KindA64::w);
        build.mov(temp, LUA_TNUMBER);
        build.str(temp, mem(rBase, res * sizeof(TValue) + offsetof(TValue, tt)));

        if (nresults == 2)
        {
            // second result: the fractional part still sitting in d0
            build.str(d0, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, value.n)));
            build.str(temp, mem(rBase, (res + 1) * sizeof(TValue) + offsetof(TValue, tt)));
        }
        return true;
    }

    default:
        CODEGEN_ASSERT(!"Missing A64 lowering");
        return false;
    }
}
213
214
// Returns the raw IEEE-754 bit pattern of a double, using memcpy to avoid
// undefined behavior from type punning.
static uint64_t getDoubleBits(double value)
{
    static_assert(sizeof(uint64_t) == sizeof(double), "Expecting double to be 64-bit");
    uint64_t bits;
    memcpy(&bits, &value, sizeof(bits));
    return bits;
}
221
222
// Returns the raw IEEE-754 bit pattern of a float, using memcpy to avoid
// undefined behavior from type punning.
static uint32_t getFloatBits(float value)
{
    static_assert(sizeof(uint32_t) == sizeof(float), "Expecting float to be 32-bit");
    uint32_t bits;
    memcpy(&bits, &value, sizeof(bits));
    return bits;
}
229
230
// Sets up per-function lowering state. The register allocator is seeded with
// the allocatable GPR ranges {x0-x15} and {x16-x17} and SIMD ranges {q0-q7}
// and {q16-q31} (the role of each set is defined by IrRegAllocA64).
// exitHandlerMap is constructed with ~0u — presumably the empty-key sentinel
// for the dense hash map; confirm against Luau/DenseHash.h.
IrLoweringA64::IrLoweringA64(AssemblyBuilderA64& build, ModuleHelpers& helpers, IrFunction& function, LoweringStats* stats)
    : build(build)
    , helpers(helpers)
    , function(function)
    , stats(stats)
    , regs(build, function, stats, {{x0, x15}, {x16, x17}, {q0, q7}, {q16, q31}})
    , valueTracker(function)
    , exitHandlerMap(~0u)
{
    // When the value tracker needs an instruction's value back, delegate to
    // the register allocator to restore it (regs.restoreReg).
    valueTracker.setRestoreCallback(
        this,
        [](void* context, IrInst& inst)
        {
            IrLoweringA64* self = static_cast<IrLoweringA64*>(context);
            self->regs.restoreReg(inst);
        }
    );
}
248
249
void IrLoweringA64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
250
{
251
regs.currInstIdx = index;
252
253
valueTracker.beforeInstLowering(inst);
254
255
switch (inst.cmd)
256
{
257
case IrCmd::LOAD_TAG:
258
{
259
inst.regA64 = regs.allocReg(KindA64::w, index);
260
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, tt));
261
build.ldr(inst.regA64, addr);
262
break;
263
}
264
case IrCmd::LOAD_POINTER:
265
{
266
inst.regA64 = regs.allocReg(KindA64::x, index);
267
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value.gc));
268
build.ldr(inst.regA64, addr);
269
break;
270
}
271
case IrCmd::LOAD_DOUBLE:
272
{
273
inst.regA64 = regs.allocReg(KindA64::d, index);
274
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value.n));
275
build.ldr(inst.regA64, addr);
276
break;
277
}
278
case IrCmd::LOAD_INT:
279
{
280
inst.regA64 = regs.allocReg(KindA64::w, index);
281
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value));
282
build.ldr(inst.regA64, addr);
283
break;
284
}
285
case IrCmd::LOAD_FLOAT:
286
{
287
inst.regA64 = regs.allocReg(KindA64::s, index);
288
AddressA64 addr = tempAddr(OP_A(inst), intOp(OP_B(inst)));
289
290
build.ldr(inst.regA64, addr);
291
break;
292
}
293
case IrCmd::LOAD_TVALUE:
294
{
295
inst.regA64 = regs.allocReg(KindA64::q, index);
296
297
int addrOffset = HAS_OP_B(inst) ? intOp(OP_B(inst)) : 0;
298
AddressA64 addr = tempAddr(OP_A(inst), addrOffset);
299
build.ldr(inst.regA64, addr);
300
break;
301
}
302
case IrCmd::LOAD_ENV:
303
inst.regA64 = regs.allocReg(KindA64::x, index);
304
build.ldr(inst.regA64, mem(rClosure, offsetof(Closure, env)));
305
break;
306
case IrCmd::GET_ARR_ADDR:
307
{
308
inst.regA64 = regs.allocReuse(KindA64::x, index, {OP_A(inst)});
309
build.ldr(inst.regA64, mem(regOp(OP_A(inst)), offsetof(LuaTable, array)));
310
311
if (OP_B(inst).kind == IrOpKind::Inst)
312
{
313
build.add(inst.regA64, inst.regA64, regOp(OP_B(inst)), kTValueSizeLog2); // implicit uxtw
314
}
315
else if (OP_B(inst).kind == IrOpKind::Constant)
316
{
317
if (intOp(OP_B(inst)) == 0)
318
{
319
// no offset required
320
}
321
else if (intOp(OP_B(inst)) * sizeof(TValue) <= AssemblyBuilderA64::kMaxImmediate)
322
{
323
build.add(inst.regA64, inst.regA64, uint16_t(intOp(OP_B(inst)) * sizeof(TValue)));
324
}
325
else
326
{
327
RegisterA64 temp = regs.allocTemp(KindA64::x);
328
build.mov(temp, intOp(OP_B(inst)) * sizeof(TValue));
329
build.add(inst.regA64, inst.regA64, temp);
330
}
331
}
332
else
333
CODEGEN_ASSERT(!"Unsupported instruction form");
334
break;
335
}
336
case IrCmd::GET_SLOT_NODE_ADDR:
337
{
338
inst.regA64 = regs.allocReuse(KindA64::x, index, {OP_A(inst)});
339
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
340
RegisterA64 temp1w = castReg(KindA64::w, temp1);
341
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
342
RegisterA64 temp2x = castReg(KindA64::x, temp2);
343
344
// note: since the stride of the load is the same as the destination register size, we can range check the array index, not the byte offset
345
if (uintOp(OP_B(inst)) <= AddressA64::kMaxOffset)
346
build.ldr(temp1w, mem(rCode, uintOp(OP_B(inst)) * sizeof(Instruction)));
347
else
348
{
349
build.mov(temp1, uintOp(OP_B(inst)) * sizeof(Instruction));
350
build.ldr(temp1w, mem(rCode, temp1));
351
}
352
353
// C field can be shifted as long as it's at the most significant byte of the instruction word
354
CODEGEN_ASSERT(kOffsetOfInstructionC == 3);
355
build.ldrb(temp2, mem(regOp(OP_A(inst)), offsetof(LuaTable, nodemask8)));
356
build.and_(temp2, temp2, temp1w, -24);
357
358
// note: this may clobber OP_A(inst), so it's important that we don't use it after this
359
build.ldr(inst.regA64, mem(regOp(OP_A(inst)), offsetof(LuaTable, node)));
360
build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
361
break;
362
}
363
case IrCmd::GET_HASH_NODE_ADDR:
364
{
365
inst.regA64 = regs.allocReuse(KindA64::x, index, {OP_A(inst)});
366
RegisterA64 temp1 = regs.allocTemp(KindA64::w);
367
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
368
RegisterA64 temp2x = castReg(KindA64::x, temp2);
369
370
// hash & ((1 << lsizenode) - 1) == hash & ~(-1 << lsizenode)
371
build.mov(temp1, -1);
372
build.ldrb(temp2, mem(regOp(OP_A(inst)), offsetof(LuaTable, lsizenode)));
373
build.lsl(temp1, temp1, temp2);
374
build.mov(temp2, uintOp(OP_B(inst)));
375
build.bic(temp2, temp2, temp1);
376
377
// note: this may clobber OP_A(inst), so it's important that we don't use it after this
378
build.ldr(inst.regA64, mem(regOp(OP_A(inst)), offsetof(LuaTable, node)));
379
build.add(inst.regA64, inst.regA64, temp2x, kLuaNodeSizeLog2); // "zero extend" temp2 to get a larger shift (top 32 bits are zero)
380
break;
381
}
382
case IrCmd::GET_CLOSURE_UPVAL_ADDR:
383
{
384
inst.regA64 = regs.allocReuse(KindA64::x, index, {OP_A(inst)});
385
RegisterA64 cl = OP_A(inst).kind == IrOpKind::Undef ? rClosure : regOp(OP_A(inst));
386
387
build.add(inst.regA64, cl, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(OP_B(inst))));
388
break;
389
}
390
case IrCmd::STORE_TAG:
391
{
392
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, tt));
393
if (tagOp(OP_B(inst)) == 0)
394
{
395
build.str(wzr, addr);
396
}
397
else
398
{
399
RegisterA64 temp = regs.allocTemp(KindA64::w);
400
build.mov(temp, tagOp(OP_B(inst)));
401
build.str(temp, addr);
402
}
403
break;
404
}
405
case IrCmd::STORE_POINTER:
406
{
407
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value));
408
if (OP_B(inst).kind == IrOpKind::Constant)
409
{
410
CODEGEN_ASSERT(intOp(OP_B(inst)) == 0);
411
build.str(xzr, addr);
412
}
413
else
414
{
415
build.str(regOp(OP_B(inst)), addr);
416
}
417
break;
418
}
419
case IrCmd::STORE_EXTRA:
420
{
421
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, extra));
422
if (intOp(OP_B(inst)) == 0)
423
{
424
build.str(wzr, addr);
425
}
426
else
427
{
428
RegisterA64 temp = regs.allocTemp(KindA64::w);
429
build.mov(temp, intOp(OP_B(inst)));
430
build.str(temp, addr);
431
}
432
break;
433
}
434
case IrCmd::STORE_DOUBLE:
435
{
436
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value));
437
if (OP_B(inst).kind == IrOpKind::Constant && getDoubleBits(doubleOp(OP_B(inst))) == 0)
438
{
439
build.str(xzr, addr);
440
}
441
else
442
{
443
RegisterA64 temp = tempDouble(OP_B(inst));
444
build.str(temp, addr);
445
}
446
break;
447
}
448
case IrCmd::STORE_INT:
449
{
450
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value));
451
if (OP_B(inst).kind == IrOpKind::Constant && intOp(OP_B(inst)) == 0)
452
{
453
build.str(wzr, addr);
454
}
455
else
456
{
457
RegisterA64 temp = tempInt(OP_B(inst));
458
build.str(temp, addr);
459
}
460
break;
461
}
462
case IrCmd::STORE_VECTOR:
463
{
464
RegisterA64 temp1 = tempFloat(OP_B(inst));
465
RegisterA64 temp2 = tempFloat(OP_C(inst));
466
RegisterA64 temp3 = tempFloat(OP_D(inst));
467
468
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value));
469
CODEGEN_ASSERT(addr.kind == AddressKindA64::imm && addr.data % 4 == 0 && unsigned(addr.data + 8) / 4 <= AddressA64::kMaxOffset);
470
471
build.str(temp1, AddressA64(addr.base, addr.data + 0));
472
build.str(temp2, AddressA64(addr.base, addr.data + 4));
473
build.str(temp3, AddressA64(addr.base, addr.data + 8));
474
475
if (HAS_OP_E(inst))
476
{
477
RegisterA64 temp = regs.allocTemp(KindA64::w);
478
build.mov(temp, tagOp(OP_E(inst)));
479
build.str(temp, tempAddr(OP_A(inst), offsetof(TValue, tt)));
480
}
481
break;
482
}
483
case IrCmd::STORE_TVALUE:
484
{
485
int addrOffset = HAS_OP_C(inst) ? intOp(OP_C(inst)) : 0;
486
AddressA64 addr = tempAddr(OP_A(inst), addrOffset);
487
build.str(regOp(OP_B(inst)), addr);
488
break;
489
}
490
case IrCmd::STORE_SPLIT_TVALUE:
491
{
492
int addrOffset = HAS_OP_D(inst) ? intOp(OP_D(inst)) : 0;
493
494
RegisterA64 tempt = regs.allocTemp(KindA64::w);
495
AddressA64 addrt = tempAddr(OP_A(inst), offsetof(TValue, tt) + addrOffset);
496
build.mov(tempt, tagOp(OP_B(inst)));
497
build.str(tempt, addrt);
498
499
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, value) + addrOffset);
500
501
if (tagOp(OP_B(inst)) == LUA_TBOOLEAN)
502
{
503
if (OP_C(inst).kind == IrOpKind::Constant)
504
{
505
// note: we reuse tag temp register as value for true booleans, and use built-in zero register for false values
506
CODEGEN_ASSERT(LUA_TBOOLEAN == 1);
507
build.str(intOp(OP_C(inst)) ? tempt : wzr, addr);
508
}
509
else
510
build.str(regOp(OP_C(inst)), addr);
511
}
512
else if (tagOp(OP_B(inst)) == LUA_TNUMBER)
513
{
514
RegisterA64 temp = tempDouble(OP_C(inst));
515
build.str(temp, addr);
516
}
517
else if (isGCO(tagOp(OP_B(inst))))
518
{
519
build.str(regOp(OP_C(inst)), addr);
520
}
521
else
522
{
523
CODEGEN_ASSERT(!"Unsupported instruction form");
524
}
525
break;
526
}
527
case IrCmd::ADD_INT:
528
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
529
if (OP_B(inst).kind == IrOpKind::Constant && unsigned(intOp(OP_B(inst))) <= AssemblyBuilderA64::kMaxImmediate)
530
build.add(inst.regA64, regOp(OP_A(inst)), uint16_t(intOp(OP_B(inst))));
531
else if (OP_A(inst).kind == IrOpKind::Constant && unsigned(intOp(OP_A(inst))) <= AssemblyBuilderA64::kMaxImmediate)
532
build.add(inst.regA64, regOp(OP_B(inst)), uint16_t(intOp(OP_A(inst))));
533
else
534
{
535
RegisterA64 temp1 = tempInt(OP_A(inst));
536
RegisterA64 temp2 = tempInt(OP_B(inst));
537
build.add(inst.regA64, temp1, temp2);
538
}
539
break;
540
case IrCmd::SUB_INT:
541
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
542
if (OP_B(inst).kind == IrOpKind::Constant && unsigned(intOp(OP_B(inst))) <= AssemblyBuilderA64::kMaxImmediate)
543
build.sub(inst.regA64, regOp(OP_A(inst)), uint16_t(intOp(OP_B(inst))));
544
else
545
{
546
RegisterA64 temp1 = tempInt(OP_A(inst));
547
RegisterA64 temp2 = tempInt(OP_B(inst));
548
build.sub(inst.regA64, temp1, temp2);
549
}
550
break;
551
case IrCmd::SEXTI8_INT:
552
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
553
554
build.sbfx(inst.regA64, regOp(OP_A(inst)), 0, 8); // sextb
555
break;
556
case IrCmd::SEXTI16_INT:
557
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
558
559
build.sbfx(inst.regA64, regOp(OP_A(inst)), 0, 16); // sexth
560
break;
561
case IrCmd::ADD_NUM:
562
{
563
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst)});
564
RegisterA64 temp1 = tempDouble(OP_A(inst));
565
RegisterA64 temp2 = tempDouble(OP_B(inst));
566
build.fadd(inst.regA64, temp1, temp2);
567
break;
568
}
569
case IrCmd::SUB_NUM:
570
{
571
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst)});
572
RegisterA64 temp1 = tempDouble(OP_A(inst));
573
RegisterA64 temp2 = tempDouble(OP_B(inst));
574
build.fsub(inst.regA64, temp1, temp2);
575
break;
576
}
577
case IrCmd::MUL_NUM:
578
{
579
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst)});
580
RegisterA64 temp1 = tempDouble(OP_A(inst));
581
RegisterA64 temp2 = tempDouble(OP_B(inst));
582
build.fmul(inst.regA64, temp1, temp2);
583
break;
584
}
585
case IrCmd::DIV_NUM:
586
{
587
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst)});
588
RegisterA64 temp1 = tempDouble(OP_A(inst));
589
RegisterA64 temp2 = tempDouble(OP_B(inst));
590
build.fdiv(inst.regA64, temp1, temp2);
591
break;
592
}
593
case IrCmd::IDIV_NUM:
594
{
595
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst)});
596
RegisterA64 temp1 = tempDouble(OP_A(inst));
597
RegisterA64 temp2 = tempDouble(OP_B(inst));
598
build.fdiv(inst.regA64, temp1, temp2);
599
build.frintm(inst.regA64, inst.regA64);
600
break;
601
}
602
case IrCmd::MOD_NUM:
603
{
604
inst.regA64 = regs.allocReg(KindA64::d, index); // can't allocReuse because both A and B are used twice
605
RegisterA64 temp1 = tempDouble(OP_A(inst));
606
RegisterA64 temp2 = tempDouble(OP_B(inst));
607
build.fdiv(inst.regA64, temp1, temp2);
608
build.frintm(inst.regA64, inst.regA64);
609
build.fmul(inst.regA64, inst.regA64, temp2);
610
build.fsub(inst.regA64, temp1, inst.regA64);
611
break;
612
}
613
case IrCmd::MULADD_NUM:
614
{
615
RegisterA64 tempA = tempDouble(OP_A(inst));
616
RegisterA64 tempB = tempDouble(OP_B(inst));
617
RegisterA64 tempC = tempDouble(OP_C(inst));
618
619
if ((build.features & Feature_AdvSIMD) != 0)
620
{
621
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_C(inst)});
622
if (inst.regA64 != tempC)
623
build.fmov(inst.regA64, tempC);
624
build.fmla(inst.regA64, tempB, tempA);
625
}
626
else
627
{
628
inst.regA64 = regs.allocReg(KindA64::d, index);
629
build.fmul(inst.regA64, tempB, tempA);
630
build.fadd(inst.regA64, inst.regA64, tempC);
631
}
632
break;
633
}
634
case IrCmd::MIN_NUM:
635
{
636
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst)});
637
RegisterA64 temp1 = tempDouble(OP_A(inst));
638
RegisterA64 temp2 = tempDouble(OP_B(inst));
639
build.fcmp(temp1, temp2);
640
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Less));
641
break;
642
}
643
case IrCmd::MAX_NUM:
644
{
645
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst)});
646
RegisterA64 temp1 = tempDouble(OP_A(inst));
647
RegisterA64 temp2 = tempDouble(OP_B(inst));
648
build.fcmp(temp1, temp2);
649
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Greater));
650
break;
651
}
652
case IrCmd::UNM_NUM:
653
{
654
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst)});
655
RegisterA64 temp = tempDouble(OP_A(inst));
656
build.fneg(inst.regA64, temp);
657
break;
658
}
659
case IrCmd::FLOOR_NUM:
660
{
661
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst)});
662
RegisterA64 temp = tempDouble(OP_A(inst));
663
build.frintm(inst.regA64, temp);
664
break;
665
}
666
case IrCmd::CEIL_NUM:
667
{
668
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst)});
669
RegisterA64 temp = tempDouble(OP_A(inst));
670
build.frintp(inst.regA64, temp);
671
break;
672
}
673
case IrCmd::ROUND_NUM:
674
{
675
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst)});
676
RegisterA64 temp = tempDouble(OP_A(inst));
677
build.frinta(inst.regA64, temp);
678
break;
679
}
680
case IrCmd::SQRT_NUM:
681
{
682
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst)});
683
RegisterA64 temp = tempDouble(OP_A(inst));
684
build.fsqrt(inst.regA64, temp);
685
break;
686
}
687
case IrCmd::ABS_NUM:
688
{
689
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst)});
690
RegisterA64 temp = tempDouble(OP_A(inst));
691
build.fabs(inst.regA64, temp);
692
break;
693
}
694
case IrCmd::SIGN_NUM:
695
{
696
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst)});
697
698
RegisterA64 temp = tempDouble(OP_A(inst));
699
RegisterA64 temp0 = regs.allocTemp(KindA64::d);
700
RegisterA64 temp1 = regs.allocTemp(KindA64::d);
701
702
build.fcmpz(temp);
703
build.fmov(temp0, 0.0);
704
build.fmov(temp1, 1.0);
705
build.fcsel(inst.regA64, temp1, temp0, getConditionFP(IrCondition::Greater));
706
build.fmov(temp1, -1.0);
707
build.fcsel(inst.regA64, temp1, inst.regA64, getConditionFP(IrCondition::Less));
708
break;
709
}
710
case IrCmd::ADD_FLOAT:
711
{
712
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst), OP_B(inst)});
713
RegisterA64 temp1 = tempFloat(OP_A(inst));
714
RegisterA64 temp2 = tempFloat(OP_B(inst));
715
build.fadd(inst.regA64, temp1, temp2);
716
break;
717
}
718
case IrCmd::SUB_FLOAT:
719
{
720
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst), OP_B(inst)});
721
RegisterA64 temp1 = tempFloat(OP_A(inst));
722
RegisterA64 temp2 = tempFloat(OP_B(inst));
723
build.fsub(inst.regA64, temp1, temp2);
724
break;
725
}
726
case IrCmd::MUL_FLOAT:
727
{
728
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst), OP_B(inst)});
729
RegisterA64 temp1 = tempFloat(OP_A(inst));
730
RegisterA64 temp2 = tempFloat(OP_B(inst));
731
build.fmul(inst.regA64, temp1, temp2);
732
break;
733
}
734
case IrCmd::DIV_FLOAT:
735
{
736
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst), OP_B(inst)});
737
RegisterA64 temp1 = tempFloat(OP_A(inst));
738
RegisterA64 temp2 = tempFloat(OP_B(inst));
739
build.fdiv(inst.regA64, temp1, temp2);
740
break;
741
}
742
case IrCmd::MIN_FLOAT:
743
{
744
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst), OP_B(inst)});
745
RegisterA64 temp1 = tempFloat(OP_A(inst));
746
RegisterA64 temp2 = tempFloat(OP_B(inst));
747
build.fcmp(temp1, temp2);
748
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Less));
749
break;
750
}
751
case IrCmd::MAX_FLOAT:
752
{
753
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst), OP_B(inst)});
754
RegisterA64 temp1 = tempFloat(OP_A(inst));
755
RegisterA64 temp2 = tempFloat(OP_B(inst));
756
build.fcmp(temp1, temp2);
757
build.fcsel(inst.regA64, temp1, temp2, getConditionFP(IrCondition::Greater));
758
break;
759
}
760
case IrCmd::UNM_FLOAT:
761
{
762
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst)});
763
RegisterA64 temp = tempFloat(OP_A(inst));
764
build.fneg(inst.regA64, temp);
765
break;
766
}
767
case IrCmd::FLOOR_FLOAT:
768
{
769
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst)});
770
RegisterA64 temp = tempFloat(OP_A(inst));
771
build.frintm(inst.regA64, temp);
772
break;
773
}
774
case IrCmd::CEIL_FLOAT:
775
{
776
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst)});
777
RegisterA64 temp = tempFloat(OP_A(inst));
778
build.frintp(inst.regA64, temp);
779
break;
780
}
781
case IrCmd::SQRT_FLOAT:
782
{
783
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst)});
784
RegisterA64 temp = tempFloat(OP_A(inst));
785
build.fsqrt(inst.regA64, temp);
786
break;
787
}
788
case IrCmd::ABS_FLOAT:
789
{
790
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst)});
791
RegisterA64 temp = tempFloat(OP_A(inst));
792
build.fabs(inst.regA64, temp);
793
break;
794
}
795
case IrCmd::SIGN_FLOAT:
796
{
797
inst.regA64 = regs.allocReuse(KindA64::s, index, {OP_A(inst)});
798
799
RegisterA64 temp = tempFloat(OP_A(inst));
800
RegisterA64 temp0 = regs.allocTemp(KindA64::s);
801
RegisterA64 temp1 = regs.allocTemp(KindA64::s);
802
803
build.fcmpz(temp);
804
build.fmov(temp0, 0.0f);
805
build.fmov(temp1, 1.0f);
806
build.fcsel(inst.regA64, temp1, temp0, getConditionFP(IrCondition::Greater));
807
build.fmov(temp1, -1.0f);
808
build.fcsel(inst.regA64, temp1, inst.regA64, getConditionFP(IrCondition::Less));
809
break;
810
}
811
case IrCmd::SELECT_NUM:
812
{
813
inst.regA64 = regs.allocReuse(KindA64::d, index, {OP_A(inst), OP_B(inst), OP_C(inst), OP_D(inst)});
814
815
RegisterA64 temp1 = tempDouble(OP_A(inst));
816
RegisterA64 temp2 = tempDouble(OP_B(inst));
817
RegisterA64 temp3 = tempDouble(OP_C(inst));
818
RegisterA64 temp4 = tempDouble(OP_D(inst));
819
820
build.fcmp(temp3, temp4);
821
build.fcsel(inst.regA64, temp2, temp1, getConditionFP(IrCondition::Equal));
822
break;
823
}
824
case IrCmd::SELECT_VEC:
825
{
826
// `OP_B(inst)` cannot be reused for return value, because it can be overwritten with A before the first usage
827
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_C(inst), OP_D(inst)});
828
829
RegisterA64 temp1 = regOp(OP_A(inst));
830
RegisterA64 temp2 = regOp(OP_B(inst));
831
RegisterA64 temp3 = regOp(OP_C(inst));
832
RegisterA64 temp4 = regOp(OP_D(inst));
833
834
RegisterA64 mask = regs.allocTemp(KindA64::q);
835
836
// Evaluate predicate and calculate mask.
837
build.fcmeq_4s(mask, temp3, temp4);
838
// mov A to res register
839
build.mov(inst.regA64, temp1);
840
// If numbers are equal override A with B in res register.
841
build.bit(inst.regA64, temp2, mask);
842
break;
843
}
844
case IrCmd::SELECT_IF_TRUTHY:
845
{
846
inst.regA64 = regs.allocReg(KindA64::q, index);
847
848
// Place lhs as the result, we will overwrite it with rhs if 'A' is falsy later
849
build.mov(inst.regA64, regOp(OP_B(inst)));
850
851
// Get rhs register early, so a potential restore happens on both sides of a conditional control flow
852
RegisterA64 c = regOp(OP_C(inst));
853
854
RegisterA64 temp = regs.allocTemp(KindA64::w);
855
Label saveRhs, exit;
856
857
// Check tag first
858
build.umov_4s(temp, regOp(OP_A(inst)), 3);
859
build.cmp(temp, uint16_t(LUA_TBOOLEAN));
860
861
build.b(ConditionA64::UnsignedLess, saveRhs); // rhs if 'A' is nil
862
build.b(ConditionA64::UnsignedGreater, exit); // Keep lhs if 'A' is not a boolean
863
864
// Check the boolean value
865
build.umov_4s(temp, regOp(OP_A(inst)), 0);
866
build.cbnz(temp, exit); // Keep lhs if 'A' is true
867
868
build.setLabel(saveRhs);
869
build.mov(inst.regA64, c);
870
871
build.setLabel(exit);
872
break;
873
}
874
case IrCmd::MULADD_VEC:
875
{
876
RegisterA64 tempA = regOp(OP_A(inst));
877
RegisterA64 tempB = regOp(OP_B(inst));
878
RegisterA64 tempC = regOp(OP_C(inst));
879
880
if ((build.features & Feature_AdvSIMD) != 0)
881
{
882
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_C(inst)});
883
if (inst.regA64 != tempC)
884
build.mov(inst.regA64, tempC);
885
build.fmla(inst.regA64, tempB, tempA);
886
}
887
else
888
{
889
inst.regA64 = regs.allocReg(KindA64::q, index);
890
build.fmul(inst.regA64, tempB, tempA);
891
build.fadd(inst.regA64, inst.regA64, tempC);
892
}
893
break;
894
}
895
case IrCmd::ADD_VEC:
896
{
897
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_B(inst)});
898
899
build.fadd(inst.regA64, regOp(OP_A(inst)), regOp(OP_B(inst)));
900
break;
901
}
902
case IrCmd::SUB_VEC:
903
{
904
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_B(inst)});
905
906
build.fsub(inst.regA64, regOp(OP_A(inst)), regOp(OP_B(inst)));
907
break;
908
}
909
case IrCmd::MUL_VEC:
910
{
911
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_B(inst)});
912
913
build.fmul(inst.regA64, regOp(OP_A(inst)), regOp(OP_B(inst)));
914
break;
915
}
916
case IrCmd::DIV_VEC:
917
{
918
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_B(inst)});
919
920
build.fdiv(inst.regA64, regOp(OP_A(inst)), regOp(OP_B(inst)));
921
break;
922
}
923
case IrCmd::IDIV_VEC:
924
{
925
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_B(inst)});
926
927
build.fdiv(inst.regA64, regOp(OP_A(inst)), regOp(OP_B(inst)));
928
build.frintm(inst.regA64, inst.regA64);
929
break;
930
}
931
case IrCmd::UNM_VEC:
932
{
933
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst)});
934
935
build.fneg(inst.regA64, regOp(OP_A(inst)));
936
break;
937
}
938
case IrCmd::MIN_VEC:
939
{
940
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_B(inst)});
941
942
RegisterA64 temp1 = regOp(OP_A(inst));
943
RegisterA64 temp2 = regOp(OP_B(inst));
944
945
RegisterA64 mask = regs.allocTemp(KindA64::q);
946
947
// b > a == a < b
948
build.fcmgt_4s(mask, temp2, temp1);
949
950
// If A is already at the target, select B where mask is 0
951
if (inst.regA64 == temp1)
952
{
953
build.bif(inst.regA64, temp2, mask);
954
}
955
else
956
{
957
// Store B at the target unless it's there, select A where mask is 1
958
if (inst.regA64 != temp2)
959
build.mov(inst.regA64, temp2);
960
961
build.bit(inst.regA64, temp1, mask);
962
}
963
break;
964
}
965
case IrCmd::MAX_VEC:
966
{
967
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst), OP_B(inst)});
968
969
RegisterA64 temp1 = regOp(OP_A(inst));
970
RegisterA64 temp2 = regOp(OP_B(inst));
971
972
RegisterA64 mask = regs.allocTemp(KindA64::q);
973
974
build.fcmgt_4s(mask, temp1, temp2);
975
976
// If A is already at the target, select B where mask is 0
977
if (inst.regA64 == temp1)
978
{
979
build.bif(inst.regA64, temp2, mask);
980
}
981
else
982
{
983
// Store B at the target unless it's there, select A where mask is 1
984
if (inst.regA64 != temp2)
985
build.mov(inst.regA64, temp2);
986
987
build.bit(inst.regA64, temp1, mask);
988
}
989
break;
990
}
991
case IrCmd::FLOOR_VEC:
992
{
993
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst)});
994
995
build.frintm(inst.regA64, regOp(OP_A(inst)));
996
break;
997
}
998
case IrCmd::CEIL_VEC:
999
{
1000
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst)});
1001
1002
build.frintp(inst.regA64, regOp(OP_A(inst)));
1003
break;
1004
}
1005
case IrCmd::ABS_VEC:
1006
{
1007
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst)});
1008
build.fabs(inst.regA64, regOp(OP_A(inst)));
1009
break;
1010
}
1011
case IrCmd::DOT_VEC:
1012
{
1013
inst.regA64 = regs.allocReg(KindA64::s, index);
1014
1015
RegisterA64 temp = regs.allocTemp(KindA64::q);
1016
RegisterA64 temps = castReg(KindA64::s, temp);
1017
1018
build.fmul(temp, regOp(OP_A(inst)), regOp(OP_B(inst)));
1019
build.faddp(inst.regA64, temps); // x+y
1020
build.dup_4s(temp, temp, 2);
1021
build.fadd(inst.regA64, inst.regA64, temps); // +z
1022
break;
1023
}
1024
case IrCmd::EXTRACT_VEC:
1025
{
1026
inst.regA64 = regs.allocReg(KindA64::s, index);
1027
1028
if (intOp(OP_B(inst)) == 0)
1029
{
1030
// Lane vN.s[0] can just be read directly as sN
1031
build.fmov(inst.regA64, castReg(KindA64::s, regOp(OP_A(inst))));
1032
}
1033
else
1034
{
1035
build.dup_4s(inst.regA64, regOp(OP_A(inst)), intOp(OP_B(inst)));
1036
}
1037
break;
1038
}
1039
case IrCmd::NOT_ANY:
1040
{
1041
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
1042
1043
if (OP_A(inst).kind == IrOpKind::Constant)
1044
{
1045
// other cases should've been constant folded
1046
CODEGEN_ASSERT(tagOp(OP_A(inst)) == LUA_TBOOLEAN);
1047
build.eor(inst.regA64, regOp(OP_B(inst)), 1);
1048
}
1049
else
1050
{
1051
Label notBool, exit;
1052
1053
// use the fact that NIL is the only value less than BOOLEAN to do two tag comparisons at once
1054
CODEGEN_ASSERT(LUA_TNIL == 0 && LUA_TBOOLEAN == 1);
1055
build.cmp(regOp(OP_A(inst)), uint16_t(LUA_TBOOLEAN));
1056
build.b(ConditionA64::NotEqual, notBool);
1057
1058
if (OP_B(inst).kind == IrOpKind::Constant)
1059
build.mov(inst.regA64, intOp(OP_B(inst)) == 0 ? 1 : 0);
1060
else
1061
build.eor(inst.regA64, regOp(OP_B(inst)), 1); // boolean => invert value
1062
1063
build.b(exit);
1064
1065
// not boolean => result is true iff tag was nil
1066
build.setLabel(notBool);
1067
build.cset(inst.regA64, ConditionA64::Less);
1068
1069
build.setLabel(exit);
1070
}
1071
break;
1072
}
1073
case IrCmd::CMP_INT:
1074
{
1075
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
1076
1077
IrCondition cond = conditionOp(OP_C(inst));
1078
1079
if (OP_A(inst).kind == IrOpKind::Constant)
1080
{
1081
if (unsigned(intOp(OP_A(inst))) <= AssemblyBuilderA64::kMaxImmediate)
1082
build.cmp(regOp(OP_B(inst)), uint16_t(intOp(OP_A(inst))));
1083
else
1084
build.cmp(regOp(OP_B(inst)), tempInt(OP_A(inst)));
1085
1086
build.cset(inst.regA64, getInverseCondition(getConditionInt(cond)));
1087
}
1088
else if (OP_A(inst).kind == IrOpKind::Inst)
1089
{
1090
if (unsigned(intOp(OP_B(inst))) <= AssemblyBuilderA64::kMaxImmediate)
1091
build.cmp(regOp(OP_A(inst)), uint16_t(intOp(OP_B(inst))));
1092
else
1093
build.cmp(regOp(OP_A(inst)), tempInt(OP_B(inst)));
1094
1095
build.cset(inst.regA64, getConditionInt(cond));
1096
}
1097
else
1098
{
1099
CODEGEN_ASSERT(!"Unsupported instruction form");
1100
}
1101
break;
1102
}
1103
case IrCmd::CMP_ANY:
1104
{
1105
CODEGEN_ASSERT(OP_A(inst).kind == IrOpKind::VmReg && OP_B(inst).kind == IrOpKind::VmReg);
1106
IrCondition cond = conditionOp(OP_C(inst));
1107
1108
Label skip, exit;
1109
1110
// For equality comparison, 'luaV_lessequal' expects tag to be equal before the call
1111
if (cond == IrCondition::Equal)
1112
{
1113
RegisterA64 tempa = regs.allocTemp(KindA64::w);
1114
RegisterA64 tempb = regs.allocTemp(KindA64::w);
1115
1116
build.ldr(tempa, tempAddr(OP_A(inst), offsetof(TValue, tt)));
1117
build.ldr(tempb, tempAddr(OP_B(inst), offsetof(TValue, tt)));
1118
build.cmp(tempa, tempb);
1119
1120
// If the tags are not equal, skip 'luaV_lessequal' call and set result to 0
1121
build.b(ConditionA64::NotEqual, skip);
1122
}
1123
1124
regs.spill(index);
1125
build.mov(x0, rState);
1126
build.add(x1, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
1127
build.add(x2, rBase, uint16_t(vmRegOp(OP_B(inst)) * sizeof(TValue)));
1128
1129
if (cond == IrCondition::LessEqual)
1130
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessequal)));
1131
else if (cond == IrCondition::Less)
1132
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_lessthan)));
1133
else if (cond == IrCondition::Equal)
1134
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_equalval)));
1135
else
1136
CODEGEN_ASSERT(!"Unsupported condition");
1137
1138
build.blr(x3);
1139
1140
emitUpdateBase(build);
1141
1142
inst.regA64 = regs.takeReg(w0, index);
1143
1144
if (cond == IrCondition::Equal)
1145
{
1146
build.b(exit);
1147
build.setLabel(skip);
1148
1149
build.mov(inst.regA64, 0);
1150
build.setLabel(exit);
1151
}
1152
1153
// If case we made a call, skip high register bits clear, only consumer is JUMP_CMP_INT which doesn't read them
1154
break;
1155
}
1156
case IrCmd::CMP_TAG:
1157
{
1158
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
1159
1160
IrCondition cond = conditionOp(OP_C(inst));
1161
CODEGEN_ASSERT(cond == IrCondition::Equal || cond == IrCondition::NotEqual);
1162
RegisterA64 aReg = noreg;
1163
RegisterA64 bReg = noreg;
1164
1165
if (OP_A(inst).kind == IrOpKind::Inst)
1166
{
1167
aReg = regOp(OP_A(inst));
1168
}
1169
else if (OP_A(inst).kind == IrOpKind::VmReg)
1170
{
1171
aReg = regs.allocTemp(KindA64::w);
1172
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, tt));
1173
build.ldr(aReg, addr);
1174
}
1175
else
1176
{
1177
CODEGEN_ASSERT(OP_A(inst).kind == IrOpKind::Constant);
1178
}
1179
1180
if (OP_B(inst).kind == IrOpKind::Inst)
1181
{
1182
bReg = regOp(OP_B(inst));
1183
}
1184
else if (OP_B(inst).kind == IrOpKind::VmReg)
1185
{
1186
bReg = regs.allocTemp(KindA64::w);
1187
AddressA64 addr = tempAddr(OP_B(inst), offsetof(TValue, tt));
1188
build.ldr(bReg, addr);
1189
}
1190
else
1191
{
1192
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Constant);
1193
}
1194
1195
if (OP_A(inst).kind == IrOpKind::Constant)
1196
{
1197
build.cmp(bReg, uint16_t(tagOp(OP_A(inst))));
1198
build.cset(inst.regA64, getInverseCondition(getConditionInt(cond)));
1199
}
1200
else if (OP_B(inst).kind == IrOpKind::Constant)
1201
{
1202
build.cmp(aReg, uint16_t(tagOp(OP_B(inst))));
1203
build.cset(inst.regA64, getConditionInt(cond));
1204
}
1205
else
1206
{
1207
build.cmp(aReg, bReg);
1208
build.cset(inst.regA64, getConditionInt(cond));
1209
}
1210
break;
1211
}
1212
case IrCmd::CMP_SPLIT_TVALUE:
1213
{
1214
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
1215
1216
// Second operand of this instruction must be a constant
1217
// Without a constant type, we wouldn't know the correct way to compare the values at lowering time
1218
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Constant);
1219
1220
IrCondition cond = conditionOp(OP_E(inst));
1221
CODEGEN_ASSERT(cond == IrCondition::Equal || cond == IrCondition::NotEqual);
1222
1223
// Check tag equality first
1224
RegisterA64 temp = regs.allocTemp(KindA64::w);
1225
1226
if (OP_A(inst).kind != IrOpKind::Constant)
1227
{
1228
build.cmp(regOp(OP_A(inst)), uint16_t(tagOp(OP_B(inst))));
1229
build.cset(temp, getConditionInt(cond));
1230
}
1231
else
1232
{
1233
// Constant folding had to handle different constant tags
1234
CODEGEN_ASSERT(tagOp(OP_A(inst)) == tagOp(OP_B(inst)));
1235
}
1236
1237
if (tagOp(OP_B(inst)) == LUA_TBOOLEAN)
1238
{
1239
if (OP_C(inst).kind == IrOpKind::Constant)
1240
{
1241
CODEGEN_ASSERT(intOp(OP_C(inst)) == 0 || intOp(OP_C(inst)) == 1);
1242
build.cmp(regOp(OP_D(inst)), uint16_t(intOp(OP_C(inst)))); // swapped arguments
1243
}
1244
else if (OP_D(inst).kind == IrOpKind::Constant)
1245
{
1246
CODEGEN_ASSERT(intOp(OP_D(inst)) == 0 || intOp(OP_D(inst)) == 1);
1247
build.cmp(regOp(OP_C(inst)), uint16_t(intOp(OP_D(inst))));
1248
}
1249
else
1250
{
1251
build.cmp(regOp(OP_C(inst)), regOp(OP_D(inst)));
1252
}
1253
1254
build.cset(inst.regA64, getConditionInt(cond));
1255
}
1256
else if (tagOp(OP_B(inst)) == LUA_TSTRING)
1257
{
1258
build.cmp(regOp(OP_C(inst)), regOp(OP_D(inst)));
1259
build.cset(inst.regA64, getConditionInt(cond));
1260
}
1261
else if (tagOp(OP_B(inst)) == LUA_TNUMBER)
1262
{
1263
RegisterA64 temp1 = tempDouble(OP_C(inst));
1264
RegisterA64 temp2 = tempDouble(OP_D(inst));
1265
1266
build.fcmp(temp1, temp2);
1267
build.cset(inst.regA64, getConditionFP(cond));
1268
}
1269
else
1270
{
1271
CODEGEN_ASSERT(!"unsupported type tag in CMP_SPLIT_TVALUE");
1272
}
1273
1274
if (OP_A(inst).kind != IrOpKind::Constant)
1275
{
1276
if (cond == IrCondition::Equal)
1277
build.and_(inst.regA64, inst.regA64, temp);
1278
else
1279
build.orr(inst.regA64, inst.regA64, temp);
1280
}
1281
break;
1282
}
1283
case IrCmd::JUMP:
1284
if (OP_A(inst).kind == IrOpKind::Undef || OP_A(inst).kind == IrOpKind::VmExit)
1285
{
1286
Label fresh;
1287
build.b(getTargetLabel(OP_A(inst), fresh));
1288
finalizeTargetLabel(OP_A(inst), fresh);
1289
}
1290
else
1291
{
1292
jumpOrFallthrough(blockOp(OP_A(inst)), next);
1293
}
1294
break;
1295
case IrCmd::JUMP_IF_TRUTHY:
1296
{
1297
RegisterA64 temp = regs.allocTemp(KindA64::w);
1298
build.ldr(temp, mem(rBase, vmRegOp(OP_A(inst)) * sizeof(TValue) + offsetof(TValue, tt)));
1299
// nil => falsy
1300
CODEGEN_ASSERT(LUA_TNIL == 0);
1301
build.cbz(temp, labelOp(OP_C(inst)));
1302
// not boolean => truthy
1303
build.cmp(temp, uint16_t(LUA_TBOOLEAN));
1304
build.b(ConditionA64::NotEqual, labelOp(OP_B(inst)));
1305
// compare boolean value
1306
build.ldr(temp, mem(rBase, vmRegOp(OP_A(inst)) * sizeof(TValue) + offsetof(TValue, value)));
1307
build.cbnz(temp, labelOp(OP_B(inst)));
1308
jumpOrFallthrough(blockOp(OP_C(inst)), next);
1309
break;
1310
}
1311
case IrCmd::JUMP_IF_FALSY:
1312
{
1313
RegisterA64 temp = regs.allocTemp(KindA64::w);
1314
build.ldr(temp, mem(rBase, vmRegOp(OP_A(inst)) * sizeof(TValue) + offsetof(TValue, tt)));
1315
// nil => falsy
1316
CODEGEN_ASSERT(LUA_TNIL == 0);
1317
build.cbz(temp, labelOp(OP_B(inst)));
1318
// not boolean => truthy
1319
build.cmp(temp, uint16_t(LUA_TBOOLEAN));
1320
build.b(ConditionA64::NotEqual, labelOp(OP_C(inst)));
1321
// compare boolean value
1322
build.ldr(temp, mem(rBase, vmRegOp(OP_A(inst)) * sizeof(TValue) + offsetof(TValue, value)));
1323
build.cbz(temp, labelOp(OP_B(inst)));
1324
jumpOrFallthrough(blockOp(OP_C(inst)), next);
1325
break;
1326
}
1327
case IrCmd::JUMP_EQ_TAG:
1328
{
1329
RegisterA64 zr = noreg;
1330
RegisterA64 aReg = noreg;
1331
RegisterA64 bReg = noreg;
1332
1333
if (OP_A(inst).kind == IrOpKind::Inst)
1334
{
1335
aReg = regOp(OP_A(inst));
1336
}
1337
else if (OP_A(inst).kind == IrOpKind::VmReg)
1338
{
1339
aReg = regs.allocTemp(KindA64::w);
1340
AddressA64 addr = tempAddr(OP_A(inst), offsetof(TValue, tt));
1341
build.ldr(aReg, addr);
1342
}
1343
else
1344
{
1345
CODEGEN_ASSERT(OP_A(inst).kind == IrOpKind::Constant);
1346
}
1347
1348
if (OP_B(inst).kind == IrOpKind::Inst)
1349
{
1350
bReg = regOp(OP_B(inst));
1351
}
1352
else if (OP_B(inst).kind == IrOpKind::VmReg)
1353
{
1354
bReg = regs.allocTemp(KindA64::w);
1355
AddressA64 addr = tempAddr(OP_B(inst), offsetof(TValue, tt));
1356
build.ldr(bReg, addr);
1357
}
1358
else
1359
{
1360
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Constant);
1361
}
1362
1363
if (OP_A(inst).kind == IrOpKind::Constant && tagOp(OP_A(inst)) == 0)
1364
zr = bReg;
1365
else if (OP_B(inst).kind == IrOpKind::Constant && tagOp(OP_B(inst)) == 0)
1366
zr = aReg;
1367
else if (OP_B(inst).kind == IrOpKind::Constant)
1368
build.cmp(aReg, uint16_t(tagOp(OP_B(inst))));
1369
else if (OP_A(inst).kind == IrOpKind::Constant)
1370
build.cmp(bReg, uint16_t(tagOp(OP_A(inst))));
1371
else
1372
build.cmp(aReg, bReg);
1373
1374
if (isFallthroughBlock(blockOp(OP_D(inst)), next))
1375
{
1376
if (zr != noreg)
1377
build.cbz(zr, labelOp(OP_C(inst)));
1378
else
1379
build.b(ConditionA64::Equal, labelOp(OP_C(inst)));
1380
jumpOrFallthrough(blockOp(OP_D(inst)), next);
1381
}
1382
else
1383
{
1384
if (zr != noreg)
1385
build.cbnz(zr, labelOp(OP_D(inst)));
1386
else
1387
build.b(ConditionA64::NotEqual, labelOp(OP_D(inst)));
1388
jumpOrFallthrough(blockOp(OP_C(inst)), next);
1389
}
1390
break;
1391
}
1392
case IrCmd::JUMP_CMP_INT:
1393
{
1394
IrCondition cond = conditionOp(OP_C(inst));
1395
1396
if (cond == IrCondition::Equal && intOp(OP_B(inst)) == 0)
1397
{
1398
build.cbz(regOp(OP_A(inst)), labelOp(OP_D(inst)));
1399
}
1400
else if (cond == IrCondition::NotEqual && intOp(OP_B(inst)) == 0)
1401
{
1402
build.cbnz(regOp(OP_A(inst)), labelOp(OP_D(inst)));
1403
}
1404
else
1405
{
1406
CODEGEN_ASSERT(unsigned(intOp(OP_B(inst))) <= AssemblyBuilderA64::kMaxImmediate);
1407
build.cmp(regOp(OP_A(inst)), uint16_t(intOp(OP_B(inst))));
1408
build.b(getConditionInt(cond), labelOp(OP_D(inst)));
1409
}
1410
jumpOrFallthrough(blockOp(OP_E(inst)), next);
1411
break;
1412
}
1413
case IrCmd::JUMP_EQ_POINTER:
1414
build.cmp(regOp(OP_A(inst)), regOp(OP_B(inst)));
1415
build.b(ConditionA64::Equal, labelOp(OP_C(inst)));
1416
jumpOrFallthrough(blockOp(OP_D(inst)), next);
1417
break;
1418
case IrCmd::JUMP_CMP_NUM:
1419
{
1420
IrCondition cond = conditionOp(OP_C(inst));
1421
1422
if (OP_B(inst).kind == IrOpKind::Constant && doubleOp(OP_B(inst)) == 0.0)
1423
{
1424
RegisterA64 temp = tempDouble(OP_A(inst));
1425
1426
build.fcmpz(temp);
1427
}
1428
else
1429
{
1430
RegisterA64 temp1 = tempDouble(OP_A(inst));
1431
RegisterA64 temp2 = tempDouble(OP_B(inst));
1432
1433
build.fcmp(temp1, temp2);
1434
}
1435
1436
build.b(getConditionFP(cond), labelOp(OP_D(inst)));
1437
jumpOrFallthrough(blockOp(OP_E(inst)), next);
1438
break;
1439
}
1440
case IrCmd::JUMP_CMP_FLOAT:
1441
{
1442
IrCondition cond = conditionOp(OP_C(inst));
1443
1444
if (OP_B(inst).kind == IrOpKind::Constant && float(doubleOp(OP_B(inst))) == 0.0f)
1445
{
1446
RegisterA64 temp = tempFloat(OP_A(inst));
1447
1448
build.fcmpz(temp);
1449
}
1450
else
1451
{
1452
RegisterA64 temp1 = tempFloat(OP_A(inst));
1453
RegisterA64 temp2 = tempFloat(OP_B(inst));
1454
1455
build.fcmp(temp1, temp2);
1456
}
1457
1458
build.b(getConditionFP(cond), labelOp(OP_D(inst)));
1459
jumpOrFallthrough(blockOp(OP_E(inst)), next);
1460
break;
1461
}
1462
case IrCmd::JUMP_FORN_LOOP_COND:
1463
{
1464
RegisterA64 index = tempDouble(OP_A(inst));
1465
RegisterA64 limit = tempDouble(OP_B(inst));
1466
RegisterA64 step = tempDouble(OP_C(inst));
1467
1468
Label direct;
1469
1470
// step > 0
1471
build.fcmpz(step);
1472
build.b(getConditionFP(IrCondition::Greater), direct);
1473
1474
// !(limit <= index)
1475
build.fcmp(limit, index);
1476
build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(OP_E(inst)));
1477
build.b(labelOp(OP_D(inst)));
1478
1479
// !(index <= limit)
1480
build.setLabel(direct);
1481
1482
build.fcmp(index, limit);
1483
build.b(getConditionFP(IrCondition::NotLessEqual), labelOp(OP_E(inst)));
1484
jumpOrFallthrough(blockOp(OP_D(inst)), next);
1485
break;
1486
}
1487
// IrCmd::JUMP_SLOT_MATCH implemented below
1488
case IrCmd::TABLE_LEN:
1489
{
1490
RegisterA64 reg = regOp(OP_A(inst)); // note: we need to call regOp before spill so that we don't do redundant reloads
1491
regs.spill(index, {reg});
1492
build.mov(x0, reg);
1493
build.ldr(x1, mem(rNativeContext, offsetof(NativeContext, luaH_getn)));
1494
build.blr(x1);
1495
1496
inst.regA64 = regs.takeReg(w0, index);
1497
1498
build.ubfx(inst.regA64, inst.regA64, 0, 32); // Ensure high register bits are cleared
1499
break;
1500
}
1501
case IrCmd::STRING_LEN:
1502
{
1503
inst.regA64 = regs.allocReg(KindA64::w, index);
1504
1505
build.ldr(inst.regA64, mem(regOp(OP_A(inst)), offsetof(TString, len)));
1506
break;
1507
}
1508
case IrCmd::TABLE_SETNUM:
1509
{
1510
// note: we need to call regOp before spill so that we don't do redundant reloads
1511
RegisterA64 table = regOp(OP_A(inst));
1512
RegisterA64 key = regOp(OP_B(inst));
1513
RegisterA64 temp = regs.allocTemp(KindA64::w);
1514
1515
regs.spill(index, {table, key});
1516
1517
if (w1 != key)
1518
{
1519
build.mov(x1, table);
1520
build.mov(w2, key);
1521
}
1522
else
1523
{
1524
build.mov(temp, w1);
1525
build.mov(x1, table);
1526
build.mov(w2, temp);
1527
}
1528
1529
build.mov(x0, rState);
1530
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_setnum)));
1531
build.blr(x3);
1532
inst.regA64 = regs.takeReg(x0, index);
1533
break;
1534
}
1535
case IrCmd::NEW_TABLE:
1536
{
1537
regs.spill(index);
1538
build.mov(x0, rState);
1539
build.mov(x1, uintOp(OP_A(inst)));
1540
build.mov(x2, uintOp(OP_B(inst)));
1541
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaH_new)));
1542
build.blr(x3);
1543
inst.regA64 = regs.takeReg(x0, index);
1544
break;
1545
}
1546
case IrCmd::DUP_TABLE:
1547
{
1548
RegisterA64 reg = regOp(OP_A(inst)); // note: we need to call regOp before spill so that we don't do redundant reloads
1549
regs.spill(index, {reg});
1550
build.mov(x1, reg);
1551
build.mov(x0, rState);
1552
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaH_clone)));
1553
build.blr(x2);
1554
inst.regA64 = regs.takeReg(x0, index);
1555
break;
1556
}
1557
case IrCmd::TRY_NUM_TO_INDEX:
1558
{
1559
inst.regA64 = regs.allocReg(KindA64::w, index);
1560
RegisterA64 temp1 = tempDouble(OP_A(inst));
1561
1562
if (build.features & Feature_JSCVT)
1563
{
1564
build.fjcvtzs(inst.regA64, temp1); // fjcvtzs sets PSTATE.Z (equal) iff conversion is exact
1565
build.b(ConditionA64::NotEqual, labelOp(OP_B(inst)));
1566
}
1567
else
1568
{
1569
RegisterA64 temp2 = regs.allocTemp(KindA64::d);
1570
1571
build.fcvtzs(inst.regA64, temp1);
1572
build.scvtf(temp2, inst.regA64);
1573
build.fcmp(temp1, temp2);
1574
build.b(ConditionA64::NotEqual, labelOp(OP_B(inst)));
1575
}
1576
break;
1577
}
1578
case IrCmd::TRY_CALL_FASTGETTM:
1579
{
1580
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
1581
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
1582
1583
build.ldr(temp1, mem(regOp(OP_A(inst)), offsetof(LuaTable, metatable)));
1584
build.cbz(temp1, labelOp(OP_C(inst))); // no metatable
1585
1586
build.ldrb(temp2, mem(temp1, offsetof(LuaTable, tmcache)));
1587
build.tst(temp2, 1 << intOp(OP_B(inst))); // can't use tbz/tbnz because their jump offsets are too short
1588
build.b(ConditionA64::NotEqual, labelOp(OP_C(inst))); // Equal = Zero after tst; tmcache caches *absence* of metamethods
1589
1590
regs.spill(index, {temp1});
1591
build.mov(x0, temp1);
1592
build.mov(w1, intOp(OP_B(inst)));
1593
build.ldr(x2, mem(rGlobalState, offsetof(global_State, tmname) + intOp(OP_B(inst)) * sizeof(TString*)));
1594
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaT_gettm)));
1595
build.blr(x3);
1596
1597
build.cbz(x0, labelOp(OP_C(inst))); // no tag method
1598
1599
inst.regA64 = regs.takeReg(x0, index);
1600
break;
1601
}
1602
case IrCmd::NEW_USERDATA:
1603
{
1604
regs.spill(index);
1605
build.mov(x0, rState);
1606
build.mov(x1, intOp(OP_A(inst)));
1607
build.mov(x2, intOp(OP_B(inst)));
1608
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, newUserdata)));
1609
build.blr(x3);
1610
inst.regA64 = regs.takeReg(x0, index);
1611
break;
1612
}
1613
case IrCmd::INT_TO_NUM:
1614
{
1615
inst.regA64 = regs.allocReg(KindA64::d, index);
1616
RegisterA64 temp = tempInt(OP_A(inst));
1617
build.scvtf(inst.regA64, temp);
1618
break;
1619
}
1620
case IrCmd::UINT_TO_NUM:
1621
{
1622
inst.regA64 = regs.allocReg(KindA64::d, index);
1623
RegisterA64 temp = tempInt(OP_A(inst));
1624
build.ucvtf(inst.regA64, temp);
1625
break;
1626
}
1627
case IrCmd::UINT_TO_FLOAT:
1628
{
1629
inst.regA64 = regs.allocReg(KindA64::s, index);
1630
RegisterA64 temp = tempInt(OP_A(inst));
1631
build.ucvtf(inst.regA64, temp);
1632
break;
1633
}
1634
case IrCmd::NUM_TO_INT:
1635
{
1636
inst.regA64 = regs.allocReg(KindA64::w, index);
1637
RegisterA64 temp = tempDouble(OP_A(inst));
1638
build.fcvtzs(inst.regA64, temp);
1639
break;
1640
}
1641
case IrCmd::NUM_TO_UINT:
1642
{
1643
inst.regA64 = regs.allocReg(KindA64::w, index);
1644
RegisterA64 temp = tempDouble(OP_A(inst));
1645
// note: we don't use fcvtzu for consistency with C++ code
1646
build.fcvtzs(castReg(KindA64::x, inst.regA64), temp);
1647
break;
1648
}
1649
case IrCmd::FLOAT_TO_NUM:
1650
inst.regA64 = regs.allocReg(KindA64::d, index);
1651
1652
build.fcvt(inst.regA64, regOp(OP_A(inst)));
1653
break;
1654
case IrCmd::NUM_TO_FLOAT:
1655
inst.regA64 = regs.allocReg(KindA64::s, index);
1656
1657
build.fcvt(inst.regA64, regOp(OP_A(inst)));
1658
break;
1659
case IrCmd::FLOAT_TO_VEC:
1660
{
1661
inst.regA64 = regs.allocReg(KindA64::q, index);
1662
1663
if (OP_A(inst).kind == IrOpKind::Constant)
1664
{
1665
float value = float(doubleOp(OP_A(inst)));
1666
uint32_t asU32;
1667
static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit");
1668
memcpy(&asU32, &value, sizeof(value));
1669
1670
if (AssemblyBuilderA64::isFmovSupportedFp32(value))
1671
{
1672
build.fmov(inst.regA64, value);
1673
}
1674
else
1675
{
1676
RegisterA64 temp = regs.allocTemp(KindA64::x);
1677
1678
uint32_t vec[4] = {asU32, asU32, asU32, 0};
1679
build.adr(temp, vec, sizeof(vec));
1680
build.ldr(inst.regA64, temp);
1681
}
1682
}
1683
else
1684
{
1685
RegisterA64 temp = tempFloat(OP_A(inst));
1686
1687
build.dup_4s(inst.regA64, castReg(KindA64::q, temp), 0);
1688
}
1689
break;
1690
}
1691
case IrCmd::TAG_VECTOR:
1692
{
1693
inst.regA64 = regs.allocReuse(KindA64::q, index, {OP_A(inst)});
1694
1695
RegisterA64 reg = regOp(OP_A(inst));
1696
RegisterA64 tempw = regs.allocTemp(KindA64::w);
1697
1698
if (inst.regA64 != reg)
1699
build.mov(inst.regA64, reg);
1700
1701
build.mov(tempw, LUA_TVECTOR);
1702
build.ins_4s(inst.regA64, tempw, 3);
1703
break;
1704
}
1705
case IrCmd::TRUNCATE_UINT:
1706
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
1707
1708
build.ubfx(castReg(KindA64::x, inst.regA64), castReg(KindA64::x, regOp(OP_A(inst))), 0, 32); // explicit uxtw
1709
break;
1710
case IrCmd::ADJUST_STACK_TO_REG:
1711
{
1712
RegisterA64 temp = regs.allocTemp(KindA64::x);
1713
1714
if (OP_B(inst).kind == IrOpKind::Constant)
1715
{
1716
build.add(temp, rBase, uint16_t((vmRegOp(OP_A(inst)) + intOp(OP_B(inst))) * sizeof(TValue)));
1717
build.str(temp, mem(rState, offsetof(lua_State, top)));
1718
}
1719
else if (OP_B(inst).kind == IrOpKind::Inst)
1720
{
1721
build.add(temp, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
1722
build.add(temp, temp, regOp(OP_B(inst)), kTValueSizeLog2); // implicit uxtw
1723
build.str(temp, mem(rState, offsetof(lua_State, top)));
1724
}
1725
else
1726
CODEGEN_ASSERT(!"Unsupported instruction form");
1727
break;
1728
}
1729
case IrCmd::ADJUST_STACK_TO_TOP:
1730
{
1731
RegisterA64 temp = regs.allocTemp(KindA64::x);
1732
build.ldr(temp, mem(rState, offsetof(lua_State, ci)));
1733
build.ldr(temp, mem(temp, offsetof(CallInfo, top)));
1734
build.str(temp, mem(rState, offsetof(lua_State, top)));
1735
break;
1736
}
1737
case IrCmd::FASTCALL:
1738
regs.spill(index);
1739
1740
error |= !emitBuiltin(build, function, regs, uintOp(OP_A(inst)), vmRegOp(OP_B(inst)), vmRegOp(OP_C(inst)), intOp(OP_D(inst)));
1741
break;
1742
case IrCmd::INVOKE_FASTCALL:
1743
{
1744
// We might need a temporary and we have to preserve it over the spill
1745
RegisterA64 temp = regs.allocTemp(KindA64::q);
1746
regs.spill(index, {temp});
1747
1748
build.mov(x0, rState);
1749
build.add(x1, rBase, uint16_t(vmRegOp(OP_B(inst)) * sizeof(TValue)));
1750
build.add(x2, rBase, uint16_t(vmRegOp(OP_C(inst)) * sizeof(TValue)));
1751
build.mov(w3, intOp(OP_G(inst))); // nresults
1752
1753
// 'E' argument can only be produced by LOP_FASTCALL3 lowering
1754
if (OP_E(inst).kind != IrOpKind::Undef)
1755
{
1756
CODEGEN_ASSERT(intOp(OP_F(inst)) == 3);
1757
1758
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
1759
1760
build.ldr(temp, mem(rBase, vmRegOp(OP_D(inst)) * sizeof(TValue)));
1761
build.str(temp, mem(x4, 0));
1762
1763
build.ldr(temp, mem(rBase, vmRegOp(OP_E(inst)) * sizeof(TValue)));
1764
build.str(temp, mem(x4, sizeof(TValue)));
1765
}
1766
else
1767
{
1768
if (OP_D(inst).kind == IrOpKind::VmReg)
1769
build.add(x4, rBase, uint16_t(vmRegOp(OP_D(inst)) * sizeof(TValue)));
1770
else if (OP_D(inst).kind == IrOpKind::VmConst)
1771
emitAddOffset(build, x4, rConstants, vmConstOp(OP_D(inst)) * sizeof(TValue));
1772
else
1773
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::Undef);
1774
}
1775
1776
// nparams
1777
if (intOp(OP_F(inst)) == LUA_MULTRET)
1778
{
1779
// L->top - (ra + 1)
1780
build.ldr(x5, mem(rState, offsetof(lua_State, top)));
1781
build.sub(x5, x5, rBase);
1782
build.sub(x5, x5, uint16_t((vmRegOp(OP_B(inst)) + 1) * sizeof(TValue)));
1783
build.lsr(x5, x5, kTValueSizeLog2);
1784
}
1785
else
1786
build.mov(w5, intOp(OP_F(inst)));
1787
1788
build.ldr(x6, mem(rNativeContext, offsetof(NativeContext, luauF_table) + uintOp(OP_A(inst)) * sizeof(luau_FastFunction)));
1789
build.blr(x6);
1790
1791
inst.regA64 = regs.takeReg(w0, index);
1792
// Skipping high register bits clear, only consumer is CHECK_FASTCALL_RES which doesn't read them
1793
break;
1794
}
1795
case IrCmd::CHECK_FASTCALL_RES:
1796
build.cmp(regOp(OP_A(inst)), uint16_t(0));
1797
build.b(ConditionA64::Less, labelOp(OP_B(inst)));
1798
break;
1799
case IrCmd::DO_ARITH:
1800
regs.spill(index);
1801
build.mov(x0, rState);
1802
build.add(x1, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
1803
1804
if (OP_B(inst).kind == IrOpKind::VmConst)
1805
emitAddOffset(build, x2, rConstants, vmConstOp(OP_B(inst)) * sizeof(TValue));
1806
else
1807
build.add(x2, rBase, uint16_t(vmRegOp(OP_B(inst)) * sizeof(TValue)));
1808
1809
if (OP_C(inst).kind == IrOpKind::VmConst)
1810
emitAddOffset(build, x3, rConstants, vmConstOp(OP_C(inst)) * sizeof(TValue));
1811
else
1812
build.add(x3, rBase, uint16_t(vmRegOp(OP_C(inst)) * sizeof(TValue)));
1813
1814
switch (TMS(intOp(OP_D(inst))))
1815
{
1816
case TM_ADD:
1817
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithadd)));
1818
break;
1819
case TM_SUB:
1820
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithsub)));
1821
break;
1822
case TM_MUL:
1823
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmul)));
1824
break;
1825
case TM_DIV:
1826
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithdiv)));
1827
break;
1828
case TM_IDIV:
1829
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithidiv)));
1830
break;
1831
case TM_MOD:
1832
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithmod)));
1833
break;
1834
case TM_POW:
1835
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithpow)));
1836
break;
1837
case TM_UNM:
1838
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_doarithunm)));
1839
break;
1840
default:
1841
CODEGEN_ASSERT(!"Invalid doarith helper operation tag");
1842
break;
1843
}
1844
1845
build.blr(x4);
1846
1847
emitUpdateBase(build);
1848
break;
1849
case IrCmd::DO_LEN:
1850
regs.spill(index);
1851
build.mov(x0, rState);
1852
build.add(x1, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
1853
build.add(x2, rBase, uint16_t(vmRegOp(OP_B(inst)) * sizeof(TValue)));
1854
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_dolen)));
1855
build.blr(x3);
1856
1857
emitUpdateBase(build);
1858
break;
1859
case IrCmd::GET_TABLE:
1860
regs.spill(index);
1861
build.mov(x0, rState);
1862
build.add(x1, rBase, uint16_t(vmRegOp(OP_B(inst)) * sizeof(TValue)));
1863
1864
if (OP_C(inst).kind == IrOpKind::VmReg)
1865
build.add(x2, rBase, uint16_t(vmRegOp(OP_C(inst)) * sizeof(TValue)));
1866
else if (OP_C(inst).kind == IrOpKind::Constant)
1867
{
1868
TValue n = {};
1869
setnvalue(&n, uintOp(OP_C(inst)));
1870
build.adr(x2, &n, sizeof(n));
1871
}
1872
else
1873
CODEGEN_ASSERT(!"Unsupported instruction form");
1874
1875
build.add(x3, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
1876
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_gettable)));
1877
build.blr(x4);
1878
1879
emitUpdateBase(build);
1880
break;
1881
case IrCmd::SET_TABLE:
1882
regs.spill(index);
1883
build.mov(x0, rState);
1884
build.add(x1, rBase, uint16_t(vmRegOp(OP_B(inst)) * sizeof(TValue)));
1885
1886
if (OP_C(inst).kind == IrOpKind::VmReg)
1887
build.add(x2, rBase, uint16_t(vmRegOp(OP_C(inst)) * sizeof(TValue)));
1888
else if (OP_C(inst).kind == IrOpKind::Constant)
1889
{
1890
TValue n = {};
1891
setnvalue(&n, uintOp(OP_C(inst)));
1892
build.adr(x2, &n, sizeof(n));
1893
}
1894
else
1895
CODEGEN_ASSERT(!"Unsupported instruction form");
1896
1897
build.add(x3, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
1898
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaV_settable)));
1899
build.blr(x4);
1900
1901
emitUpdateBase(build);
1902
break;
1903
case IrCmd::GET_CACHED_IMPORT:
1904
{
1905
regs.spill(index);
1906
1907
Label skip, exit;
1908
1909
RegisterA64 tempTag = regs.allocTemp(KindA64::w);
1910
1911
AddressA64 addrConstTag = tempAddr(OP_B(inst), offsetof(TValue, tt));
1912
build.ldr(tempTag, addrConstTag);
1913
1914
// If the constant for the import is set, we will use it directly, otherwise we have to call an import path lookup function
1915
CODEGEN_ASSERT(LUA_TNIL == 0);
1916
build.cbnz(tempTag, skip);
1917
1918
{
1919
build.mov(x0, rState);
1920
build.add(x1, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
1921
build.mov(w2, importOp(OP_C(inst)));
1922
build.mov(w3, uintOp(OP_D(inst)));
1923
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, getImport)));
1924
build.blr(x4);
1925
1926
emitUpdateBase(build);
1927
build.b(exit);
1928
}
1929
1930
build.setLabel(skip);
1931
1932
RegisterA64 tempTv = regs.allocTemp(KindA64::q);
1933
1934
AddressA64 addrConst = tempAddr(OP_B(inst), 0);
1935
build.ldr(tempTv, addrConst);
1936
1937
AddressA64 addrReg = tempAddr(OP_A(inst), 0);
1938
build.str(tempTv, addrReg);
1939
1940
build.setLabel(exit);
1941
break;
1942
}
1943
case IrCmd::CONCAT:
1944
regs.spill(index);
1945
build.mov(x0, rState);
1946
build.mov(w1, uintOp(OP_B(inst)));
1947
build.mov(w2, vmRegOp(OP_A(inst)) + uintOp(OP_B(inst)) - 1);
1948
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaV_concat)));
1949
build.blr(x3);
1950
1951
emitUpdateBase(build);
1952
break;
1953
case IrCmd::GET_UPVALUE:
1954
{
1955
inst.regA64 = regs.allocReg(KindA64::q, index);
1956
1957
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
1958
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
1959
1960
build.add(temp1, rClosure, uint16_t(offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(OP_A(inst))));
1961
1962
// uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value
1963
Label skip;
1964
build.ldr(temp2, mem(temp1, offsetof(TValue, tt)));
1965
build.cmp(temp2, uint16_t(LUA_TUPVAL));
1966
build.b(ConditionA64::NotEqual, skip);
1967
1968
// UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally)
1969
build.ldr(temp1, mem(temp1, offsetof(TValue, value.gc)));
1970
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
1971
1972
build.setLabel(skip);
1973
1974
build.ldr(inst.regA64, temp1);
1975
break;
1976
}
1977
case IrCmd::SET_UPVALUE:
1978
{
1979
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
1980
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
1981
1982
// UpVal*
1983
build.ldr(temp1, mem(rClosure, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(OP_A(inst)) + offsetof(TValue, value.gc)));
1984
1985
build.ldr(temp2, mem(temp1, offsetof(UpVal, v)));
1986
build.str(regOp(OP_B(inst)), temp2);
1987
1988
if (OP_C(inst).kind == IrOpKind::Undef || isGCO(tagOp(OP_C(inst))))
1989
{
1990
RegisterA64 value = regOp(OP_B(inst));
1991
1992
Label skip;
1993
checkObjectBarrierConditions(temp1, temp2, value, OP_B(inst), OP_C(inst).kind == IrOpKind::Undef ? -1 : tagOp(OP_C(inst)), skip);
1994
1995
size_t spills = regs.spill(index, {temp1, value});
1996
1997
build.mov(x1, temp1);
1998
build.mov(x0, rState);
1999
build.fmov(x2, castReg(KindA64::d, value));
2000
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
2001
build.blr(x3);
2002
2003
regs.restore(spills); // need to restore before skip so that registers are in a consistent state
2004
2005
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
2006
build.setLabel(skip);
2007
}
2008
break;
2009
}
2010
case IrCmd::CHECK_TAG:
2011
{
2012
Label fresh; // used when guard aborts execution or jumps to a VM exit
2013
Label& fail = getTargetLabel(OP_C(inst), fresh);
2014
2015
if (tagOp(OP_B(inst)) == 0)
2016
{
2017
build.cbnz(regOp(OP_A(inst)), fail);
2018
}
2019
else
2020
{
2021
build.cmp(regOp(OP_A(inst)), uint16_t(tagOp(OP_B(inst))));
2022
build.b(ConditionA64::NotEqual, fail);
2023
}
2024
2025
finalizeTargetLabel(OP_C(inst), fresh);
2026
break;
2027
}
2028
case IrCmd::CHECK_TRUTHY:
2029
{
2030
// Constant tags which don't require boolean value check should've been removed in constant folding
2031
CODEGEN_ASSERT(OP_A(inst).kind != IrOpKind::Constant || tagOp(OP_A(inst)) == LUA_TBOOLEAN);
2032
2033
Label fresh; // used when guard aborts execution or jumps to a VM exit
2034
Label& target = getTargetLabel(OP_C(inst), fresh);
2035
2036
Label skip;
2037
2038
if (OP_A(inst).kind != IrOpKind::Constant)
2039
{
2040
// fail to fallback on 'nil' (falsy)
2041
CODEGEN_ASSERT(LUA_TNIL == 0);
2042
build.cbz(regOp(OP_A(inst)), target);
2043
2044
// skip value test if it's not a boolean (truthy)
2045
build.cmp(regOp(OP_A(inst)), uint16_t(LUA_TBOOLEAN));
2046
build.b(ConditionA64::NotEqual, skip);
2047
}
2048
2049
// fail to fallback on 'false' boolean value (falsy)
2050
if (OP_B(inst).kind != IrOpKind::Constant)
2051
{
2052
build.cbz(regOp(OP_B(inst)), target);
2053
}
2054
else
2055
{
2056
if (intOp(OP_B(inst)) == 0)
2057
build.b(target);
2058
}
2059
2060
if (OP_A(inst).kind != IrOpKind::Constant)
2061
build.setLabel(skip);
2062
2063
finalizeTargetLabel(OP_C(inst), fresh);
2064
break;
2065
}
2066
case IrCmd::CHECK_READONLY:
2067
{
2068
Label fresh; // used when guard aborts execution or jumps to a VM exit
2069
RegisterA64 temp = regs.allocTemp(KindA64::w);
2070
build.ldrb(temp, mem(regOp(OP_A(inst)), offsetof(LuaTable, readonly)));
2071
build.cbnz(temp, getTargetLabel(OP_B(inst), fresh));
2072
finalizeTargetLabel(OP_B(inst), fresh);
2073
break;
2074
}
2075
case IrCmd::CHECK_NO_METATABLE:
2076
{
2077
Label fresh; // used when guard aborts execution or jumps to a VM exit
2078
RegisterA64 temp = regs.allocTemp(KindA64::x);
2079
build.ldr(temp, mem(regOp(OP_A(inst)), offsetof(LuaTable, metatable)));
2080
build.cbnz(temp, getTargetLabel(OP_B(inst), fresh));
2081
finalizeTargetLabel(OP_B(inst), fresh);
2082
break;
2083
}
2084
case IrCmd::CHECK_SAFE_ENV:
2085
{
2086
if (FFlag::LuauCodegenBlockSafeEnv)
2087
{
2088
checkSafeEnv(OP_A(inst), next);
2089
}
2090
else
2091
{
2092
Label fresh; // used when guard aborts execution or jumps to a VM exit
2093
RegisterA64 temp = regs.allocTemp(KindA64::x);
2094
RegisterA64 tempw = castReg(KindA64::w, temp);
2095
build.ldr(temp, mem(rClosure, offsetof(Closure, env)));
2096
build.ldrb(tempw, mem(temp, offsetof(LuaTable, safeenv)));
2097
build.cbz(tempw, getTargetLabel(OP_A(inst), fresh));
2098
finalizeTargetLabel(OP_A(inst), fresh);
2099
}
2100
break;
2101
}
2102
case IrCmd::CHECK_ARRAY_SIZE:
2103
{
2104
Label fresh; // used when guard aborts execution or jumps to a VM exit
2105
Label& fail = getTargetLabel(OP_C(inst), fresh);
2106
2107
RegisterA64 temp = regs.allocTemp(KindA64::w);
2108
build.ldr(temp, mem(regOp(OP_A(inst)), offsetof(LuaTable, sizearray)));
2109
2110
if (OP_B(inst).kind == IrOpKind::Inst)
2111
{
2112
build.cmp(temp, regOp(OP_B(inst)));
2113
build.b(ConditionA64::UnsignedLessEqual, fail);
2114
}
2115
else if (OP_B(inst).kind == IrOpKind::Constant)
2116
{
2117
if (intOp(OP_B(inst)) == 0)
2118
{
2119
build.cbz(temp, fail);
2120
}
2121
else if (size_t(intOp(OP_B(inst))) <= AssemblyBuilderA64::kMaxImmediate)
2122
{
2123
build.cmp(temp, uint16_t(intOp(OP_B(inst))));
2124
build.b(ConditionA64::UnsignedLessEqual, fail);
2125
}
2126
else
2127
{
2128
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
2129
build.mov(temp2, intOp(OP_B(inst)));
2130
build.cmp(temp, temp2);
2131
build.b(ConditionA64::UnsignedLessEqual, fail);
2132
}
2133
}
2134
else
2135
CODEGEN_ASSERT(!"Unsupported instruction form");
2136
2137
finalizeTargetLabel(OP_C(inst), fresh);
2138
break;
2139
}
2140
case IrCmd::JUMP_SLOT_MATCH:
2141
case IrCmd::CHECK_SLOT_MATCH:
2142
{
2143
Label abort; // used when guard aborts execution
2144
const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? OP_D(inst) : OP_C(inst);
2145
Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp);
2146
2147
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
2148
RegisterA64 temp1w = castReg(KindA64::w, temp1);
2149
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
2150
2151
static_assert(offsetof(LuaNode, key.value) == offsetof(LuaNode, key) && kOffsetOfTKeyTagNext >= 8 && kOffsetOfTKeyTagNext < 16);
2152
build.ldp(
2153
temp1, temp2, mem(regOp(OP_A(inst)), offsetof(LuaNode, key))
2154
); // load key.value into temp1 and key.tt (alongside other bits) into temp2
2155
build.ubfx(temp2, temp2, (kOffsetOfTKeyTagNext - 8) * 8, kTKeyTagBits); // .tt is right before .next, and 8 bytes are skipped by ldp
2156
build.cmp(temp2, uint16_t(LUA_TSTRING));
2157
build.b(ConditionA64::NotEqual, mismatch);
2158
2159
AddressA64 addr = tempAddr(OP_B(inst), offsetof(TValue, value));
2160
build.ldr(temp2, addr);
2161
build.cmp(temp1, temp2);
2162
build.b(ConditionA64::NotEqual, mismatch);
2163
2164
build.ldr(temp1w, mem(regOp(OP_A(inst)), offsetof(LuaNode, val.tt)));
2165
CODEGEN_ASSERT(LUA_TNIL == 0);
2166
build.cbz(temp1w, mismatch);
2167
2168
if (inst.cmd == IrCmd::JUMP_SLOT_MATCH)
2169
jumpOrFallthrough(blockOp(OP_C(inst)), next);
2170
else if (abort.id)
2171
emitAbort(build, abort);
2172
break;
2173
}
2174
case IrCmd::CHECK_NODE_NO_NEXT:
2175
{
2176
Label fresh; // used when guard aborts execution or jumps to a VM exit
2177
RegisterA64 temp = regs.allocTemp(KindA64::w);
2178
2179
build.ldr(temp, mem(regOp(OP_A(inst)), offsetof(LuaNode, key) + kOffsetOfTKeyTagNext));
2180
build.lsr(temp, temp, kTKeyTagBits);
2181
build.cbnz(temp, getTargetLabel(OP_B(inst), fresh));
2182
finalizeTargetLabel(OP_B(inst), fresh);
2183
break;
2184
}
2185
case IrCmd::CHECK_NODE_VALUE:
2186
{
2187
Label fresh; // used when guard aborts execution or jumps to a VM exit
2188
RegisterA64 temp = regs.allocTemp(KindA64::w);
2189
2190
build.ldr(temp, mem(regOp(OP_A(inst)), offsetof(LuaNode, val.tt)));
2191
CODEGEN_ASSERT(LUA_TNIL == 0);
2192
build.cbz(temp, getTargetLabel(OP_B(inst), fresh));
2193
finalizeTargetLabel(OP_B(inst), fresh);
2194
break;
2195
}
2196
case IrCmd::CHECK_BUFFER_LEN:
2197
{
2198
if (FFlag::LuauCodegenBufferRangeMerge4)
2199
{
2200
int minOffset = intOp(OP_C(inst));
2201
int maxOffset = intOp(OP_D(inst));
2202
CODEGEN_ASSERT(minOffset < maxOffset);
2203
CODEGEN_ASSERT(minOffset >= -int(AssemblyBuilderA64::kMaxImmediate) && minOffset <= int(AssemblyBuilderA64::kMaxImmediate));
2204
2205
int accessSize = maxOffset - minOffset;
2206
CODEGEN_ASSERT(accessSize > 0 && accessSize <= int(AssemblyBuilderA64::kMaxImmediate));
2207
2208
Label fresh; // used when guard aborts execution or jumps to a VM exit
2209
Label& target = getTargetLabel(OP_F(inst), fresh);
2210
2211
// Check if we are acting not only as a guard for the size, but as a guard that offset represents an exact integer
2212
if (OP_E(inst).kind != IrOpKind::Undef)
2213
{
2214
CODEGEN_ASSERT(getCmdValueKind(function.instOp(OP_B(inst)).cmd) == IrValueKind::Int);
2215
CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(OP_B(inst)).cmd)); // Ensure that high register bits are cleared
2216
2217
if ((build.features & Feature_JSCVT) != 0)
2218
{
2219
RegisterA64 temp = regs.allocTemp(KindA64::w);
2220
2221
build.fjcvtzs(temp, regOp(OP_E(inst))); // fjcvtzs sets PSTATE.Z (equal) iff conversion is exact
2222
build.b(ConditionA64::NotEqual, target);
2223
}
2224
else
2225
{
2226
RegisterA64 temp = regs.allocTemp(KindA64::d);
2227
2228
build.scvtf(temp, regOp(OP_B(inst)));
2229
build.fcmp(regOp(OP_E(inst)), temp);
2230
build.b(ConditionA64::NotEqual, target);
2231
}
2232
}
2233
2234
RegisterA64 temp = regs.allocTemp(KindA64::w);
2235
build.ldr(temp, mem(regOp(OP_A(inst)), offsetof(Buffer, len)));
2236
2237
if (OP_B(inst).kind == IrOpKind::Inst)
2238
{
2239
CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(OP_B(inst)).cmd)); // Ensure that high register bits are cleared
2240
2241
if (accessSize == 1 && minOffset == 0)
2242
{
2243
// fails if offset >= len
2244
build.cmp(temp, regOp(OP_B(inst)));
2245
build.b(ConditionA64::UnsignedLessEqual, target);
2246
}
2247
else if (minOffset >= 0 && maxOffset <= int(AssemblyBuilderA64::kMaxImmediate))
2248
{
2249
// fails if offset + size > len; we compute it as len - offset < size
2250
RegisterA64 tempx = castReg(KindA64::x, temp);
2251
build.sub(tempx, tempx, regOp(OP_B(inst))); // implicit uxtw
2252
build.cmp(tempx, uint16_t(maxOffset));
2253
build.b(ConditionA64::Less, target); // note: this is a signed 64-bit comparison so that out of bounds offset fails
2254
}
2255
else
2256
{
2257
RegisterA64 tempx = castReg(KindA64::x, temp);
2258
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
2259
2260
// Get the base offset in 32 bits
2261
if (minOffset >= 0)
2262
build.add(castReg(KindA64::w, temp2), regOp(OP_B(inst)), uint16_t(minOffset));
2263
else
2264
build.sub(castReg(KindA64::w, temp2), regOp(OP_B(inst)), uint16_t(-minOffset));
2265
2266
// fail if uint64_t(uint32_t(offset + minOffset)) + accessSize > length
2267
build.add(temp2, temp2, uint16_t(accessSize));
2268
build.cmp(temp2, tempx);
2269
build.b(ConditionA64::UnsignedGreater, target);
2270
}
2271
}
2272
else if (OP_B(inst).kind == IrOpKind::Constant)
2273
{
2274
int offset = intOp(OP_B(inst));
2275
2276
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
2277
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
2278
{
2279
build.b(target);
2280
}
2281
else if (offset + accessSize <= int(AssemblyBuilderA64::kMaxImmediate))
2282
{
2283
build.cmp(temp, uint16_t(offset + accessSize));
2284
build.b(ConditionA64::UnsignedLessEqual, target);
2285
}
2286
else
2287
{
2288
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
2289
build.mov(temp2, offset + accessSize);
2290
build.cmp(temp, temp2);
2291
build.b(ConditionA64::UnsignedLessEqual, target);
2292
}
2293
}
2294
else
2295
{
2296
CODEGEN_ASSERT(!"Unsupported instruction form");
2297
}
2298
finalizeTargetLabel(OP_F(inst), fresh);
2299
}
2300
else
2301
{
2302
int accessSize = intOp(OP_C(inst));
2303
CODEGEN_ASSERT(accessSize > 0 && accessSize <= int(AssemblyBuilderA64::kMaxImmediate));
2304
2305
Label fresh; // used when guard aborts execution or jumps to a VM exit
2306
Label& target = getTargetLabel(OP_D(inst), fresh);
2307
2308
RegisterA64 temp = regs.allocTemp(KindA64::w);
2309
build.ldr(temp, mem(regOp(OP_A(inst)), offsetof(Buffer, len)));
2310
2311
if (OP_B(inst).kind == IrOpKind::Inst)
2312
{
2313
CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(OP_B(inst)).cmd)); // Ensure that high register bits are cleared
2314
2315
if (accessSize == 1)
2316
{
2317
// fails if offset >= len
2318
build.cmp(temp, regOp(OP_B(inst)));
2319
build.b(ConditionA64::UnsignedLessEqual, target);
2320
}
2321
else
2322
{
2323
// fails if offset + size > len; we compute it as len - offset < size
2324
RegisterA64 tempx = castReg(KindA64::x, temp);
2325
build.sub(tempx, tempx, regOp(OP_B(inst))); // implicit uxtw
2326
build.cmp(tempx, uint16_t(accessSize));
2327
build.b(ConditionA64::Less, target); // note: this is a signed 64-bit comparison so that out of bounds offset fails
2328
}
2329
}
2330
else if (OP_B(inst).kind == IrOpKind::Constant)
2331
{
2332
int offset = intOp(OP_B(inst));
2333
2334
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
2335
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
2336
{
2337
build.b(target);
2338
}
2339
else if (offset + accessSize <= int(AssemblyBuilderA64::kMaxImmediate))
2340
{
2341
build.cmp(temp, uint16_t(offset + accessSize));
2342
build.b(ConditionA64::UnsignedLessEqual, target);
2343
}
2344
else
2345
{
2346
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
2347
build.mov(temp2, offset + accessSize);
2348
build.cmp(temp, temp2);
2349
build.b(ConditionA64::UnsignedLessEqual, target);
2350
}
2351
}
2352
else
2353
{
2354
CODEGEN_ASSERT(!"Unsupported instruction form");
2355
}
2356
finalizeTargetLabel(OP_D(inst), fresh);
2357
}
2358
break;
2359
}
2360
case IrCmd::CHECK_USERDATA_TAG:
2361
{
2362
CODEGEN_ASSERT(unsigned(intOp(OP_B(inst))) <= AssemblyBuilderA64::kMaxImmediate);
2363
2364
Label fresh; // used when guard aborts execution or jumps to a VM exit
2365
Label& fail = getTargetLabel(OP_C(inst), fresh);
2366
RegisterA64 temp = regs.allocTemp(KindA64::w);
2367
build.ldrb(temp, mem(regOp(OP_A(inst)), offsetof(Udata, tag)));
2368
build.cmp(temp, uint16_t(intOp(OP_B(inst))));
2369
build.b(ConditionA64::NotEqual, fail);
2370
finalizeTargetLabel(OP_C(inst), fresh);
2371
break;
2372
}
2373
case IrCmd::CHECK_CMP_INT:
2374
{
2375
IrCondition cond = conditionOp(OP_C(inst));
2376
2377
Label fresh; // used when guard aborts execution or jumps to a VM exit
2378
Label& fail = getTargetLabel(OP_D(inst), fresh);
2379
2380
if (cond == IrCondition::Equal && intOp(OP_B(inst)) == 0)
2381
{
2382
build.cbnz(regOp(OP_A(inst)), fail);
2383
}
2384
else if (cond == IrCondition::NotEqual && intOp(OP_B(inst)) == 0)
2385
{
2386
build.cbz(regOp(OP_A(inst)), fail);
2387
}
2388
else
2389
{
2390
RegisterA64 tempA = tempInt(OP_A(inst));
2391
2392
if (OP_B(inst).kind == IrOpKind::Constant && unsigned(intOp(OP_B(inst))) <= AssemblyBuilderA64::kMaxImmediate)
2393
build.cmp(tempA, uint16_t(intOp(OP_B(inst))));
2394
else
2395
build.cmp(tempA, tempInt(OP_B(inst)));
2396
2397
build.b(getConditionInt(getNegatedCondition(cond)), fail);
2398
}
2399
finalizeTargetLabel(OP_D(inst), fresh);
2400
break;
2401
}
2402
case IrCmd::INTERRUPT:
2403
{
2404
regs.spill(index);
2405
2406
Label self;
2407
2408
build.ldr(x0, mem(rGlobalState, offsetof(global_State, cb.interrupt)));
2409
build.cbnz(x0, self);
2410
2411
Label next = build.setLabel();
2412
2413
interruptHandlers.push_back({self, uintOp(OP_A(inst)), next});
2414
break;
2415
}
2416
case IrCmd::CHECK_GC:
2417
{
2418
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
2419
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
2420
2421
static_assert(offsetof(global_State, totalbytes) == offsetof(global_State, GCthreshold) + sizeof(global_State::GCthreshold));
2422
Label skip;
2423
build.ldp(temp1, temp2, mem(rGlobalState, offsetof(global_State, GCthreshold)));
2424
build.cmp(temp1, temp2);
2425
build.b(ConditionA64::UnsignedGreater, skip);
2426
2427
size_t spills = regs.spill(index);
2428
2429
build.mov(x0, rState);
2430
build.mov(w1, 1);
2431
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaC_step)));
2432
build.blr(x2);
2433
2434
emitUpdateBase(build);
2435
2436
regs.restore(spills); // need to restore before skip so that registers are in a consistent state
2437
2438
build.setLabel(skip);
2439
break;
2440
}
2441
case IrCmd::BARRIER_OBJ:
2442
{
2443
RegisterA64 temp = regs.allocTemp(KindA64::x);
2444
2445
Label skip;
2446
checkObjectBarrierConditions(regOp(OP_A(inst)), temp, noreg, OP_B(inst), OP_C(inst).kind == IrOpKind::Undef ? -1 : tagOp(OP_C(inst)), skip);
2447
2448
RegisterA64 reg = regOp(OP_A(inst)); // note: we need to call regOp before spill so that we don't do redundant reloads
2449
size_t spills = regs.spill(index, {reg});
2450
build.mov(x1, reg);
2451
build.mov(x0, rState);
2452
build.ldr(x2, mem(rBase, vmRegOp(OP_B(inst)) * sizeof(TValue) + offsetof(TValue, value)));
2453
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierf)));
2454
build.blr(x3);
2455
2456
regs.restore(spills); // need to restore before skip so that registers are in a consistent state
2457
2458
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
2459
build.setLabel(skip);
2460
break;
2461
}
2462
case IrCmd::BARRIER_TABLE_BACK:
2463
{
2464
Label skip;
2465
RegisterA64 temp = regs.allocTemp(KindA64::w);
2466
2467
// isblack(obj2gco(t))
2468
build.ldrb(temp, mem(regOp(OP_A(inst)), offsetof(GCheader, marked)));
2469
build.tbz(temp, BLACKBIT, skip);
2470
2471
RegisterA64 reg = regOp(OP_A(inst)); // note: we need to call regOp before spill so that we don't do redundant reloads
2472
size_t spills = regs.spill(index, {reg});
2473
build.mov(x1, reg);
2474
build.mov(x0, rState);
2475
build.add(x2, x1, uint16_t(offsetof(LuaTable, gclist)));
2476
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barrierback)));
2477
build.blr(x3);
2478
2479
regs.restore(spills); // need to restore before skip so that registers are in a consistent state
2480
2481
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
2482
build.setLabel(skip);
2483
break;
2484
}
2485
case IrCmd::BARRIER_TABLE_FORWARD:
2486
{
2487
RegisterA64 temp = regs.allocTemp(KindA64::x);
2488
2489
Label skip;
2490
checkObjectBarrierConditions(regOp(OP_A(inst)), temp, noreg, OP_B(inst), OP_C(inst).kind == IrOpKind::Undef ? -1 : tagOp(OP_C(inst)), skip);
2491
2492
RegisterA64 reg = regOp(OP_A(inst)); // note: we need to call regOp before spill so that we don't do redundant reloads
2493
AddressA64 addr = tempAddr(OP_B(inst), offsetof(TValue, value));
2494
size_t spills = regs.spill(index, {reg});
2495
build.mov(x1, reg);
2496
build.mov(x0, rState);
2497
build.ldr(x2, addr);
2498
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, luaC_barriertable)));
2499
build.blr(x3);
2500
2501
regs.restore(spills); // need to restore before skip so that registers are in a consistent state
2502
2503
// note: no emitUpdateBase necessary because luaC_ barriers do not reallocate stack
2504
build.setLabel(skip);
2505
break;
2506
}
2507
case IrCmd::SET_SAVEDPC:
2508
{
2509
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
2510
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
2511
2512
emitAddOffset(build, temp1, rCode, uintOp(OP_A(inst)) * sizeof(Instruction));
2513
build.ldr(temp2, mem(rState, offsetof(lua_State, ci)));
2514
build.str(temp1, mem(temp2, offsetof(CallInfo, savedpc)));
2515
break;
2516
}
2517
case IrCmd::CLOSE_UPVALS:
2518
{
2519
Label skip;
2520
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
2521
RegisterA64 temp2 = regs.allocTemp(KindA64::x);
2522
2523
// L->openupval != 0
2524
build.ldr(temp1, mem(rState, offsetof(lua_State, openupval)));
2525
build.cbz(temp1, skip);
2526
2527
// ra <= L->openupval->v
2528
build.ldr(temp1, mem(temp1, offsetof(UpVal, v)));
2529
build.add(temp2, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
2530
build.cmp(temp2, temp1);
2531
build.b(ConditionA64::UnsignedGreater, skip);
2532
2533
size_t spills = regs.spill(index, {temp2});
2534
build.mov(x1, temp2);
2535
build.mov(x0, rState);
2536
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_close)));
2537
build.blr(x2);
2538
2539
regs.restore(spills); // need to restore before skip so that registers are in a consistent state
2540
2541
build.setLabel(skip);
2542
break;
2543
}
2544
case IrCmd::CAPTURE:
2545
// no-op
2546
break;
2547
case IrCmd::SETLIST:
2548
regs.spill(index);
2549
emitFallback(build, offsetof(NativeContext, executeSETLIST), uintOp(OP_A(inst)));
2550
break;
2551
case IrCmd::CALL:
2552
regs.spill(index);
2553
// argtop = (nparams == LUA_MULTRET) ? L->top : ra + 1 + nparams;
2554
if (intOp(OP_B(inst)) == LUA_MULTRET)
2555
build.ldr(x2, mem(rState, offsetof(lua_State, top)));
2556
else
2557
build.add(x2, rBase, uint16_t((vmRegOp(OP_A(inst)) + 1 + intOp(OP_B(inst))) * sizeof(TValue)));
2558
2559
// callFallback(L, ra, argtop, nresults)
2560
build.mov(x0, rState);
2561
build.add(x1, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
2562
build.mov(w3, intOp(OP_C(inst)));
2563
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, callFallback)));
2564
build.blr(x4);
2565
2566
emitUpdateBase(build);
2567
2568
// reentry with x0=closure (NULL implies C function; CALL_FALLBACK_YIELD will trigger exit)
2569
build.cbnz(x0, helpers.continueCall);
2570
break;
2571
case IrCmd::RETURN:
2572
regs.spill(index);
2573
2574
if (function.variadic)
2575
{
2576
build.ldr(x1, mem(rState, offsetof(lua_State, ci)));
2577
build.ldr(x1, mem(x1, offsetof(CallInfo, func)));
2578
}
2579
else if (intOp(OP_B(inst)) != 1)
2580
build.sub(x1, rBase, uint16_t(sizeof(TValue))); // invariant: ci->func + 1 == ci->base for non-variadic frames
2581
2582
if (intOp(OP_B(inst)) == 0)
2583
{
2584
build.mov(w2, 0);
2585
build.b(helpers.return_);
2586
}
2587
else if (intOp(OP_B(inst)) == 1 && !function.variadic)
2588
{
2589
// fast path: minimizes x1 adjustments
2590
// note that we skipped x1 computation for this specific case above
2591
build.ldr(q0, mem(rBase, vmRegOp(OP_A(inst)) * sizeof(TValue)));
2592
build.str(q0, mem(rBase, -int(sizeof(TValue))));
2593
build.mov(x1, rBase);
2594
build.mov(w2, 1);
2595
build.b(helpers.return_);
2596
}
2597
else if (intOp(OP_B(inst)) >= 1 && intOp(OP_B(inst)) <= 3)
2598
{
2599
for (int r = 0; r < intOp(OP_B(inst)); ++r)
2600
{
2601
build.ldr(q0, mem(rBase, (vmRegOp(OP_A(inst)) + r) * sizeof(TValue)));
2602
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
2603
}
2604
build.mov(w2, intOp(OP_B(inst)));
2605
build.b(helpers.return_);
2606
}
2607
else
2608
{
2609
build.mov(w2, 0);
2610
2611
// vali = ra
2612
build.add(x3, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
2613
2614
// valend = (n == LUA_MULTRET) ? L->top : ra + n
2615
if (intOp(OP_B(inst)) == LUA_MULTRET)
2616
build.ldr(x4, mem(rState, offsetof(lua_State, top)));
2617
else
2618
build.add(x4, rBase, uint16_t((vmRegOp(OP_A(inst)) + intOp(OP_B(inst))) * sizeof(TValue)));
2619
2620
Label repeatValueLoop, exitValueLoop;
2621
2622
if (intOp(OP_B(inst)) == LUA_MULTRET)
2623
{
2624
build.cmp(x3, x4);
2625
build.b(ConditionA64::CarrySet, exitValueLoop); // CarrySet == UnsignedGreaterEqual
2626
}
2627
2628
build.setLabel(repeatValueLoop);
2629
build.ldr(q0, mem(x3, sizeof(TValue), AddressKindA64::post));
2630
build.str(q0, mem(x1, sizeof(TValue), AddressKindA64::post));
2631
build.add(w2, w2, uint16_t(1));
2632
build.cmp(x3, x4);
2633
build.b(ConditionA64::CarryClear, repeatValueLoop); // CarryClear == UnsignedLess
2634
2635
build.setLabel(exitValueLoop);
2636
build.b(helpers.return_);
2637
}
2638
break;
2639
case IrCmd::FORGLOOP:
2640
// register layout: ra + 1 = table, ra + 2 = internal index, ra + 3 .. ra + aux = iteration variables
2641
regs.spill(index);
2642
// clear extra variables since we might have more than two
2643
if (intOp(OP_B(inst)) > 2)
2644
{
2645
CODEGEN_ASSERT(LUA_TNIL == 0);
2646
for (int i = 2; i < intOp(OP_B(inst)); ++i)
2647
build.str(wzr, mem(rBase, (vmRegOp(OP_A(inst)) + 3 + i) * sizeof(TValue) + offsetof(TValue, tt)));
2648
}
2649
// we use full iter fallback for now; in the future it could be worthwhile to accelerate array iteration here
2650
build.mov(x0, rState);
2651
build.ldr(x1, mem(rBase, (vmRegOp(OP_A(inst)) + 1) * sizeof(TValue) + offsetof(TValue, value.gc)));
2652
build.ldr(w2, mem(rBase, (vmRegOp(OP_A(inst)) + 2) * sizeof(TValue) + offsetof(TValue, value.p)));
2653
build.add(x3, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
2654
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, forgLoopTableIter)));
2655
build.blr(x4);
2656
// note: no emitUpdateBase necessary because forgLoopTableIter does not reallocate stack
2657
build.cbnz(w0, labelOp(OP_C(inst)));
2658
jumpOrFallthrough(blockOp(OP_D(inst)), next);
2659
break;
2660
case IrCmd::FORGLOOP_FALLBACK:
2661
regs.spill(index);
2662
build.mov(x0, rState);
2663
build.mov(w1, vmRegOp(OP_A(inst)));
2664
build.mov(w2, intOp(OP_B(inst)));
2665
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgLoopNonTableFallback)));
2666
build.blr(x3);
2667
emitUpdateBase(build);
2668
build.cbnz(w0, labelOp(OP_C(inst)));
2669
jumpOrFallthrough(blockOp(OP_D(inst)), next);
2670
break;
2671
case IrCmd::FORGPREP_XNEXT_FALLBACK:
2672
regs.spill(index);
2673
build.mov(x0, rState);
2674
build.add(x1, rBase, uint16_t(vmRegOp(OP_B(inst)) * sizeof(TValue)));
2675
build.mov(w2, uintOp(OP_A(inst)) + 1);
2676
build.ldr(x3, mem(rNativeContext, offsetof(NativeContext, forgPrepXnextFallback)));
2677
build.blr(x3);
2678
// note: no emitUpdateBase necessary because forgLoopNonTableFallback does not reallocate stack
2679
jumpOrFallthrough(blockOp(OP_C(inst)), next);
2680
break;
2681
case IrCmd::COVERAGE:
2682
{
2683
RegisterA64 temp1 = regs.allocTemp(KindA64::x);
2684
RegisterA64 temp2 = regs.allocTemp(KindA64::w);
2685
RegisterA64 temp3 = regs.allocTemp(KindA64::w);
2686
2687
build.mov(temp1, uintOp(OP_A(inst)) * sizeof(Instruction));
2688
build.ldr(temp2, mem(rCode, temp1));
2689
2690
// increments E (high 24 bits); if the result overflows a 23-bit counter, high bit becomes 1
2691
// note: cmp can be eliminated with adds but we aren't concerned with code size for coverage
2692
build.add(temp3, temp2, uint16_t(256));
2693
build.cmp(temp3, uint16_t(0));
2694
build.csel(temp2, temp2, temp3, ConditionA64::Less);
2695
2696
build.str(temp2, mem(rCode, temp1));
2697
break;
2698
}
2699
2700
// Full instruction fallbacks
2701
case IrCmd::FALLBACK_GETGLOBAL:
2702
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2703
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmConst);
2704
2705
regs.spill(index);
2706
emitFallback(build, offsetof(NativeContext, executeGETGLOBAL), uintOp(OP_A(inst)));
2707
break;
2708
case IrCmd::FALLBACK_SETGLOBAL:
2709
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2710
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmConst);
2711
2712
regs.spill(index);
2713
emitFallback(build, offsetof(NativeContext, executeSETGLOBAL), uintOp(OP_A(inst)));
2714
break;
2715
case IrCmd::FALLBACK_GETTABLEKS:
2716
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2717
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmReg);
2718
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::VmConst);
2719
2720
regs.spill(index);
2721
emitFallback(build, offsetof(NativeContext, executeGETTABLEKS), uintOp(OP_A(inst)));
2722
break;
2723
case IrCmd::FALLBACK_SETTABLEKS:
2724
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2725
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmReg);
2726
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::VmConst);
2727
2728
regs.spill(index);
2729
emitFallback(build, offsetof(NativeContext, executeSETTABLEKS), uintOp(OP_A(inst)));
2730
break;
2731
case IrCmd::FALLBACK_NAMECALL:
2732
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2733
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmReg);
2734
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::VmConst);
2735
2736
regs.spill(index);
2737
emitFallback(build, offsetof(NativeContext, executeNAMECALL), uintOp(OP_A(inst)));
2738
break;
2739
case IrCmd::FALLBACK_PREPVARARGS:
2740
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Constant);
2741
2742
regs.spill(index);
2743
emitFallback(build, offsetof(NativeContext, executePREPVARARGS), uintOp(OP_A(inst)));
2744
break;
2745
case IrCmd::FALLBACK_GETVARARGS:
2746
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2747
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::Constant);
2748
2749
regs.spill(index);
2750
build.mov(x0, rState);
2751
2752
if (intOp(OP_C(inst)) == LUA_MULTRET)
2753
{
2754
emitAddOffset(build, x1, rCode, uintOp(OP_A(inst)) * sizeof(Instruction));
2755
build.mov(x2, rBase);
2756
build.mov(w3, vmRegOp(OP_B(inst)));
2757
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSMultRet)));
2758
build.blr(x4);
2759
2760
emitUpdateBase(build);
2761
}
2762
else
2763
{
2764
build.mov(x1, rBase);
2765
build.mov(w2, vmRegOp(OP_B(inst)));
2766
build.mov(w3, intOp(OP_C(inst)));
2767
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, executeGETVARARGSConst)));
2768
build.blr(x4);
2769
2770
// note: no emitUpdateBase necessary because executeGETVARARGSConst does not reallocate stack
2771
}
2772
break;
2773
case IrCmd::NEWCLOSURE:
2774
{
2775
RegisterA64 reg = regOp(OP_B(inst)); // note: we need to call regOp before spill so that we don't do redundant reloads
2776
2777
regs.spill(index, {reg});
2778
build.mov(x2, reg);
2779
2780
build.mov(x0, rState);
2781
build.mov(w1, uintOp(OP_A(inst)));
2782
2783
build.ldr(x3, mem(rClosure, offsetof(Closure, l.p)));
2784
build.ldr(x3, mem(x3, offsetof(Proto, p)));
2785
2786
unsigned protoIndex = uintOp(OP_C(inst)); // 0..32767
2787
int protoOffset = int(sizeof(Proto*) * protoIndex);
2788
2789
if (protoIndex <= AddressA64::kMaxOffset)
2790
{
2791
build.ldr(x3, mem(x3, protoOffset));
2792
}
2793
else
2794
{
2795
build.mov(x4, protoOffset);
2796
build.ldr(x3, mem(x3, x4));
2797
}
2798
2799
build.ldr(x4, mem(rNativeContext, offsetof(NativeContext, luaF_newLclosure)));
2800
build.blr(x4);
2801
2802
inst.regA64 = regs.takeReg(x0, index);
2803
break;
2804
}
2805
case IrCmd::FALLBACK_DUPCLOSURE:
2806
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2807
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmConst);
2808
2809
regs.spill(index);
2810
emitFallback(build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(OP_A(inst)));
2811
break;
2812
case IrCmd::FALLBACK_FORGPREP:
2813
regs.spill(index);
2814
emitFallback(build, offsetof(NativeContext, executeFORGPREP), uintOp(OP_A(inst)));
2815
jumpOrFallthrough(blockOp(OP_C(inst)), next);
2816
break;
2817
2818
// Pseudo instructions
2819
case IrCmd::NOP:
2820
case IrCmd::SUBSTITUTE:
2821
case IrCmd::MARK_USED:
2822
case IrCmd::MARK_DEAD:
2823
CODEGEN_ASSERT(!"Pseudo instructions should not be lowered");
2824
break;
2825
2826
case IrCmd::BITAND_UINT:
2827
{
2828
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
2829
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant &&
2830
AssemblyBuilderA64::isMaskSupported(unsigned(intOp(OP_B(inst)))))
2831
build.and_(inst.regA64, regOp(OP_A(inst)), unsigned(intOp(OP_B(inst))));
2832
else
2833
{
2834
RegisterA64 temp1 = tempUint(OP_A(inst));
2835
RegisterA64 temp2 = tempUint(OP_B(inst));
2836
build.and_(inst.regA64, temp1, temp2);
2837
}
2838
break;
2839
}
2840
case IrCmd::BITXOR_UINT:
2841
{
2842
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
2843
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant &&
2844
AssemblyBuilderA64::isMaskSupported(unsigned(intOp(OP_B(inst)))))
2845
build.eor(inst.regA64, regOp(OP_A(inst)), unsigned(intOp(OP_B(inst))));
2846
else
2847
{
2848
RegisterA64 temp1 = tempUint(OP_A(inst));
2849
RegisterA64 temp2 = tempUint(OP_B(inst));
2850
build.eor(inst.regA64, temp1, temp2);
2851
}
2852
break;
2853
}
2854
case IrCmd::BITOR_UINT:
2855
{
2856
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
2857
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant &&
2858
AssemblyBuilderA64::isMaskSupported(unsigned(intOp(OP_B(inst)))))
2859
build.orr(inst.regA64, regOp(OP_A(inst)), unsigned(intOp(OP_B(inst))));
2860
else
2861
{
2862
RegisterA64 temp1 = tempUint(OP_A(inst));
2863
RegisterA64 temp2 = tempUint(OP_B(inst));
2864
build.orr(inst.regA64, temp1, temp2);
2865
}
2866
break;
2867
}
2868
case IrCmd::BITNOT_UINT:
2869
{
2870
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
2871
RegisterA64 temp = tempUint(OP_A(inst));
2872
build.mvn_(inst.regA64, temp);
2873
break;
2874
}
2875
case IrCmd::BITLSHIFT_UINT:
2876
{
2877
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
2878
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant)
2879
build.lsl(inst.regA64, regOp(OP_A(inst)), uint8_t(unsigned(intOp(OP_B(inst))) & 31));
2880
else
2881
{
2882
RegisterA64 temp1 = tempUint(OP_A(inst));
2883
RegisterA64 temp2 = tempUint(OP_B(inst));
2884
build.lsl(inst.regA64, temp1, temp2);
2885
}
2886
break;
2887
}
2888
case IrCmd::BITRSHIFT_UINT:
2889
{
2890
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
2891
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant)
2892
build.lsr(inst.regA64, regOp(OP_A(inst)), uint8_t(unsigned(intOp(OP_B(inst))) & 31));
2893
else
2894
{
2895
RegisterA64 temp1 = tempUint(OP_A(inst));
2896
RegisterA64 temp2 = tempUint(OP_B(inst));
2897
build.lsr(inst.regA64, temp1, temp2);
2898
}
2899
break;
2900
}
2901
case IrCmd::BITARSHIFT_UINT:
2902
{
2903
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
2904
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant)
2905
build.asr(inst.regA64, regOp(OP_A(inst)), uint8_t(unsigned(intOp(OP_B(inst))) & 31));
2906
else
2907
{
2908
RegisterA64 temp1 = tempUint(OP_A(inst));
2909
RegisterA64 temp2 = tempUint(OP_B(inst));
2910
build.asr(inst.regA64, temp1, temp2);
2911
}
2912
break;
2913
}
2914
case IrCmd::BITLROTATE_UINT:
2915
{
2916
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant)
2917
{
2918
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
2919
build.ror(inst.regA64, regOp(OP_A(inst)), uint8_t((32 - unsigned(intOp(OP_B(inst)))) & 31));
2920
}
2921
else
2922
{
2923
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_B(inst)}); // can't reuse a because it would be clobbered by neg
2924
RegisterA64 temp1 = tempUint(OP_A(inst));
2925
RegisterA64 temp2 = tempUint(OP_B(inst));
2926
build.neg(inst.regA64, temp2);
2927
build.ror(inst.regA64, temp1, inst.regA64);
2928
}
2929
break;
2930
}
2931
case IrCmd::BITRROTATE_UINT:
2932
{
2933
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst), OP_B(inst)});
2934
if (OP_A(inst).kind == IrOpKind::Inst && OP_B(inst).kind == IrOpKind::Constant)
2935
build.ror(inst.regA64, regOp(OP_A(inst)), uint8_t(unsigned(intOp(OP_B(inst))) & 31));
2936
else
2937
{
2938
RegisterA64 temp1 = tempUint(OP_A(inst));
2939
RegisterA64 temp2 = tempUint(OP_B(inst));
2940
build.ror(inst.regA64, temp1, temp2);
2941
}
2942
break;
2943
}
2944
case IrCmd::BITCOUNTLZ_UINT:
2945
{
2946
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
2947
RegisterA64 temp = tempUint(OP_A(inst));
2948
build.clz(inst.regA64, temp);
2949
break;
2950
}
2951
case IrCmd::BITCOUNTRZ_UINT:
2952
{
2953
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
2954
RegisterA64 temp = tempUint(OP_A(inst));
2955
build.rbit(inst.regA64, temp);
2956
build.clz(inst.regA64, inst.regA64);
2957
break;
2958
}
2959
case IrCmd::BYTESWAP_UINT:
2960
{
2961
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_A(inst)});
2962
RegisterA64 temp = tempUint(OP_A(inst));
2963
build.rev(inst.regA64, temp);
2964
break;
2965
}
2966
case IrCmd::INVOKE_LIBM:
2967
{
2968
if (HAS_OP_C(inst))
2969
{
2970
bool isInt = (OP_C(inst).kind == IrOpKind::Constant) ? constOp(OP_C(inst)).kind == IrConstKind::Int
2971
: getCmdValueKind(function.instOp(OP_C(inst)).cmd) == IrValueKind::Int;
2972
2973
RegisterA64 temp1 = tempDouble(OP_B(inst));
2974
RegisterA64 temp2 = isInt ? tempInt(OP_C(inst)) : tempDouble(OP_C(inst));
2975
RegisterA64 temp3 = isInt ? noreg : regs.allocTemp(KindA64::d); // note: spill() frees all registers so we need to avoid alloc after spill
2976
regs.spill(index, {temp1, temp2});
2977
2978
if (isInt)
2979
{
2980
build.fmov(d0, temp1);
2981
build.mov(w0, temp2);
2982
}
2983
else if (d0 != temp2)
2984
{
2985
build.fmov(d0, temp1);
2986
build.fmov(d1, temp2);
2987
}
2988
else
2989
{
2990
build.fmov(temp3, d0);
2991
build.fmov(d0, temp1);
2992
build.fmov(d1, temp3);
2993
}
2994
}
2995
else
2996
{
2997
RegisterA64 temp1 = tempDouble(OP_B(inst));
2998
regs.spill(index, {temp1});
2999
build.fmov(d0, temp1);
3000
}
3001
3002
build.ldr(x1, mem(rNativeContext, getNativeContextOffset(uintOp(OP_A(inst)))));
3003
build.blr(x1);
3004
inst.regA64 = regs.takeReg(d0, index);
3005
break;
3006
}
3007
case IrCmd::GET_TYPE:
3008
{
3009
inst.regA64 = regs.allocReg(KindA64::x, index);
3010
3011
CODEGEN_ASSERT(sizeof(TString*) == 8);
3012
3013
if (OP_A(inst).kind == IrOpKind::Inst)
3014
build.add(inst.regA64, rGlobalState, regOp(OP_A(inst)), 3); // implicit uxtw
3015
else if (OP_A(inst).kind == IrOpKind::Constant)
3016
build.add(inst.regA64, rGlobalState, uint16_t(tagOp(OP_A(inst)) * 8));
3017
else
3018
CODEGEN_ASSERT(!"Unsupported instruction form");
3019
3020
build.ldr(inst.regA64, mem(inst.regA64, offsetof(global_State, ttname)));
3021
break;
3022
}
3023
case IrCmd::GET_TYPEOF:
3024
{
3025
regs.spill(index);
3026
build.mov(x0, rState);
3027
build.add(x1, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
3028
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaT_objtypenamestr)));
3029
build.blr(x2);
3030
3031
inst.regA64 = regs.takeReg(x0, index);
3032
break;
3033
}
3034
3035
case IrCmd::FINDUPVAL:
3036
{
3037
regs.spill(index);
3038
build.mov(x0, rState);
3039
build.add(x1, rBase, uint16_t(vmRegOp(OP_A(inst)) * sizeof(TValue)));
3040
build.ldr(x2, mem(rNativeContext, offsetof(NativeContext, luaF_findupval)));
3041
build.blr(x2);
3042
3043
inst.regA64 = regs.takeReg(x0, index);
3044
break;
3045
}
3046
3047
case IrCmd::BUFFER_READI8:
3048
{
3049
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_B(inst)});
3050
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)));
3051
3052
build.ldrsb(inst.regA64, addr);
3053
break;
3054
}
3055
3056
case IrCmd::BUFFER_READU8:
3057
{
3058
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_B(inst)});
3059
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)));
3060
3061
build.ldrb(inst.regA64, addr);
3062
break;
3063
}
3064
3065
case IrCmd::BUFFER_WRITEI8:
3066
{
3067
RegisterA64 temp = tempInt(OP_C(inst));
3068
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)));
3069
3070
build.strb(temp, addr);
3071
break;
3072
}
3073
3074
case IrCmd::BUFFER_READI16:
3075
{
3076
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_B(inst)});
3077
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)));
3078
3079
build.ldrsh(inst.regA64, addr);
3080
break;
3081
}
3082
3083
case IrCmd::BUFFER_READU16:
3084
{
3085
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_B(inst)});
3086
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)));
3087
3088
build.ldrh(inst.regA64, addr);
3089
break;
3090
}
3091
3092
case IrCmd::BUFFER_WRITEI16:
3093
{
3094
RegisterA64 temp = tempInt(OP_C(inst));
3095
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)));
3096
3097
build.strh(temp, addr);
3098
break;
3099
}
3100
3101
case IrCmd::BUFFER_READI32:
3102
{
3103
inst.regA64 = regs.allocReuse(KindA64::w, index, {OP_B(inst)});
3104
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)));
3105
3106
build.ldr(inst.regA64, addr);
3107
break;
3108
}
3109
3110
case IrCmd::BUFFER_WRITEI32:
3111
{
3112
RegisterA64 temp = tempInt(OP_C(inst));
3113
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)));
3114
3115
build.str(temp, addr);
3116
break;
3117
}
3118
3119
case IrCmd::BUFFER_READF32:
3120
{
3121
inst.regA64 = regs.allocReg(KindA64::s, index);
3122
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)));
3123
3124
build.ldr(inst.regA64, addr);
3125
break;
3126
}
3127
3128
case IrCmd::BUFFER_WRITEF32:
3129
{
3130
RegisterA64 temp = tempFloat(OP_C(inst));
3131
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)));
3132
3133
build.str(temp, addr);
3134
break;
3135
}
3136
3137
case IrCmd::BUFFER_READF64:
3138
{
3139
inst.regA64 = regs.allocReg(KindA64::d, index);
3140
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)));
3141
3142
build.ldr(inst.regA64, addr);
3143
break;
3144
}
3145
3146
case IrCmd::BUFFER_WRITEF64:
3147
{
3148
RegisterA64 temp = tempDouble(OP_C(inst));
3149
AddressA64 addr = tempAddrBuffer(OP_A(inst), OP_B(inst), !FFlag::LuauCodegenBufNoDefTag && !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)));
3150
3151
build.str(temp, addr);
3152
break;
3153
}
3154
3155
// To handle unsupported instructions, add "case IrCmd::OP" and make sure to set error = true!
3156
}
3157
3158
valueTracker.afterInstLowering(inst, index);
3159
3160
regs.currInstIdx = kInvalidInstIdx;
3161
3162
regs.freeLastUseRegs(inst, index);
3163
regs.freeTempRegs();
3164
}
3165
3166
void IrLoweringA64::startBlock(const IrBlock& curr)
3167
{
3168
if (curr.startpc != kBlockNoStartPc)
3169
allocAndIncrementCounterAt(
3170
curr.kind == IrBlockKind::Fallback ? CodeGenCounter::FallbackBlockExecuted : CodeGenCounter::RegularBlockExecuted, curr.startpc
3171
);
3172
}
3173
3174
void IrLoweringA64::finishBlock(const IrBlock& curr, const IrBlock& next)
3175
{
3176
if (!regs.spills.empty())
3177
{
3178
// If we have spills remaining, we have to immediately lower the successor block
3179
for (uint32_t predIdx : predecessors(function.cfg, function.getBlockIndex(next)))
3180
CODEGEN_ASSERT(predIdx == function.getBlockIndex(curr) || function.blocks[predIdx].kind == IrBlockKind::Dead);
3181
3182
// And the next block cannot be a join block in cfg
3183
CODEGEN_ASSERT(next.useCount == 1);
3184
}
3185
}
3186
3187
// Emits the out-of-line tails of the function (interrupt and VM-exit thunks),
// places the terminating undefined instruction, and records lowering statistics.
void IrLoweringA64::finishFunction()
{
    if (build.logText)
        build.logAppend("; interrupt handlers\n");

    // One thunk per interrupt point: pass the byte offset of the next instruction
    // in x0 and the resume address in x1, then jump to the shared interrupt helper
    for (InterruptHandler& handler : interruptHandlers)
    {
        build.setLabel(handler.self);
        build.mov(x0, (handler.pcpos + 1) * sizeof(Instruction));
        build.adr(x1, handler.next);
        build.b(helpers.interrupt);
    }

    if (build.logText)
        build.logAppend("; exit handlers\n");

    // One thunk per VM exit point
    for (ExitHandler& handler : exitHandlers)
    {
        if (handler.pcpos == kVmExitEntryGuardPc)
        {
            build.setLabel(handler.self);

            allocAndIncrementCounterAt(CodeGenCounter::VmExitTaken, ~0u);

            // Entry-guard exit continues in the VM without updating pc
            build.b(helpers.exitContinueVmClearNativeFlag);
        }
        else
        {
            build.setLabel(handler.self);

            allocAndIncrementCounterAt(CodeGenCounter::VmExitTaken, handler.pcpos);

            // Regular exit: pass the pc byte offset and continue in the VM
            build.mov(x0, handler.pcpos * sizeof(Instruction));
            build.b(helpers.updatePcAndContinueInVm);
        }
    }

    // An undefined instruction is placed after the function to be used as an aborting jump offset
    function.endLocation = build.getLabelOffset(build.setLabel());
    build.udf();

    if (stats)
    {
        if (error)
            stats->loweringErrors++;

        if (regs.error)
            stats->regAllocErrors++;
    }
}
3237
3238
bool IrLoweringA64::hasError() const
3239
{
3240
return error || regs.error;
3241
}
3242
3243
bool IrLoweringA64::isFallthroughBlock(const IrBlock& target, const IrBlock& next)
3244
{
3245
return target.start == next.start;
3246
}
3247
3248
void IrLoweringA64::jumpOrFallthrough(IrBlock& target, const IrBlock& next)
{
    // No branch is needed when execution falls through into the target block
    if (isFallthroughBlock(target, next))
        return;

    build.b(target.label);
}
3253
3254
Label& IrLoweringA64::getTargetLabel(IrOp op, Label& fresh)
{
    // Undefined target: caller will finalize via the fresh label (abort path)
    if (op.kind == IrOpKind::Undef)
        return fresh;

    // Regular block target: use the block's label directly
    if (op.kind != IrOpKind::VmExit)
        return labelOp(op);

    // VM exit: reuse an already-registered exit handler for this exit point when one exists
    if (uint32_t* handlerIndex = exitHandlerMap.find(vmExitOp(op)))
        return exitHandlers[*handlerIndex].self;

    // Otherwise the caller will register a new handler through finalizeTargetLabel
    return fresh;
}
3269
3270
void IrLoweringA64::finalizeTargetLabel(IrOp op, Label& fresh)
{
    // A guard with an undefined target aborts execution
    if (op.kind == IrOpKind::Undef)
    {
        emitAbort(build, fresh);
        return;
    }

    // Register a new VM exit handler only when the fresh label was actually taken (id assigned)
    if (op.kind == IrOpKind::VmExit && fresh.id != 0)
    {
        uint32_t handlerIndex = uint32_t(exitHandlers.size());

        exitHandlerMap[vmExitOp(op)] = handlerIndex;
        exitHandlers.push_back({fresh, vmExitOp(op)});
    }
}
3282
3283
void IrLoweringA64::checkSafeEnv(IrOp target, const IrBlock& next)
{
    Label fresh; // used when guard aborts execution or jumps to a VM exit

    RegisterA64 envReg = regs.allocTemp(KindA64::x);
    RegisterA64 envRegW = castReg(KindA64::w, envReg);

    // Load the closure environment and branch to the guard target unless its safeenv flag is set
    build.ldr(envReg, mem(rClosure, offsetof(Closure, env)));
    build.ldrb(envRegW, mem(envReg, offsetof(LuaTable, safeenv)));
    build.cbz(envRegW, getTargetLabel(target, fresh));

    finalizeTargetLabel(target, fresh);
}
3293
3294
// Reserves a counter record in extraNativeData and emits code that increments it.
// Layout of each record (pushed as 32-bit entries):
// {uint32_t, uint32_t, uint64_t}
//   kind, pcpos, 64-bit counter value (two zero-initialized 32-bit entries)
void IrLoweringA64::allocAndIncrementCounterAt(CodeGenCounter kind, uint32_t pcpos)
{
    if (!function.recordCounters)
        return;

    if (build.logText)
        build.logAppend("; counter kind %u at pcpos %d\n", unsigned(kind), pcpos);

    function.extraNativeData.push_back(unsigned(kind));
    function.extraNativeData.push_back(pcpos);
    // Note: offset is captured before the counter slot is pushed, so it addresses the two entries below
    incrementCounterAt(function.extraNativeData.size());
    function.extraNativeData.push_back(0);
    function.extraNativeData.push_back(0);
}
3309
3310
// Emits a 64-bit load/add/store increment of the counter stored 'offset' 32-bit words
// past the bytecode area of the proto's execdata. Not atomic — assumes best-effort counting.
void IrLoweringA64::incrementCounterAt(size_t offset)
{
    RegisterA64 temp1 = regs.allocTemp(KindA64::x);
    RegisterA64 temp2 = regs.allocTemp(KindA64::x);

    // Get counter slot
    build.ldr(temp1, mem(rClosure, offsetof(Closure, l.p)));
    build.ldr(temp1, mem(temp1, offsetof(Proto, execdata)));
    // Counter data is placed after sizecode 32-bit entries, hence the *4 scaling
    emitAddOffset(build, temp2, temp1, (unsigned(function.proto->sizecode) + offset) * 4);

    // Increment
    build.ldr(temp1, temp2);
    build.add(temp1, temp1, uint16_t(1));
    build.str(temp1, temp2);

    regs.freeTemp(temp1);
    regs.freeTemp(temp2);
}
3328
3329
// Emits the GC write-barrier precondition checks; branches to 'skip' whenever the
// barrier is not required. 'ra' may hold the value in a vector register (Inst form)
// or be addressed through raOp; 'ratag' is the statically-known tag or -1.
void IrLoweringA64::checkObjectBarrierConditions(RegisterA64 object, RegisterA64 temp, RegisterA64 ra, IrOp raOp, int ratag, Label& skip)
{
    RegisterA64 tempw = castReg(KindA64::w, temp);

    // iscollectable(ra)
    // Elided when the tag is statically known to be a GC object type
    if (ratag == -1 || !isGCO(ratag))
    {
        if (raOp.kind == IrOpKind::Inst)
        {
            // TValue is held in a vector register; the tag is in lane 3
            build.umov_4s(tempw, ra, 3);
        }
        else
        {
            AddressA64 addr = tempAddr(raOp, offsetof(TValue, tt), temp);
            build.ldr(tempw, addr);
        }

        // Tags below LUA_TSTRING are not collectable; skip the barrier for those
        build.cmp(tempw, uint16_t(LUA_TSTRING));
        build.b(ConditionA64::Less, skip);
    }

    // isblack(obj2gco(o))
    build.ldrb(tempw, mem(object, offsetof(GCheader, marked)));
    build.tbz(tempw, BLACKBIT, skip);

    // iswhite(gcvalue(ra))
    if (raOp.kind == IrOpKind::Inst)
    {
        // Extract the pointer payload from the low 64 bits of the vector register
        build.fmov(temp, castReg(KindA64::d, ra));
    }
    else
    {
        AddressA64 addr = tempAddr(raOp, offsetof(TValue, value), temp);
        build.ldr(temp, addr);
    }

    build.ldrb(tempw, mem(temp, offsetof(GCheader, marked)));
    build.tst(tempw, bit2mask(WHITE0BIT, WHITE1BIT));
    build.b(ConditionA64::Equal, skip); // Equal = Zero after tst
}
3369
3370
// Returns a 'd' register holding the double value of 'op'; instruction operands are
// used in place, constants are materialized with the cheapest available encoding.
RegisterA64 IrLoweringA64::tempDouble(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        double val = doubleOp(op);

        if (AssemblyBuilderA64::isFmovSupportedFp64(val))
        {
            // Constant fits the fmov immediate encoding
            RegisterA64 temp = regs.allocTemp(KindA64::d);
            build.fmov(temp, val);
            return temp;
        }
        else
        {
            RegisterA64 temp1 = regs.allocTemp(KindA64::x);
            RegisterA64 temp2 = regs.allocTemp(KindA64::d);

            uint64_t vali = getDoubleBits(val);

            if ((vali << 16) == 0)
            {
                // Only the top 16 bits of the pattern are set: single movz + fmov
                build.movz(temp1, uint16_t(vali >> 48), 48);
                build.fmov(temp2, temp1);
            }
            else if ((vali << 32) == 0)
            {
                // Only the top 32 bits of the pattern are set: movz + movk + fmov
                build.movz(temp1, uint16_t(vali >> 48), 48);
                build.movk(temp1, uint16_t(vali >> 32), 32);
                build.fmov(temp2, temp1);
            }
            else
            {
                // General case: load the constant through a pc-relative address
                build.adr(temp1, val);
                build.ldr(temp2, temp1);
            }

            return temp2;
        }
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}
3417
3418
// Returns an 's' register holding the float value of 'op'; IR constants are stored
// as doubles and narrowed here, then materialized with the cheapest available encoding.
RegisterA64 IrLoweringA64::tempFloat(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);
    else if (op.kind == IrOpKind::Constant)
    {
        float val = float(doubleOp(op));

        if (AssemblyBuilderA64::isFmovSupportedFp32(val))
        {
            // Constant fits the fmov immediate encoding
            RegisterA64 temp = regs.allocTemp(KindA64::s);
            build.fmov(temp, val);
            return temp;
        }
        else
        {
            RegisterA64 temp = regs.allocTemp(KindA64::s);

            uint32_t vali = getFloatBits(val);

            if ((vali & 0xffff) == 0)
            {
                // Low half of the bit pattern is zero: single movz + fmov
                RegisterA64 temp2 = regs.allocTemp(KindA64::w);

                build.movz(temp2, uint16_t(vali >> 16), 16);
                build.fmov(temp, temp2);
            }
            else
            {
                // General case: load the constant through a pc-relative address
                RegisterA64 temp2 = regs.allocTemp(KindA64::x);

                build.adr(temp2, val);
                build.ldr(temp, temp2);
            }

            return temp;
        }
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}
3462
3463
RegisterA64 IrLoweringA64::tempInt(IrOp op)
{
    // Instruction results are already register-resident
    if (op.kind == IrOpKind::Inst)
        return regOp(op);

    if (op.kind != IrOpKind::Constant)
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }

    // Materialize the integer constant into a fresh 'w' temporary
    RegisterA64 result = regs.allocTemp(KindA64::w);
    build.mov(result, intOp(op));
    return result;
}
3479
3480
RegisterA64 IrLoweringA64::tempUint(IrOp op)
{
    // Instruction results are already register-resident
    if (op.kind == IrOpKind::Inst)
        return regOp(op);

    if (op.kind != IrOpKind::Constant)
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }

    // Materialize the constant into a fresh 'w' temporary, reinterpreted as unsigned
    RegisterA64 result = regs.allocTemp(KindA64::w);
    build.mov(result, unsigned(intOp(op)));
    return result;
}
3496
3497
// Computes an address for a TValue operand plus 'offset'; allocates (or reuses
// 'tempStorage') an 'x' temporary only when a VmConst offset exceeds the directly
// encodable addressing range.
AddressA64 IrLoweringA64::tempAddr(IrOp op, int offset, RegisterA64 tempStorage)
{
    // This is needed to tighten the bounds checks in the VmConst case below
    CODEGEN_ASSERT(offset % 4 == 0);
    // Full encoded range is wider depending on the load size, but this assertion helps establish a smaller guaranteed working range [0..4096)
    CODEGEN_ASSERT(offset >= 0 && unsigned(offset / 4) <= AssemblyBuilderA64::kMaxImmediate);

    if (op.kind == IrOpKind::VmReg)
    {
        return mem(rBase, vmRegOp(op) * sizeof(TValue) + offset);
    }
    else if (op.kind == IrOpKind::VmConst)
    {
        size_t constantOffset = vmConstOp(op) * sizeof(TValue) + offset;

        // Note: cumulative offset is guaranteed to be divisible by 4; we can use that to expand the useful range that doesn't require temporaries
        if (constantOffset / 4 <= AddressA64::kMaxOffset)
            return mem(rConstants, int(constantOffset));

        // Out-of-range offset: compute the address into a temporary register
        RegisterA64 temp = tempStorage == noreg ? regs.allocTemp(KindA64::x) : tempStorage;
        CODEGEN_ASSERT(temp.kind == KindA64::x && "temp storage, when provided, must be an 'x' register");

        emitAddOffset(build, temp, rConstants, constantOffset);
        return temp;
    }
    // If we have a register, we assume it's a pointer to TValue
    else if (op.kind == IrOpKind::Inst)
    {
        CODEGEN_ASSERT(getCmdValueKind(function.instOp(op).cmd) == IrValueKind::Pointer);
        return mem(regOp(op), offset);
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}
3534
3535
// Computes an address into a buffer/userdata payload: object base + element index +
// the payload ('data') header offset selected by 'tag'.
AddressA64 IrLoweringA64::tempAddrBuffer(IrOp bufferOp, IrOp indexOp, uint8_t tag)
{
    CODEGEN_ASSERT(tag == LUA_TUSERDATA || tag == LUA_TBUFFER);
    int dataOffset = tag == LUA_TBUFFER ? offsetof(Buffer, data) : offsetof(Udata, data);

    if (indexOp.kind == IrOpKind::Inst)
    {
        // The index is zero-extended from 32 bits; its producer must leave the high bits clean
        CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(indexOp).cmd));

        RegisterA64 temp = regs.allocTemp(KindA64::x);
        build.add(temp, regOp(bufferOp), regOp(indexOp)); // implicit uxtw
        return mem(temp, dataOffset);
    }
    else if (indexOp.kind == IrOpKind::Constant)
    {
        // Since the resulting address may be used to load any size, including 1 byte, from an unaligned offset, we are limited by unscaled
        // encoding
        if (unsigned(intOp(indexOp)) + dataOffset <= 255)
            return mem(regOp(bufferOp), int(intOp(indexOp) + dataOffset));

        // indexOp can only be negative in dead code (since offsets are checked); this avoids assertion in emitAddOffset
        if (intOp(indexOp) < 0)
            return mem(regOp(bufferOp), dataOffset);

        // Large constant index: fold it into a temporary base register first
        RegisterA64 temp = regs.allocTemp(KindA64::x);
        emitAddOffset(build, temp, regOp(bufferOp), size_t(intOp(indexOp)));
        return mem(temp, dataOffset);
    }
    else
    {
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}
3569
3570
RegisterA64 IrLoweringA64::regOp(IrOp op)
{
    IrInst& source = function.instOp(op);

    // Values that were spilled or need a reload are restored into a register before use
    bool needsRestore = source.spilled || source.needsReload;

    if (needsRestore)
        regs.restoreReg(source);

    CODEGEN_ASSERT(source.regA64 != noreg);
    return source.regA64;
}
3580
3581
IrConst IrLoweringA64::constOp(IrOp op) const
{
    // Thin forwarder to the IR function's constant accessor
    IrConst value = function.constOp(op);
    return value;
}
3585
3586
uint8_t IrLoweringA64::tagOp(IrOp op) const
{
    // Thin forwarder to the IR function's tag accessor
    uint8_t tag = function.tagOp(op);
    return tag;
}
3590
3591
int IrLoweringA64::intOp(IrOp op) const
{
    // Thin forwarder to the IR function's signed integer accessor
    int value = function.intOp(op);
    return value;
}
3595
3596
unsigned IrLoweringA64::uintOp(IrOp op) const
{
    // Thin forwarder to the IR function's unsigned integer accessor
    unsigned value = function.uintOp(op);
    return value;
}
3600
3601
unsigned IrLoweringA64::importOp(IrOp op) const
{
    // Thin forwarder to the IR function's import accessor
    unsigned value = function.importOp(op);
    return value;
}
3605
3606
double IrLoweringA64::doubleOp(IrOp op) const
{
    // Thin forwarder to the IR function's double-constant accessor
    double value = function.doubleOp(op);
    return value;
}
3610
3611
IrBlock& IrLoweringA64::blockOp(IrOp op) const
{
    // Thin forwarder to the IR function's block accessor
    IrBlock& block = function.blockOp(op);
    return block;
}
3615
3616
Label& IrLoweringA64::labelOp(IrOp op) const
{
    // Label of the block this operand refers to
    IrBlock& block = blockOp(op);
    return block.label;
}
3620
3621
} // namespace A64
3622
} // namespace CodeGen
3623
} // namespace Luau
3624
3625