Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/CodeGen/src/IrLoweringX64.cpp
2725 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#include "IrLoweringX64.h"
3
4
#include "Luau/CodeGenOptions.h"
5
#include "Luau/DenseHash.h"
6
#include "Luau/IrCallWrapperX64.h"
7
#include "Luau/IrData.h"
8
#include "Luau/IrUtils.h"
9
#include "Luau/LoweringStats.h"
10
11
#include "EmitBuiltinsX64.h"
12
#include "EmitCommonX64.h"
13
#include "EmitInstructionX64.h"
14
#include "NativeState.h"
15
16
#include "lstate.h"
17
#include "lgc.h"
18
19
LUAU_FASTFLAG(LuauCodegenBlockSafeEnv)
20
LUAU_FASTFLAG(LuauCodegenBufferRangeMerge4)
21
LUAU_FASTFLAG(LuauCodegenBufNoDefTag)
22
23
namespace Luau
24
{
25
namespace CodeGen
26
{
27
namespace X64
28
{
29
30
// Constructs the x64 IR lowering context over the given assembly builder.
// Wires the IR value tracker back into the register allocator so that when a
// tracked instruction's value must be materialized again, the allocator
// restores it; then aligns the upcoming function entry point.
IrLoweringX64::IrLoweringX64(AssemblyBuilderX64& build, ModuleHelpers& helpers, IrFunction& function, LoweringStats* stats)
    : build(build)
    , helpers(helpers)
    , function(function)
    , stats(stats)
    , regs(build, function, stats)
    , valueTracker(function)
    , exitHandlerMap(~0u)
{
    // On a restore request, delegate to the register allocator; 'false' here
    // means the value is not being restored as its final (last) use.
    valueTracker.setRestoreCallback(
        &regs,
        [](void* context, IrInst& inst)
        {
            static_cast<IrRegAllocX64*>(context)->restore(inst, false);
        }
    );

    // Align the function start; padding is filled with ud2 bytes so that a
    // stray jump into the padding faults instead of executing garbage.
    build.align(kFunctionAlignment, X64::AlignmentDataX64::Ud2);
}
49
50
void IrLoweringX64::lowerInst(IrInst& inst, uint32_t index, const IrBlock& next)
51
{
52
regs.currInstIdx = index;
53
54
valueTracker.beforeInstLowering(inst);
55
56
switch (inst.cmd)
57
{
58
case IrCmd::LOAD_TAG:
59
inst.regX64 = regs.allocReg(SizeX64::dword, index);
60
61
if (OP_A(inst).kind == IrOpKind::VmReg)
62
build.mov(inst.regX64, luauRegTag(vmRegOp(OP_A(inst))));
63
else if (OP_A(inst).kind == IrOpKind::VmConst)
64
build.mov(inst.regX64, luauConstantTag(vmConstOp(OP_A(inst))));
65
// If we have a register, we assume it's a pointer to TValue
66
// We might introduce explicit operand types in the future to make this more robust
67
else if (OP_A(inst).kind == IrOpKind::Inst)
68
build.mov(inst.regX64, dword[regOp(OP_A(inst)) + offsetof(TValue, tt)]);
69
else
70
CODEGEN_ASSERT(!"Unsupported instruction form");
71
break;
72
case IrCmd::LOAD_POINTER:
73
inst.regX64 = regs.allocReg(SizeX64::qword, index);
74
75
if (OP_A(inst).kind == IrOpKind::VmReg)
76
build.mov(inst.regX64, luauRegValue(vmRegOp(OP_A(inst))));
77
else if (OP_A(inst).kind == IrOpKind::VmConst)
78
build.mov(inst.regX64, luauConstantValue(vmConstOp(OP_A(inst))));
79
// If we have a register, we assume it's a pointer to TValue
80
// We might introduce explicit operand types in the future to make this more robust
81
else if (OP_A(inst).kind == IrOpKind::Inst)
82
build.mov(inst.regX64, qword[regOp(OP_A(inst)) + offsetof(TValue, value)]);
83
else
84
CODEGEN_ASSERT(!"Unsupported instruction form");
85
break;
86
case IrCmd::LOAD_DOUBLE:
87
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
88
89
if (OP_A(inst).kind == IrOpKind::VmReg)
90
build.vmovsd(inst.regX64, luauRegValue(vmRegOp(OP_A(inst))));
91
else if (OP_A(inst).kind == IrOpKind::VmConst)
92
build.vmovsd(inst.regX64, luauConstantValue(vmConstOp(OP_A(inst))));
93
else
94
CODEGEN_ASSERT(!"Unsupported instruction form");
95
break;
96
case IrCmd::LOAD_INT:
97
inst.regX64 = regs.allocReg(SizeX64::dword, index);
98
99
build.mov(inst.regX64, luauRegValueInt(vmRegOp(OP_A(inst))));
100
break;
101
case IrCmd::LOAD_FLOAT:
102
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
103
104
if (OP_A(inst).kind == IrOpKind::VmReg)
105
build.vmovss(inst.regX64, dword[rBase + vmRegOp(OP_A(inst)) * sizeof(TValue) + offsetof(TValue, value) + intOp(OP_B(inst))]);
106
else if (OP_A(inst).kind == IrOpKind::VmConst)
107
build.vmovss(inst.regX64, dword[rConstants + vmConstOp(OP_A(inst)) * sizeof(TValue) + offsetof(TValue, value) + intOp(OP_B(inst))]);
108
else
109
CODEGEN_ASSERT(!"Unsupported instruction form");
110
break;
111
case IrCmd::LOAD_TVALUE:
112
{
113
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
114
115
int addrOffset = HAS_OP_B(inst) ? intOp(OP_B(inst)) : 0;
116
117
if (OP_A(inst).kind == IrOpKind::VmReg)
118
build.vmovups(inst.regX64, luauReg(vmRegOp(OP_A(inst))));
119
else if (OP_A(inst).kind == IrOpKind::VmConst)
120
build.vmovups(inst.regX64, luauConstant(vmConstOp(OP_A(inst))));
121
else if (OP_A(inst).kind == IrOpKind::Inst)
122
build.vmovups(inst.regX64, xmmword[regOp(OP_A(inst)) + addrOffset]);
123
else
124
CODEGEN_ASSERT(!"Unsupported instruction form");
125
break;
126
}
127
case IrCmd::LOAD_ENV:
128
inst.regX64 = regs.allocReg(SizeX64::qword, index);
129
130
build.mov(inst.regX64, sClosure);
131
build.mov(inst.regX64, qword[inst.regX64 + offsetof(Closure, env)]);
132
break;
133
case IrCmd::GET_ARR_ADDR:
134
if (OP_B(inst).kind == IrOpKind::Inst)
135
{
136
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {OP_B(inst)});
137
138
if (dwordReg(inst.regX64) != regOp(OP_B(inst)))
139
build.mov(dwordReg(inst.regX64), regOp(OP_B(inst)));
140
141
build.shl(dwordReg(inst.regX64), kTValueSizeLog2);
142
build.add(inst.regX64, qword[regOp(OP_A(inst)) + offsetof(LuaTable, array)]);
143
}
144
else if (OP_B(inst).kind == IrOpKind::Constant)
145
{
146
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {OP_A(inst)});
147
148
build.mov(inst.regX64, qword[regOp(OP_A(inst)) + offsetof(LuaTable, array)]);
149
150
if (intOp(OP_B(inst)) != 0)
151
build.lea(inst.regX64, addr[inst.regX64 + intOp(OP_B(inst)) * sizeof(TValue)]);
152
}
153
else
154
{
155
CODEGEN_ASSERT(!"Unsupported instruction form");
156
}
157
break;
158
case IrCmd::GET_SLOT_NODE_ADDR:
159
{
160
inst.regX64 = regs.allocReg(SizeX64::qword, index);
161
162
ScopedRegX64 tmp{regs, SizeX64::qword};
163
164
getTableNodeAtCachedSlot(build, tmp.reg, inst.regX64, regOp(OP_A(inst)), uintOp(OP_B(inst)));
165
break;
166
}
167
case IrCmd::GET_HASH_NODE_ADDR:
168
{
169
// Custom bit shift value can only be placed in cl
170
ScopedRegX64 shiftTmp{regs, regs.takeReg(rcx, kInvalidInstIdx)};
171
172
inst.regX64 = regs.allocReg(SizeX64::qword, index);
173
174
ScopedRegX64 tmp{regs, SizeX64::qword};
175
176
build.mov(inst.regX64, qword[regOp(OP_A(inst)) + offsetof(LuaTable, node)]);
177
build.mov(dwordReg(tmp.reg), 1);
178
build.mov(byteReg(shiftTmp.reg), byte[regOp(OP_A(inst)) + offsetof(LuaTable, lsizenode)]);
179
build.shl(dwordReg(tmp.reg), byteReg(shiftTmp.reg));
180
build.dec(dwordReg(tmp.reg));
181
build.and_(dwordReg(tmp.reg), uintOp(OP_B(inst)));
182
build.shl(tmp.reg, kLuaNodeSizeLog2);
183
build.add(inst.regX64, tmp.reg);
184
break;
185
};
186
case IrCmd::GET_CLOSURE_UPVAL_ADDR:
187
{
188
inst.regX64 = regs.allocRegOrReuse(SizeX64::qword, index, {OP_A(inst)});
189
190
if (OP_A(inst).kind == IrOpKind::Undef)
191
{
192
build.mov(inst.regX64, sClosure);
193
}
194
else
195
{
196
RegisterX64 cl = regOp(OP_A(inst));
197
if (inst.regX64 != cl)
198
build.mov(inst.regX64, cl);
199
}
200
201
build.add(inst.regX64, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(OP_B(inst)));
202
break;
203
}
204
case IrCmd::STORE_TAG:
205
if (OP_B(inst).kind == IrOpKind::Constant)
206
{
207
if (OP_A(inst).kind == IrOpKind::Inst)
208
build.mov(dword[regOp(OP_A(inst)) + offsetof(TValue, tt)], tagOp(OP_B(inst)));
209
else
210
build.mov(luauRegTag(vmRegOp(OP_A(inst))), tagOp(OP_B(inst)));
211
}
212
else
213
{
214
CODEGEN_ASSERT(!"Unsupported instruction form");
215
}
216
break;
217
case IrCmd::STORE_POINTER:
218
{
219
OperandX64 valueLhs =
220
OP_A(inst).kind == IrOpKind::Inst ? qword[regOp(OP_A(inst)) + offsetof(TValue, value)] : luauRegValue(vmRegOp(OP_A(inst)));
221
222
if (OP_B(inst).kind == IrOpKind::Constant)
223
{
224
CODEGEN_ASSERT(intOp(OP_B(inst)) == 0);
225
build.mov(valueLhs, 0);
226
}
227
else if (OP_B(inst).kind == IrOpKind::Inst)
228
{
229
build.mov(valueLhs, regOp(OP_B(inst)));
230
}
231
else
232
{
233
CODEGEN_ASSERT(!"Unsupported instruction form");
234
}
235
break;
236
}
237
case IrCmd::STORE_EXTRA:
238
if (OP_B(inst).kind == IrOpKind::Constant)
239
{
240
if (OP_A(inst).kind == IrOpKind::Inst)
241
build.mov(dword[regOp(OP_A(inst)) + offsetof(TValue, extra)], intOp(OP_B(inst)));
242
else
243
build.mov(luauRegExtra(vmRegOp(OP_A(inst))), intOp(OP_B(inst)));
244
}
245
else
246
{
247
CODEGEN_ASSERT(!"Unsupported instruction form");
248
}
249
break;
250
case IrCmd::STORE_DOUBLE:
251
{
252
OperandX64 valueLhs =
253
OP_A(inst).kind == IrOpKind::Inst ? qword[regOp(OP_A(inst)) + offsetof(TValue, value)] : luauRegValue(vmRegOp(OP_A(inst)));
254
255
if (OP_B(inst).kind == IrOpKind::Constant)
256
{
257
ScopedRegX64 tmp{regs, SizeX64::xmmword};
258
259
build.vmovsd(tmp.reg, build.f64(doubleOp(OP_B(inst))));
260
build.vmovsd(valueLhs, tmp.reg);
261
}
262
else if (OP_B(inst).kind == IrOpKind::Inst)
263
{
264
build.vmovsd(valueLhs, regOp(OP_B(inst)));
265
}
266
else
267
{
268
CODEGEN_ASSERT(!"Unsupported instruction form");
269
}
270
break;
271
}
272
case IrCmd::STORE_INT:
273
if (OP_B(inst).kind == IrOpKind::Constant)
274
build.mov(luauRegValueInt(vmRegOp(OP_A(inst))), intOp(OP_B(inst)));
275
else if (OP_B(inst).kind == IrOpKind::Inst)
276
build.mov(luauRegValueInt(vmRegOp(OP_A(inst))), regOp(OP_B(inst)));
277
else
278
CODEGEN_ASSERT(!"Unsupported instruction form");
279
break;
280
case IrCmd::STORE_VECTOR:
281
storeFloat(luauRegValueVector(vmRegOp(OP_A(inst)), 0), OP_B(inst));
282
storeFloat(luauRegValueVector(vmRegOp(OP_A(inst)), 1), OP_C(inst));
283
storeFloat(luauRegValueVector(vmRegOp(OP_A(inst)), 2), OP_D(inst));
284
285
if (HAS_OP_E(inst))
286
build.mov(luauRegTag(vmRegOp(OP_A(inst))), tagOp(OP_E(inst)));
287
break;
288
case IrCmd::STORE_TVALUE:
289
{
290
int addrOffset = HAS_OP_C(inst) ? intOp(OP_C(inst)) : 0;
291
292
if (OP_A(inst).kind == IrOpKind::VmReg)
293
build.vmovups(luauReg(vmRegOp(OP_A(inst))), regOp(OP_B(inst)));
294
else if (OP_A(inst).kind == IrOpKind::Inst)
295
build.vmovups(xmmword[regOp(OP_A(inst)) + addrOffset], regOp(OP_B(inst)));
296
else
297
CODEGEN_ASSERT(!"Unsupported instruction form");
298
break;
299
}
300
case IrCmd::STORE_SPLIT_TVALUE:
301
{
302
int addrOffset = HAS_OP_D(inst) ? intOp(OP_D(inst)) : 0;
303
304
OperandX64 tagLhs =
305
OP_A(inst).kind == IrOpKind::Inst ? dword[regOp(OP_A(inst)) + offsetof(TValue, tt) + addrOffset] : luauRegTag(vmRegOp(OP_A(inst)));
306
build.mov(tagLhs, tagOp(OP_B(inst)));
307
308
if (tagOp(OP_B(inst)) == LUA_TBOOLEAN)
309
{
310
OperandX64 valueLhs = OP_A(inst).kind == IrOpKind::Inst ? dword[regOp(OP_A(inst)) + offsetof(TValue, value) + addrOffset]
311
: luauRegValueInt(vmRegOp(OP_A(inst)));
312
build.mov(valueLhs, OP_C(inst).kind == IrOpKind::Constant ? OperandX64(intOp(OP_C(inst))) : regOp(OP_C(inst)));
313
}
314
else if (tagOp(OP_B(inst)) == LUA_TNUMBER)
315
{
316
OperandX64 valueLhs = OP_A(inst).kind == IrOpKind::Inst ? qword[regOp(OP_A(inst)) + offsetof(TValue, value) + addrOffset]
317
: luauRegValue(vmRegOp(OP_A(inst)));
318
319
if (OP_C(inst).kind == IrOpKind::Constant)
320
{
321
ScopedRegX64 tmp{regs, SizeX64::xmmword};
322
323
build.vmovsd(tmp.reg, build.f64(doubleOp(OP_C(inst))));
324
build.vmovsd(valueLhs, tmp.reg);
325
}
326
else
327
{
328
build.vmovsd(valueLhs, regOp(OP_C(inst)));
329
}
330
}
331
else if (isGCO(tagOp(OP_B(inst))))
332
{
333
OperandX64 valueLhs = OP_A(inst).kind == IrOpKind::Inst ? qword[regOp(OP_A(inst)) + offsetof(TValue, value) + addrOffset]
334
: luauRegValue(vmRegOp(OP_A(inst)));
335
build.mov(valueLhs, regOp(OP_C(inst)));
336
}
337
else
338
{
339
CODEGEN_ASSERT(!"Unsupported instruction form");
340
}
341
break;
342
}
343
case IrCmd::ADD_INT:
344
{
345
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
346
347
if (OP_A(inst).kind == IrOpKind::Constant)
348
{
349
build.lea(inst.regX64, addr[regOp(OP_B(inst)) + intOp(OP_A(inst))]);
350
}
351
else if (OP_A(inst).kind == IrOpKind::Inst)
352
{
353
if (inst.regX64 == regOp(OP_A(inst)))
354
{
355
if (OP_B(inst).kind == IrOpKind::Inst)
356
build.add(inst.regX64, regOp(OP_B(inst)));
357
else if (intOp(OP_B(inst)) == 1)
358
build.inc(inst.regX64);
359
else
360
build.add(inst.regX64, intOp(OP_B(inst)));
361
}
362
else
363
{
364
if (OP_B(inst).kind == IrOpKind::Inst)
365
build.lea(inst.regX64, addr[regOp(OP_A(inst)) + regOp(OP_B(inst))]);
366
else
367
build.lea(inst.regX64, addr[regOp(OP_A(inst)) + intOp(OP_B(inst))]);
368
}
369
}
370
else
371
{
372
CODEGEN_ASSERT(!"Unsupported instruction form");
373
}
374
break;
375
}
376
case IrCmd::SUB_INT:
377
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
378
379
if (OP_A(inst).kind == IrOpKind::Inst)
380
{
381
if (OP_B(inst).kind == IrOpKind::Constant)
382
{
383
if (inst.regX64 != regOp(OP_A(inst)))
384
build.lea(inst.regX64, addr[regOp(OP_A(inst)) - intOp(OP_B(inst))]);
385
else
386
build.sub(inst.regX64, intOp(OP_B(inst)));
387
}
388
else
389
{
390
// If result reuses the source, we can subtract in place, otherwise we need to setup our initial value
391
if (inst.regX64 != regOp(OP_A(inst)))
392
build.mov(inst.regX64, regOp(OP_A(inst)));
393
394
build.sub(inst.regX64, regOp(OP_B(inst)));
395
}
396
}
397
else if (OP_B(inst).kind == IrOpKind::Inst)
398
{
399
build.mov(inst.regX64, intOp(OP_A(inst)));
400
build.sub(inst.regX64, regOp(OP_B(inst)));
401
}
402
else
403
{
404
CODEGEN_ASSERT(!"Unsupported instruction form");
405
}
406
break;
407
case IrCmd::SEXTI8_INT:
408
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
409
410
build.movsx(inst.regX64, byteReg(regOp(OP_A(inst))));
411
break;
412
case IrCmd::SEXTI16_INT:
413
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
414
415
build.movsx(inst.regX64, wordReg(regOp(OP_A(inst))));
416
break;
417
case IrCmd::ADD_NUM:
418
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
419
420
if (OP_A(inst).kind == IrOpKind::Constant)
421
{
422
ScopedRegX64 tmp{regs, SizeX64::xmmword};
423
424
build.vmovsd(tmp.reg, memRegDoubleOp(OP_A(inst)));
425
build.vaddsd(inst.regX64, tmp.reg, memRegDoubleOp(OP_B(inst)));
426
}
427
else
428
{
429
build.vaddsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
430
}
431
break;
432
case IrCmd::SUB_NUM:
433
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
434
435
if (OP_A(inst).kind == IrOpKind::Constant)
436
{
437
ScopedRegX64 tmp{regs, SizeX64::xmmword};
438
439
build.vmovsd(tmp.reg, memRegDoubleOp(OP_A(inst)));
440
build.vsubsd(inst.regX64, tmp.reg, memRegDoubleOp(OP_B(inst)));
441
}
442
else
443
{
444
build.vsubsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
445
}
446
break;
447
case IrCmd::MUL_NUM:
448
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
449
450
if (OP_A(inst).kind == IrOpKind::Constant)
451
{
452
ScopedRegX64 tmp{regs, SizeX64::xmmword};
453
454
build.vmovsd(tmp.reg, memRegDoubleOp(OP_A(inst)));
455
build.vmulsd(inst.regX64, tmp.reg, memRegDoubleOp(OP_B(inst)));
456
}
457
else
458
{
459
build.vmulsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
460
}
461
break;
462
case IrCmd::DIV_NUM:
463
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
464
465
if (OP_A(inst).kind == IrOpKind::Constant)
466
{
467
ScopedRegX64 tmp{regs, SizeX64::xmmword};
468
469
build.vmovsd(tmp.reg, memRegDoubleOp(OP_A(inst)));
470
build.vdivsd(inst.regX64, tmp.reg, memRegDoubleOp(OP_B(inst)));
471
}
472
else
473
{
474
build.vdivsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
475
}
476
break;
477
case IrCmd::IDIV_NUM:
478
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
479
480
if (OP_A(inst).kind == IrOpKind::Constant)
481
{
482
ScopedRegX64 tmp{regs, SizeX64::xmmword};
483
484
build.vmovsd(tmp.reg, memRegDoubleOp(OP_A(inst)));
485
build.vdivsd(inst.regX64, tmp.reg, memRegDoubleOp(OP_B(inst)));
486
}
487
else
488
{
489
build.vdivsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
490
}
491
build.vroundsd(inst.regX64, inst.regX64, inst.regX64, RoundingModeX64::RoundToNegativeInfinity);
492
break;
493
case IrCmd::MULADD_NUM:
494
{
495
if ((build.features & Feature_FMA3) != 0)
496
{
497
if (OP_A(inst).kind != IrOpKind::Inst)
498
{
499
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
500
build.vmovsd(inst.regX64, memRegDoubleOp(OP_A(inst)));
501
}
502
else
503
{
504
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
505
RegisterX64 aReg = regOp(OP_A(inst));
506
if (inst.regX64 != aReg)
507
build.vmovupd(inst.regX64, aReg);
508
}
509
510
ScopedRegX64 optBTmp{regs};
511
RegisterX64 bReg{};
512
513
if (OP_B(inst).kind == IrOpKind::Constant)
514
{
515
optBTmp.alloc(SizeX64::xmmword);
516
517
build.vmovsd(optBTmp.reg, memRegDoubleOp(OP_B(inst)));
518
bReg = optBTmp.reg;
519
}
520
else
521
{
522
bReg = regOp(OP_B(inst));
523
}
524
525
build.vfmadd213pd(inst.regX64, bReg, memRegDoubleOp(OP_C(inst)));
526
}
527
else
528
{
529
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
530
531
if (OP_A(inst).kind != IrOpKind::Inst && OP_B(inst).kind != IrOpKind::Inst)
532
{
533
build.vmovsd(inst.regX64, memRegDoubleOp(OP_A(inst)));
534
build.vmulsd(inst.regX64, inst.regX64, memRegDoubleOp(OP_B(inst)));
535
}
536
else if (OP_A(inst).kind == IrOpKind::Inst)
537
{
538
build.vmulsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
539
}
540
else
541
{
542
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Inst);
543
build.vmulsd(inst.regX64, regOp(OP_B(inst)), memRegDoubleOp(OP_A(inst)));
544
}
545
546
build.vaddsd(inst.regX64, inst.regX64, memRegDoubleOp(OP_C(inst)));
547
}
548
break;
549
}
550
case IrCmd::MOD_NUM:
551
{
552
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
553
554
ScopedRegX64 optLhsTmp{regs};
555
RegisterX64 lhs;
556
557
if (OP_A(inst).kind == IrOpKind::Constant)
558
{
559
optLhsTmp.alloc(SizeX64::xmmword);
560
561
build.vmovsd(optLhsTmp.reg, memRegDoubleOp(OP_A(inst)));
562
lhs = optLhsTmp.reg;
563
}
564
else
565
{
566
lhs = regOp(OP_A(inst));
567
}
568
569
if (OP_B(inst).kind == IrOpKind::Inst)
570
{
571
ScopedRegX64 tmp{regs, SizeX64::xmmword};
572
573
build.vdivsd(tmp.reg, lhs, memRegDoubleOp(OP_B(inst)));
574
build.vroundsd(tmp.reg, tmp.reg, tmp.reg, RoundingModeX64::RoundToNegativeInfinity);
575
build.vmulsd(tmp.reg, tmp.reg, memRegDoubleOp(OP_B(inst)));
576
build.vsubsd(inst.regX64, lhs, tmp.reg);
577
}
578
else
579
{
580
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
581
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
582
583
build.vmovsd(tmp1.reg, memRegDoubleOp(OP_B(inst)));
584
build.vdivsd(tmp2.reg, lhs, tmp1.reg);
585
build.vroundsd(tmp2.reg, tmp2.reg, tmp2.reg, RoundingModeX64::RoundToNegativeInfinity);
586
build.vmulsd(tmp1.reg, tmp2.reg, tmp1.reg);
587
build.vsubsd(inst.regX64, lhs, tmp1.reg);
588
}
589
break;
590
}
591
case IrCmd::MIN_NUM:
592
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
593
594
if (OP_A(inst).kind == IrOpKind::Constant)
595
{
596
ScopedRegX64 tmp{regs, SizeX64::xmmword};
597
598
build.vmovsd(tmp.reg, memRegDoubleOp(OP_A(inst)));
599
build.vminsd(inst.regX64, tmp.reg, memRegDoubleOp(OP_B(inst)));
600
}
601
else
602
{
603
build.vminsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
604
}
605
break;
606
case IrCmd::MAX_NUM:
607
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
608
609
if (OP_A(inst).kind == IrOpKind::Constant)
610
{
611
ScopedRegX64 tmp{regs, SizeX64::xmmword};
612
613
build.vmovsd(tmp.reg, memRegDoubleOp(OP_A(inst)));
614
build.vmaxsd(inst.regX64, tmp.reg, memRegDoubleOp(OP_B(inst)));
615
}
616
else
617
{
618
build.vmaxsd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)));
619
}
620
break;
621
case IrCmd::UNM_NUM:
622
{
623
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
624
625
build.vxorpd(inst.regX64, regOp(OP_A(inst)), build.f64(-0.0));
626
break;
627
}
628
case IrCmd::FLOOR_NUM:
629
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
630
631
build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(OP_A(inst)), RoundingModeX64::RoundToNegativeInfinity);
632
break;
633
case IrCmd::CEIL_NUM:
634
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
635
636
build.vroundsd(inst.regX64, inst.regX64, memRegDoubleOp(OP_A(inst)), RoundingModeX64::RoundToPositiveInfinity);
637
break;
638
case IrCmd::ROUND_NUM:
639
{
640
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
641
642
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
643
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
644
645
if (OP_A(inst).kind != IrOpKind::Inst)
646
build.vmovsd(inst.regX64, memRegDoubleOp(OP_A(inst)));
647
else if (regOp(OP_A(inst)) != inst.regX64)
648
build.vmovsd(inst.regX64, inst.regX64, regOp(OP_A(inst)));
649
650
build.vandpd(tmp1.reg, inst.regX64, build.f64x2(-0.0, -0.0));
651
build.vmovsd(tmp2.reg, build.i64(0x3fdfffffffffffff)); // 0.49999999999999994
652
build.vorpd(tmp1.reg, tmp1.reg, tmp2.reg);
653
build.vaddsd(inst.regX64, inst.regX64, tmp1.reg);
654
build.vroundsd(inst.regX64, inst.regX64, inst.regX64, RoundingModeX64::RoundToZero);
655
break;
656
}
657
case IrCmd::SQRT_NUM:
658
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
659
660
build.vsqrtsd(inst.regX64, inst.regX64, memRegDoubleOp(OP_A(inst)));
661
break;
662
case IrCmd::ABS_NUM:
663
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
664
665
if (OP_A(inst).kind != IrOpKind::Inst)
666
build.vmovsd(inst.regX64, memRegDoubleOp(OP_A(inst)));
667
else if (regOp(OP_A(inst)) != inst.regX64)
668
build.vmovsd(inst.regX64, inst.regX64, regOp(OP_A(inst)));
669
670
build.vandpd(inst.regX64, inst.regX64, build.i64(~(1LL << 63)));
671
break;
672
case IrCmd::SIGN_NUM:
673
{
674
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
675
676
ScopedRegX64 tmp0{regs, SizeX64::xmmword};
677
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
678
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
679
680
build.vxorpd(tmp0.reg, tmp0.reg, tmp0.reg);
681
682
// Set tmp1 to -1 if arg < 0, else 0
683
build.vcmpltsd(tmp1.reg, regOp(OP_A(inst)), tmp0.reg);
684
build.vmovsd(tmp2.reg, build.f64(-1));
685
build.vandpd(tmp1.reg, tmp1.reg, tmp2.reg);
686
687
// Set mask bit to 1 if 0 < arg, else 0
688
build.vcmpltsd(inst.regX64, tmp0.reg, regOp(OP_A(inst)));
689
690
// Result = (mask-bit == 1) ? 1.0 : tmp1
691
// If arg < 0 then tmp1 is -1 and mask-bit is 0, result is -1
692
// If arg == 0 then tmp1 is 0 and mask-bit is 0, result is 0
693
// If arg > 0 then tmp1 is 0 and mask-bit is 1, result is 1
694
build.vblendvpd(inst.regX64, tmp1.reg, build.f64x2(1, 1), inst.regX64);
695
break;
696
}
697
case IrCmd::ADD_FLOAT:
698
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
699
700
if (OP_A(inst).kind == IrOpKind::Constant)
701
{
702
ScopedRegX64 tmp{regs, SizeX64::xmmword};
703
704
build.vmovss(tmp.reg, memRegFloatOp(OP_A(inst)));
705
build.vaddss(inst.regX64, tmp.reg, memRegFloatOp(OP_B(inst)));
706
}
707
else
708
{
709
build.vaddss(inst.regX64, regOp(OP_A(inst)), memRegFloatOp(OP_B(inst)));
710
}
711
break;
712
case IrCmd::SUB_FLOAT:
713
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
714
715
if (OP_A(inst).kind == IrOpKind::Constant)
716
{
717
ScopedRegX64 tmp{regs, SizeX64::xmmword};
718
719
build.vmovss(tmp.reg, memRegFloatOp(OP_A(inst)));
720
build.vsubss(inst.regX64, tmp.reg, memRegFloatOp(OP_B(inst)));
721
}
722
else
723
{
724
build.vsubss(inst.regX64, regOp(OP_A(inst)), memRegFloatOp(OP_B(inst)));
725
}
726
break;
727
case IrCmd::MUL_FLOAT:
728
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
729
730
if (OP_A(inst).kind == IrOpKind::Constant)
731
{
732
ScopedRegX64 tmp{regs, SizeX64::xmmword};
733
734
build.vmovss(tmp.reg, memRegFloatOp(OP_A(inst)));
735
build.vmulss(inst.regX64, tmp.reg, memRegFloatOp(OP_B(inst)));
736
}
737
else
738
{
739
build.vmulss(inst.regX64, regOp(OP_A(inst)), memRegFloatOp(OP_B(inst)));
740
}
741
break;
742
case IrCmd::DIV_FLOAT:
743
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
744
745
if (OP_A(inst).kind == IrOpKind::Constant)
746
{
747
ScopedRegX64 tmp{regs, SizeX64::xmmword};
748
749
build.vmovss(tmp.reg, memRegFloatOp(OP_A(inst)));
750
build.vdivss(inst.regX64, tmp.reg, memRegFloatOp(OP_B(inst)));
751
}
752
else
753
{
754
build.vdivss(inst.regX64, regOp(OP_A(inst)), memRegFloatOp(OP_B(inst)));
755
}
756
break;
757
case IrCmd::MIN_FLOAT:
758
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
759
760
if (OP_A(inst).kind == IrOpKind::Constant)
761
{
762
ScopedRegX64 tmp{regs, SizeX64::xmmword};
763
764
build.vmovss(tmp.reg, memRegFloatOp(OP_A(inst)));
765
build.vminss(inst.regX64, tmp.reg, memRegFloatOp(OP_B(inst)));
766
}
767
else
768
{
769
build.vminss(inst.regX64, regOp(OP_A(inst)), memRegFloatOp(OP_B(inst)));
770
}
771
break;
772
case IrCmd::MAX_FLOAT:
773
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
774
775
if (OP_A(inst).kind == IrOpKind::Constant)
776
{
777
ScopedRegX64 tmp{regs, SizeX64::xmmword};
778
779
build.vmovss(tmp.reg, memRegFloatOp(OP_A(inst)));
780
build.vmaxss(inst.regX64, tmp.reg, memRegFloatOp(OP_B(inst)));
781
}
782
else
783
{
784
build.vmaxss(inst.regX64, regOp(OP_A(inst)), memRegFloatOp(OP_B(inst)));
785
}
786
break;
787
case IrCmd::UNM_FLOAT:
788
{
789
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
790
791
build.vxorps(inst.regX64, regOp(OP_A(inst)), build.f32(-0.0));
792
break;
793
}
794
case IrCmd::FLOOR_FLOAT:
795
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
796
797
build.vroundss(inst.regX64, inst.regX64, memRegFloatOp(OP_A(inst)), RoundingModeX64::RoundToNegativeInfinity);
798
break;
799
case IrCmd::CEIL_FLOAT:
800
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
801
802
build.vroundss(inst.regX64, inst.regX64, memRegFloatOp(OP_A(inst)), RoundingModeX64::RoundToPositiveInfinity);
803
break;
804
case IrCmd::SQRT_FLOAT:
805
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
806
807
build.vsqrtss(inst.regX64, inst.regX64, memRegFloatOp(OP_A(inst)));
808
break;
809
case IrCmd::ABS_FLOAT:
810
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
811
812
if (OP_A(inst).kind != IrOpKind::Inst)
813
build.vmovss(inst.regX64, memRegFloatOp(OP_A(inst)));
814
else if (regOp(OP_A(inst)) != inst.regX64)
815
build.vmovss(inst.regX64, inst.regX64, regOp(OP_A(inst)));
816
817
build.vandps(inst.regX64, inst.regX64, build.i32(0x7fffffff));
818
break;
819
case IrCmd::SIGN_FLOAT:
820
{
821
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
822
823
ScopedRegX64 tmp0{regs, SizeX64::xmmword};
824
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
825
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
826
827
build.vxorps(tmp0.reg, tmp0.reg, tmp0.reg);
828
829
// Set tmp1 to -1 if arg < 0, else 0
830
build.vcmpltss(tmp1.reg, regOp(OP_A(inst)), tmp0.reg);
831
build.vmovss(tmp2.reg, build.f32(-1.0f));
832
build.vandps(tmp1.reg, tmp1.reg, tmp2.reg);
833
834
// Set mask bit to 1 if 0 < arg, else 0
835
build.vcmpltss(inst.regX64, tmp0.reg, regOp(OP_A(inst)));
836
837
// Result = (mask-bit == 1) ? 1.0 : tmp1
838
// If arg < 0 then tmp1 is -1 and mask-bit is 0, result is -1
839
// If arg == 0 then tmp1 is 0 and mask-bit is 0, result is 0
840
// If arg > 0 then tmp1 is 0 and mask-bit is 1, result is 1
841
build.vblendvps(inst.regX64, tmp1.reg, build.f32x4(1, 1, 1, 1), inst.regX64);
842
break;
843
}
844
case IrCmd::SELECT_NUM:
845
{
846
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_C(inst), OP_D(inst)}); // can't reuse b if a is a memory operand
847
848
ScopedRegX64 tmp{regs, SizeX64::xmmword};
849
850
if (OP_C(inst).kind == IrOpKind::Inst)
851
build.vcmpeqsd(tmp.reg, regOp(OP_C(inst)), memRegDoubleOp(OP_D(inst)));
852
else
853
{
854
build.vmovsd(tmp.reg, memRegDoubleOp(OP_C(inst)));
855
build.vcmpeqsd(tmp.reg, tmp.reg, memRegDoubleOp(OP_D(inst)));
856
}
857
858
if (OP_A(inst).kind == IrOpKind::Inst)
859
build.vblendvpd(inst.regX64, regOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)), tmp.reg);
860
else
861
{
862
build.vmovsd(inst.regX64, memRegDoubleOp(OP_A(inst)));
863
build.vblendvpd(inst.regX64, inst.regX64, memRegDoubleOp(OP_B(inst)), tmp.reg);
864
}
865
break;
866
}
867
case IrCmd::SELECT_VEC:
868
{
869
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_C(inst), OP_D(inst)});
870
871
ScopedRegX64 tmp1{regs};
872
ScopedRegX64 tmp2{regs};
873
RegisterX64 tmpc = vecOp(OP_C(inst), tmp1);
874
RegisterX64 tmpd = vecOp(OP_D(inst), tmp2);
875
876
build.vcmpeqps(inst.regX64, tmpc, tmpd);
877
build.vblendvps(inst.regX64, vecOp(OP_A(inst), tmp1), vecOp(OP_B(inst), tmp2), inst.regX64);
878
879
break;
880
}
881
case IrCmd::SELECT_IF_TRUTHY:
882
{
883
inst.regX64 = regs.allocReg(SizeX64::xmmword, index); // No reuse since multiple inputs can be shared
884
885
// Place lhs as the result, we will overwrite it with rhs if 'A' is falsy later
886
build.vmovaps(inst.regX64, regOp(OP_B(inst)));
887
888
// Get rhs register early, so a potential restore happens on both sides of a conditional control flow
889
RegisterX64 c = regOp(OP_C(inst));
890
891
ScopedRegX64 tmp{regs, SizeX64::dword};
892
Label saveRhs, exit;
893
894
// Check tag first
895
build.vpextrd(tmp.reg, regOp(OP_A(inst)), 3);
896
build.cmp(tmp.reg, LUA_TBOOLEAN);
897
898
build.jcc(ConditionX64::Below, saveRhs); // rhs if 'A' is nil
899
build.jcc(ConditionX64::Above, exit); // Keep lhs if 'A' is not a boolean
900
901
// Check the boolean value
902
build.vpextrd(tmp.reg, regOp(OP_A(inst)), 0);
903
build.test(tmp.reg, tmp.reg);
904
build.jcc(ConditionX64::NotZero, exit); // Keep lhs if 'A' is true
905
906
build.setLabel(saveRhs);
907
build.vmovaps(inst.regX64, c);
908
909
build.setLabel(exit);
910
break;
911
}
912
case IrCmd::ADD_VEC:
913
{
914
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
915
916
ScopedRegX64 tmp1{regs};
917
ScopedRegX64 tmp2{regs};
918
919
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
920
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
921
922
build.vaddps(inst.regX64, tmpa, tmpb);
923
break;
924
}
925
case IrCmd::SUB_VEC:
926
{
927
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
928
929
ScopedRegX64 tmp1{regs};
930
ScopedRegX64 tmp2{regs};
931
932
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
933
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
934
935
build.vsubps(inst.regX64, tmpa, tmpb);
936
break;
937
}
938
case IrCmd::MUL_VEC:
939
{
940
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
941
942
ScopedRegX64 tmp1{regs};
943
ScopedRegX64 tmp2{regs};
944
945
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
946
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
947
948
build.vmulps(inst.regX64, tmpa, tmpb);
949
break;
950
}
951
case IrCmd::DIV_VEC:
952
{
953
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
954
955
ScopedRegX64 tmp1{regs};
956
ScopedRegX64 tmp2{regs};
957
958
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
959
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
960
961
build.vdivps(inst.regX64, tmpa, tmpb);
962
break;
963
}
964
case IrCmd::IDIV_VEC:
965
{
966
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
967
968
ScopedRegX64 tmp1{regs};
969
ScopedRegX64 tmp2{regs};
970
971
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
972
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
973
974
build.vdivps(inst.regX64, tmpa, tmpb);
975
build.vroundps(inst.regX64, inst.regX64, RoundingModeX64::RoundToNegativeInfinity);
976
break;
977
}
978
case IrCmd::MULADD_VEC:
979
{
980
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
981
ScopedRegX64 tmp1{regs};
982
ScopedRegX64 tmp2{regs};
983
ScopedRegX64 tmp3{regs};
984
985
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
986
RegisterX64 tmpb = vecOp(OP_B(inst), tmp2);
987
RegisterX64 tmpc = vecOp(OP_C(inst), tmp3);
988
989
if ((build.features & Feature_FMA3) != 0)
990
{
991
if (inst.regX64 != tmpa)
992
build.vmovups(inst.regX64, tmpa);
993
994
build.vfmadd213ps(inst.regX64, tmpb, tmpc);
995
}
996
else
997
{
998
build.vmulps(inst.regX64, tmpa, tmpb);
999
build.vaddps(inst.regX64, inst.regX64, tmpc);
1000
}
1001
1002
break;
1003
}
1004
case IrCmd::UNM_VEC:
1005
{
1006
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1007
1008
build.vxorpd(inst.regX64, regOp(OP_A(inst)), build.f32x4(-0.0, -0.0, -0.0, -0.0));
1009
break;
1010
}
1011
case IrCmd::MIN_VEC:
1012
{
1013
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
1014
1015
ScopedRegX64 tmp1{regs};
1016
ScopedRegX64 tmp2{regs};
1017
1018
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
1019
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
1020
1021
build.vminps(inst.regX64, tmpa, tmpb);
1022
break;
1023
}
1024
case IrCmd::MAX_VEC:
1025
{
1026
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
1027
1028
ScopedRegX64 tmp1{regs};
1029
ScopedRegX64 tmp2{regs};
1030
1031
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
1032
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
1033
1034
build.vmaxps(inst.regX64, tmpa, tmpb);
1035
break;
1036
}
1037
case IrCmd::FLOOR_VEC:
1038
{
1039
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1040
1041
ScopedRegX64 tmp1{regs};
1042
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
1043
1044
build.vroundps(inst.regX64, tmpa, RoundingModeX64::RoundToNegativeInfinity);
1045
break;
1046
}
1047
case IrCmd::CEIL_VEC:
1048
{
1049
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1050
1051
ScopedRegX64 tmp1{regs};
1052
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
1053
1054
build.vroundps(inst.regX64, tmpa, RoundingModeX64::RoundToPositiveInfinity);
1055
break;
1056
}
1057
case IrCmd::ABS_VEC:
1058
{
1059
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1060
1061
ScopedRegX64 tmp1{regs};
1062
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
1063
1064
build.vandps(inst.regX64, tmpa, build.u32x4(0x7fffffff, 0x7fffffff, 0x7fffffff, 0x7fffffff));
1065
break;
1066
}
1067
case IrCmd::DOT_VEC:
1068
{
1069
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst), OP_B(inst)});
1070
1071
ScopedRegX64 tmp1{regs};
1072
ScopedRegX64 tmp2{regs};
1073
1074
RegisterX64 tmpa = vecOp(OP_A(inst), tmp1);
1075
RegisterX64 tmpb = (OP_A(inst) == OP_B(inst)) ? tmpa : vecOp(OP_B(inst), tmp2);
1076
1077
build.vdpps(inst.regX64, tmpa, tmpb, 0x71); // 7 = 0b0111, sum first 3 products into first float
1078
break;
1079
}
1080
case IrCmd::EXTRACT_VEC:
1081
{
1082
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1083
1084
build.vpshufps(inst.regX64, regOp(OP_A(inst)), regOp(OP_A(inst)), intOp(OP_B(inst)));
1085
break;
1086
}
1087
case IrCmd::NOT_ANY:
1088
{
1089
// TODO: if we have a single user which is a STORE_INT, we are missing the opportunity to write directly to target
1090
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst), OP_B(inst)});
1091
1092
Label saveOne, saveZero, exit;
1093
1094
if (OP_A(inst).kind == IrOpKind::Constant)
1095
{
1096
// Other cases should've been constant folded
1097
CODEGEN_ASSERT(tagOp(OP_A(inst)) == LUA_TBOOLEAN);
1098
}
1099
else
1100
{
1101
build.cmp(regOp(OP_A(inst)), LUA_TNIL);
1102
build.jcc(ConditionX64::Equal, saveOne);
1103
1104
build.cmp(regOp(OP_A(inst)), LUA_TBOOLEAN);
1105
build.jcc(ConditionX64::NotEqual, saveZero);
1106
}
1107
1108
if (OP_B(inst).kind == IrOpKind::Constant)
1109
{
1110
// If value is 1, we fallthrough to storing 0
1111
if (intOp(OP_B(inst)) == 0)
1112
build.jmp(saveOne);
1113
}
1114
else
1115
{
1116
build.cmp(regOp(OP_B(inst)), 0);
1117
build.jcc(ConditionX64::Equal, saveOne);
1118
}
1119
1120
build.setLabel(saveZero);
1121
build.mov(inst.regX64, 0);
1122
build.jmp(exit);
1123
1124
build.setLabel(saveOne);
1125
build.mov(inst.regX64, 1);
1126
1127
build.setLabel(exit);
1128
break;
1129
}
1130
case IrCmd::CMP_INT:
1131
{
1132
// Cannot reuse operand registers as a target because we have to modify it before the comparison
1133
inst.regX64 = regs.allocReg(SizeX64::dword, index);
1134
1135
// We are going to operate on byte register, those do not clear high bits on write
1136
build.xor_(inst.regX64, inst.regX64);
1137
1138
IrCondition cond = conditionOp(OP_C(inst));
1139
1140
if (OP_A(inst).kind == IrOpKind::Constant)
1141
{
1142
build.cmp(regOp(OP_B(inst)), intOp(OP_A(inst)));
1143
build.setcc(getInverseCondition(getConditionInt(cond)), byteReg(inst.regX64));
1144
}
1145
else if (OP_A(inst).kind == IrOpKind::Inst)
1146
{
1147
build.cmp(regOp(OP_A(inst)), intOp(OP_B(inst)));
1148
build.setcc(getConditionInt(cond), byteReg(inst.regX64));
1149
}
1150
else
1151
{
1152
CODEGEN_ASSERT(!"Unsupported instruction form");
1153
}
1154
break;
1155
}
1156
case IrCmd::CMP_ANY:
1157
{
1158
CODEGEN_ASSERT(OP_A(inst).kind == IrOpKind::VmReg && OP_B(inst).kind == IrOpKind::VmReg);
1159
IrCondition cond = conditionOp(OP_C(inst));
1160
1161
Label skip, exit;
1162
1163
// For equality comparison, 'luaV_lessequal' expects tag to be equal before the call
1164
if (cond == IrCondition::Equal)
1165
{
1166
ScopedRegX64 tmp{regs, SizeX64::dword};
1167
1168
build.mov(tmp.reg, memRegTagOp(OP_A(inst)));
1169
build.cmp(memRegTagOp(OP_B(inst)), tmp.reg);
1170
1171
// If the tags are not equal, skip 'luaV_lessequal' call and set result to 0
1172
build.jcc(ConditionX64::NotEqual, skip);
1173
}
1174
1175
{
1176
ScopedSpills spillGuard(regs);
1177
1178
IrCallWrapperX64 callWrap(regs, build);
1179
callWrap.addArgument(SizeX64::qword, rState);
1180
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(OP_A(inst))));
1181
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(OP_B(inst))));
1182
1183
if (cond == IrCondition::LessEqual)
1184
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessequal)]);
1185
else if (cond == IrCondition::Less)
1186
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_lessthan)]);
1187
else if (cond == IrCondition::Equal)
1188
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_equalval)]);
1189
else
1190
CODEGEN_ASSERT(!"Unsupported condition");
1191
}
1192
1193
emitUpdateBase(build);
1194
1195
inst.regX64 = regs.takeReg(eax, index);
1196
1197
if (cond == IrCondition::Equal)
1198
{
1199
build.jmp(exit);
1200
build.setLabel(skip);
1201
1202
build.xor_(inst.regX64, inst.regX64);
1203
build.setLabel(exit);
1204
}
1205
1206
// If case we made a call, skip high register bits clear, only consumer is JUMP_CMP_INT which doesn't read them
1207
break;
1208
}
1209
case IrCmd::CMP_TAG:
1210
{
1211
// Cannot reuse operand registers as a target because we have to modify it before the comparison
1212
inst.regX64 = regs.allocReg(SizeX64::dword, index);
1213
1214
// We are going to operate on byte register, those do not clear high bits on write
1215
build.xor_(inst.regX64, inst.regX64);
1216
1217
IrCondition cond = conditionOp(OP_C(inst));
1218
CODEGEN_ASSERT(cond == IrCondition::Equal || cond == IrCondition::NotEqual);
1219
ConditionX64 condX64 = getConditionInt(cond);
1220
1221
if (tagOp(OP_B(inst)) == LUA_TNIL && OP_A(inst).kind == IrOpKind::Inst)
1222
build.test(regOp(OP_A(inst)), regOp(OP_A(inst)));
1223
else
1224
build.cmp(memRegTagOp(OP_A(inst)), tagOp(OP_B(inst)));
1225
1226
build.setcc(condX64, byteReg(inst.regX64));
1227
1228
break;
1229
}
1230
case IrCmd::CMP_SPLIT_TVALUE:
1231
{
1232
// Cannot reuse operand registers as a target because we have to modify it before the comparison
1233
inst.regX64 = regs.allocReg(SizeX64::dword, index);
1234
1235
// Second operand of this instruction must be a constant
1236
// Without a constant type, we wouldn't know the correct way to compare the values at lowering time
1237
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Constant);
1238
1239
// We are going to operate on byte registers, those do not clear high bits on write
1240
build.xor_(inst.regX64, inst.regX64);
1241
1242
IrCondition cond = conditionOp(OP_E(inst));
1243
CODEGEN_ASSERT(cond == IrCondition::Equal || cond == IrCondition::NotEqual);
1244
1245
// Check tag equality first
1246
ScopedRegX64 tmp1{regs, SizeX64::byte};
1247
1248
if (OP_A(inst).kind != IrOpKind::Constant)
1249
{
1250
build.cmp(regOp(OP_A(inst)), tagOp(OP_B(inst)));
1251
build.setcc(getConditionInt(cond), byteReg(tmp1.reg));
1252
}
1253
else
1254
{
1255
// Constant folding had to handle different constant tags
1256
CODEGEN_ASSERT(tagOp(OP_A(inst)) == tagOp(OP_B(inst)));
1257
}
1258
1259
if (tagOp(OP_B(inst)) == LUA_TBOOLEAN)
1260
{
1261
if (OP_C(inst).kind == IrOpKind::Constant)
1262
build.cmp(regOp(OP_D(inst)), intOp(OP_C(inst))); // swapped arguments
1263
else if (OP_D(inst).kind == IrOpKind::Constant)
1264
build.cmp(regOp(OP_C(inst)), intOp(OP_D(inst)));
1265
else
1266
build.cmp(regOp(OP_C(inst)), regOp(OP_D(inst)));
1267
1268
build.setcc(getConditionInt(cond), byteReg(inst.regX64));
1269
}
1270
else if (tagOp(OP_B(inst)) == LUA_TSTRING)
1271
{
1272
build.cmp(regOp(OP_C(inst)), regOp(OP_D(inst)));
1273
build.setcc(getConditionInt(cond), byteReg(inst.regX64));
1274
}
1275
else if (tagOp(OP_B(inst)) == LUA_TNUMBER)
1276
{
1277
if (OP_C(inst).kind == IrOpKind::Constant)
1278
build.vucomisd(regOp(OP_D(inst)), memRegDoubleOp(OP_C(inst))); // swapped arguments
1279
else if (OP_D(inst).kind == IrOpKind::Constant)
1280
build.vucomisd(regOp(OP_C(inst)), memRegDoubleOp(OP_D(inst)));
1281
else
1282
build.vucomisd(regOp(OP_C(inst)), regOp(OP_D(inst)));
1283
1284
if (OP_C(inst) == OP_D(inst))
1285
{
1286
// When numbers are the same, we only need to check parity to detect NaN
1287
if (cond == IrCondition::Equal)
1288
build.setcc(ConditionX64::NotParity, byteReg(inst.regX64));
1289
else
1290
build.setcc(ConditionX64::Parity, byteReg(inst.regX64));
1291
}
1292
else
1293
{
1294
ScopedRegX64 tmp2{regs, SizeX64::dword};
1295
1296
if (cond == IrCondition::Equal)
1297
{
1298
build.mov(tmp2.reg, 0);
1299
build.setcc(ConditionX64::NotParity, byteReg(inst.regX64));
1300
build.cmov(ConditionX64::NotEqual, inst.regX64, tmp2.reg);
1301
}
1302
else
1303
{
1304
build.mov(tmp2.reg, 1);
1305
build.setcc(ConditionX64::Parity, byteReg(inst.regX64));
1306
build.cmov(ConditionX64::NotEqual, inst.regX64, tmp2.reg);
1307
}
1308
}
1309
}
1310
else
1311
{
1312
CODEGEN_ASSERT(!"unsupported type tag in CMP_SPLIT_TVALUE");
1313
}
1314
1315
if (OP_A(inst).kind != IrOpKind::Constant)
1316
{
1317
if (cond == IrCondition::Equal)
1318
build.and_(byteReg(inst.regX64), byteReg(tmp1.reg));
1319
else
1320
build.or_(byteReg(inst.regX64), byteReg(tmp1.reg));
1321
}
1322
break;
1323
}
1324
case IrCmd::JUMP:
1325
jumpOrAbortOnUndef(OP_A(inst), next);
1326
break;
1327
case IrCmd::JUMP_IF_TRUTHY:
1328
jumpIfTruthy(build, vmRegOp(OP_A(inst)), labelOp(OP_B(inst)), labelOp(OP_C(inst)));
1329
jumpOrFallthrough(blockOp(OP_C(inst)), next);
1330
break;
1331
case IrCmd::JUMP_IF_FALSY:
1332
jumpIfFalsy(build, vmRegOp(OP_A(inst)), labelOp(OP_B(inst)), labelOp(OP_C(inst)));
1333
jumpOrFallthrough(blockOp(OP_C(inst)), next);
1334
break;
1335
case IrCmd::JUMP_EQ_TAG:
1336
{
1337
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Inst || OP_B(inst).kind == IrOpKind::Constant);
1338
OperandX64 opb = OP_B(inst).kind == IrOpKind::Inst ? regOp(OP_B(inst)) : OperandX64(tagOp(OP_B(inst)));
1339
1340
if (OP_A(inst).kind == IrOpKind::Constant)
1341
build.cmp(opb, tagOp(OP_A(inst)));
1342
else
1343
build.cmp(memRegTagOp(OP_A(inst)), opb);
1344
1345
if (isFallthroughBlock(blockOp(OP_D(inst)), next))
1346
{
1347
build.jcc(ConditionX64::Equal, labelOp(OP_C(inst)));
1348
jumpOrFallthrough(blockOp(OP_D(inst)), next);
1349
}
1350
else
1351
{
1352
build.jcc(ConditionX64::NotEqual, labelOp(OP_D(inst)));
1353
jumpOrFallthrough(blockOp(OP_C(inst)), next);
1354
}
1355
break;
1356
}
1357
case IrCmd::JUMP_CMP_INT:
1358
{
1359
IrCondition cond = conditionOp(OP_C(inst));
1360
1361
if ((cond == IrCondition::Equal || cond == IrCondition::NotEqual) && intOp(OP_B(inst)) == 0)
1362
{
1363
bool invert = cond == IrCondition::NotEqual;
1364
1365
build.test(regOp(OP_A(inst)), regOp(OP_A(inst)));
1366
1367
if (isFallthroughBlock(blockOp(OP_D(inst)), next))
1368
{
1369
build.jcc(invert ? ConditionX64::Zero : ConditionX64::NotZero, labelOp(OP_E(inst)));
1370
jumpOrFallthrough(blockOp(OP_D(inst)), next);
1371
}
1372
else
1373
{
1374
build.jcc(invert ? ConditionX64::NotZero : ConditionX64::Zero, labelOp(OP_D(inst)));
1375
jumpOrFallthrough(blockOp(OP_E(inst)), next);
1376
}
1377
}
1378
else
1379
{
1380
build.cmp(regOp(OP_A(inst)), intOp(OP_B(inst)));
1381
1382
build.jcc(getConditionInt(cond), labelOp(OP_D(inst)));
1383
jumpOrFallthrough(blockOp(OP_E(inst)), next);
1384
}
1385
break;
1386
}
1387
case IrCmd::JUMP_EQ_POINTER:
1388
build.cmp(regOp(OP_A(inst)), regOp(OP_B(inst)));
1389
1390
build.jcc(ConditionX64::Equal, labelOp(OP_C(inst)));
1391
jumpOrFallthrough(blockOp(OP_D(inst)), next);
1392
break;
1393
case IrCmd::JUMP_CMP_NUM:
1394
{
1395
IrCondition cond = conditionOp(OP_C(inst));
1396
1397
ScopedRegX64 tmp{regs, SizeX64::xmmword};
1398
1399
jumpOnNumberCmp(
1400
build, tmp.reg, memRegDoubleOp(OP_A(inst)), memRegDoubleOp(OP_B(inst)), cond, labelOp(OP_D(inst)), /* floatPrecision */ false
1401
);
1402
jumpOrFallthrough(blockOp(OP_E(inst)), next);
1403
break;
1404
}
1405
case IrCmd::JUMP_CMP_FLOAT:
1406
{
1407
IrCondition cond = conditionOp(OP_C(inst));
1408
1409
ScopedRegX64 tmp{regs, SizeX64::xmmword};
1410
1411
jumpOnNumberCmp(build, tmp.reg, memRegFloatOp(OP_A(inst)), memRegFloatOp(OP_B(inst)), cond, labelOp(OP_D(inst)), /* floatPrecision */ true);
1412
jumpOrFallthrough(blockOp(OP_E(inst)), next);
1413
break;
1414
}
1415
case IrCmd::JUMP_FORN_LOOP_COND:
1416
{
1417
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
1418
ScopedRegX64 tmp2{regs, SizeX64::xmmword};
1419
ScopedRegX64 tmp3{regs, SizeX64::xmmword};
1420
1421
RegisterX64 index = OP_A(inst).kind == IrOpKind::Inst ? regOp(OP_A(inst)) : tmp1.reg;
1422
RegisterX64 limit = OP_B(inst).kind == IrOpKind::Inst ? regOp(OP_B(inst)) : tmp2.reg;
1423
1424
if (OP_A(inst).kind != IrOpKind::Inst)
1425
build.vmovsd(tmp1.reg, memRegDoubleOp(OP_A(inst)));
1426
1427
if (OP_B(inst).kind != IrOpKind::Inst)
1428
build.vmovsd(tmp2.reg, memRegDoubleOp(OP_B(inst)));
1429
1430
Label direct;
1431
1432
// step > 0
1433
jumpOnNumberCmp(build, tmp3.reg, memRegDoubleOp(OP_C(inst)), build.f64(0.0), IrCondition::Greater, direct, /* floatPrecision */ false);
1434
1435
// !(limit <= index)
1436
jumpOnNumberCmp(build, noreg, limit, index, IrCondition::NotLessEqual, labelOp(OP_E(inst)), /* floatPrecision */ false);
1437
build.jmp(labelOp(OP_D(inst)));
1438
1439
// !(index <= limit)
1440
build.setLabel(direct);
1441
jumpOnNumberCmp(build, noreg, index, limit, IrCondition::NotLessEqual, labelOp(OP_E(inst)), /* floatPrecision */ false);
1442
jumpOrFallthrough(blockOp(OP_D(inst)), next);
1443
break;
1444
}
1445
case IrCmd::TABLE_LEN:
1446
{
1447
IrCallWrapperX64 callWrap(regs, build, index);
1448
callWrap.addArgument(SizeX64::qword, regOp(OP_A(inst)), OP_A(inst));
1449
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_getn)]);
1450
1451
inst.regX64 = regs.takeReg(eax, index);
1452
1453
build.mov(inst.regX64, inst.regX64); // Ensure high register bits are cleared
1454
break;
1455
}
1456
case IrCmd::TABLE_SETNUM:
1457
{
1458
IrCallWrapperX64 callWrap(regs, build, index);
1459
callWrap.addArgument(SizeX64::qword, rState);
1460
callWrap.addArgument(SizeX64::qword, regOp(OP_A(inst)), OP_A(inst));
1461
callWrap.addArgument(SizeX64::dword, regOp(OP_B(inst)), OP_B(inst));
1462
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_setnum)]);
1463
inst.regX64 = regs.takeReg(rax, index);
1464
break;
1465
}
1466
case IrCmd::STRING_LEN:
1467
{
1468
RegisterX64 ptr = regOp(OP_A(inst));
1469
inst.regX64 = regs.allocReg(SizeX64::dword, index);
1470
build.mov(inst.regX64, dword[ptr + offsetof(TString, len)]);
1471
break;
1472
}
1473
case IrCmd::NEW_TABLE:
1474
{
1475
IrCallWrapperX64 callWrap(regs, build, index);
1476
callWrap.addArgument(SizeX64::qword, rState);
1477
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(OP_A(inst))));
1478
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(OP_B(inst))));
1479
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_new)]);
1480
inst.regX64 = regs.takeReg(rax, index);
1481
break;
1482
}
1483
case IrCmd::DUP_TABLE:
1484
{
1485
IrCallWrapperX64 callWrap(regs, build, index);
1486
callWrap.addArgument(SizeX64::qword, rState);
1487
callWrap.addArgument(SizeX64::qword, regOp(OP_A(inst)), OP_A(inst));
1488
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaH_clone)]);
1489
inst.regX64 = regs.takeReg(rax, index);
1490
break;
1491
}
1492
case IrCmd::TRY_NUM_TO_INDEX:
1493
{
1494
inst.regX64 = regs.allocReg(SizeX64::dword, index);
1495
1496
ScopedRegX64 tmp{regs, SizeX64::xmmword};
1497
1498
convertNumberToIndexOrJump(build, tmp.reg, regOp(OP_A(inst)), inst.regX64, labelOp(OP_B(inst)));
1499
break;
1500
}
1501
case IrCmd::TRY_CALL_FASTGETTM:
1502
{
1503
ScopedRegX64 tmp{regs, SizeX64::qword};
1504
1505
build.mov(tmp.reg, qword[regOp(OP_A(inst)) + offsetof(LuaTable, metatable)]);
1506
regs.freeLastUseReg(function.instOp(OP_A(inst)), index); // Release before the call if it's the last use
1507
1508
build.test(tmp.reg, tmp.reg);
1509
build.jcc(ConditionX64::Zero, labelOp(OP_C(inst))); // No metatable
1510
1511
build.test(byte[tmp.reg + offsetof(LuaTable, tmcache)], 1 << intOp(OP_B(inst)));
1512
build.jcc(ConditionX64::NotZero, labelOp(OP_C(inst))); // No tag method
1513
1514
ScopedRegX64 tmp2{regs, SizeX64::qword};
1515
build.mov(tmp2.reg, qword[rState + offsetof(lua_State, global)]);
1516
1517
{
1518
ScopedSpills spillGuard(regs);
1519
1520
IrCallWrapperX64 callWrap(regs, build, index);
1521
callWrap.addArgument(SizeX64::qword, tmp);
1522
callWrap.addArgument(SizeX64::qword, intOp(OP_B(inst)));
1523
callWrap.addArgument(SizeX64::qword, qword[tmp2.release() + offsetof(global_State, tmname) + intOp(OP_B(inst)) * sizeof(TString*)]);
1524
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_gettm)]);
1525
}
1526
1527
build.test(rax, rax);
1528
build.jcc(ConditionX64::Zero, labelOp(OP_C(inst))); // No tag method
1529
1530
inst.regX64 = regs.takeReg(rax, index);
1531
break;
1532
}
1533
case IrCmd::NEW_USERDATA:
1534
{
1535
IrCallWrapperX64 callWrap(regs, build, index);
1536
callWrap.addArgument(SizeX64::qword, rState);
1537
callWrap.addArgument(SizeX64::qword, intOp(OP_A(inst)));
1538
callWrap.addArgument(SizeX64::dword, intOp(OP_B(inst)));
1539
callWrap.call(qword[rNativeContext + offsetof(NativeContext, newUserdata)]);
1540
inst.regX64 = regs.takeReg(rax, index);
1541
break;
1542
}
1543
case IrCmd::INT_TO_NUM:
1544
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
1545
1546
build.vcvtsi2sd(inst.regX64, inst.regX64, regOp(OP_A(inst)));
1547
break;
1548
case IrCmd::UINT_TO_NUM:
1549
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
1550
1551
// AVX has no uint->double conversion; the source must come from UINT op and they all should clear top 32 bits so we can usually
1552
// use 64-bit reg; the one exception is NUM_TO_UINT which doesn't clear top bits
1553
if (IrCmd source = function.instOp(OP_A(inst)).cmd; source == IrCmd::NUM_TO_UINT)
1554
{
1555
ScopedRegX64 tmp{regs, SizeX64::dword};
1556
build.mov(tmp.reg, regOp(OP_A(inst)));
1557
build.vcvtsi2sd(inst.regX64, inst.regX64, qwordReg(tmp.reg));
1558
}
1559
else
1560
{
1561
CODEGEN_ASSERT(source != IrCmd::SUBSTITUTE); // we don't process substitutions
1562
build.vcvtsi2sd(inst.regX64, inst.regX64, qwordReg(regOp(OP_A(inst))));
1563
}
1564
break;
1565
case IrCmd::UINT_TO_FLOAT:
1566
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
1567
1568
// AVX has no uint->float conversion; the source must come from UINT op and they all should clear top 32 bits so we can usually
1569
// use 64-bit reg; the one exception is NUM_TO_UINT which doesn't clear top bits
1570
if (IrCmd source = function.instOp(OP_A(inst)).cmd; source == IrCmd::NUM_TO_UINT)
1571
{
1572
ScopedRegX64 tmp{regs, SizeX64::dword};
1573
build.mov(tmp.reg, regOp(OP_A(inst)));
1574
build.vcvtsi2ss(inst.regX64, inst.regX64, qwordReg(tmp.reg));
1575
}
1576
else
1577
{
1578
CODEGEN_ASSERT(source != IrCmd::SUBSTITUTE); // we don't process substitutions
1579
build.vcvtsi2ss(inst.regX64, inst.regX64, qwordReg(regOp(OP_A(inst))));
1580
}
1581
break;
1582
case IrCmd::NUM_TO_INT:
1583
inst.regX64 = regs.allocReg(SizeX64::dword, index);
1584
1585
build.vcvttsd2si(inst.regX64, memRegDoubleOp(OP_A(inst)));
1586
break;
1587
case IrCmd::NUM_TO_UINT:
1588
inst.regX64 = regs.allocReg(SizeX64::dword, index);
1589
1590
// Note: we perform 'uint64_t = (long long)double' for consistency with C++ code
1591
build.vcvttsd2si(qwordReg(inst.regX64), memRegDoubleOp(OP_A(inst)));
1592
break;
1593
1594
case IrCmd::FLOAT_TO_NUM:
1595
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1596
1597
build.vcvtss2sd(inst.regX64, inst.regX64, memRegDoubleOp(OP_A(inst)));
1598
break;
1599
1600
case IrCmd::NUM_TO_FLOAT:
1601
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1602
1603
build.vcvtsd2ss(inst.regX64, inst.regX64, memRegDoubleOp(OP_A(inst)));
1604
break;
1605
case IrCmd::FLOAT_TO_VEC:
1606
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
1607
1608
if (OP_A(inst).kind == IrOpKind::Constant)
1609
{
1610
float value = float(doubleOp(OP_A(inst)));
1611
uint32_t asU32;
1612
static_assert(sizeof(asU32) == sizeof(value), "Expecting float to be 32-bit");
1613
memcpy(&asU32, &value, sizeof(value));
1614
1615
build.vmovaps(inst.regX64, build.u32x4(asU32, asU32, asU32, 0));
1616
}
1617
else
1618
{
1619
build.vpshufps(inst.regX64, regOp(OP_A(inst)), regOp(OP_A(inst)), 0b00'00'00'00);
1620
}
1621
break;
1622
case IrCmd::TAG_VECTOR:
1623
inst.regX64 = regs.allocRegOrReuse(SizeX64::xmmword, index, {OP_A(inst)});
1624
1625
build.vpinsrd(inst.regX64, regOp(OP_A(inst)), build.i32(LUA_TVECTOR), 3);
1626
break;
1627
case IrCmd::TRUNCATE_UINT:
1628
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
1629
1630
// Might generate mov with the same source and destination register which is not a no-op
1631
build.mov(inst.regX64, regOp(OP_A(inst)));
1632
break;
1633
case IrCmd::ADJUST_STACK_TO_REG:
1634
{
1635
ScopedRegX64 tmp{regs, SizeX64::qword};
1636
1637
if (OP_B(inst).kind == IrOpKind::Constant)
1638
{
1639
build.lea(tmp.reg, addr[rBase + (vmRegOp(OP_A(inst)) + intOp(OP_B(inst))) * sizeof(TValue)]);
1640
build.mov(qword[rState + offsetof(lua_State, top)], tmp.reg);
1641
}
1642
else if (OP_B(inst).kind == IrOpKind::Inst)
1643
{
1644
build.mov(dwordReg(tmp.reg), regOp(OP_B(inst)));
1645
build.shl(tmp.reg, kTValueSizeLog2);
1646
build.lea(tmp.reg, addr[rBase + tmp.reg + vmRegOp(OP_A(inst)) * sizeof(TValue)]);
1647
build.mov(qword[rState + offsetof(lua_State, top)], tmp.reg);
1648
}
1649
else
1650
{
1651
CODEGEN_ASSERT(!"Unsupported instruction form");
1652
}
1653
break;
1654
}
1655
case IrCmd::ADJUST_STACK_TO_TOP:
1656
{
1657
ScopedRegX64 tmp{regs, SizeX64::qword};
1658
build.mov(tmp.reg, qword[rState + offsetof(lua_State, ci)]);
1659
build.mov(tmp.reg, qword[tmp.reg + offsetof(CallInfo, top)]);
1660
build.mov(qword[rState + offsetof(lua_State, top)], tmp.reg);
1661
break;
1662
}
1663
1664
case IrCmd::FASTCALL:
1665
{
1666
emitBuiltin(regs, build, uintOp(OP_A(inst)), vmRegOp(OP_B(inst)), vmRegOp(OP_C(inst)), intOp(OP_D(inst)));
1667
break;
1668
}
1669
case IrCmd::INVOKE_FASTCALL:
1670
{
1671
unsigned bfid = uintOp(OP_A(inst));
1672
1673
OperandX64 args = 0;
1674
ScopedRegX64 argsAlt{regs};
1675
1676
// 'E' argument can only be produced by LOP_FASTCALL3
1677
if (OP_E(inst).kind != IrOpKind::Undef)
1678
{
1679
CODEGEN_ASSERT(intOp(OP_F(inst)) == 3);
1680
1681
ScopedRegX64 tmp{regs, SizeX64::xmmword};
1682
argsAlt.alloc(SizeX64::qword);
1683
1684
build.mov(argsAlt.reg, qword[rState + offsetof(lua_State, top)]);
1685
1686
build.vmovups(tmp.reg, luauReg(vmRegOp(OP_D(inst))));
1687
build.vmovups(xmmword[argsAlt.reg], tmp.reg);
1688
1689
build.vmovups(tmp.reg, luauReg(vmRegOp(OP_E(inst))));
1690
build.vmovups(xmmword[argsAlt.reg + sizeof(TValue)], tmp.reg);
1691
}
1692
else
1693
{
1694
if (OP_D(inst).kind == IrOpKind::VmReg)
1695
args = luauRegAddress(vmRegOp(OP_D(inst)));
1696
else if (OP_D(inst).kind == IrOpKind::VmConst)
1697
args = luauConstantAddress(vmConstOp(OP_D(inst)));
1698
else
1699
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::Undef);
1700
}
1701
1702
int ra = vmRegOp(OP_B(inst));
1703
int arg = vmRegOp(OP_C(inst));
1704
int nparams = intOp(OP_F(inst));
1705
int nresults = intOp(OP_G(inst));
1706
1707
IrCallWrapperX64 callWrap(regs, build, index);
1708
callWrap.addArgument(SizeX64::qword, rState);
1709
callWrap.addArgument(SizeX64::qword, luauRegAddress(ra));
1710
callWrap.addArgument(SizeX64::qword, luauRegAddress(arg));
1711
callWrap.addArgument(SizeX64::dword, nresults);
1712
1713
if (OP_E(inst).kind != IrOpKind::Undef)
1714
callWrap.addArgument(SizeX64::qword, argsAlt);
1715
else
1716
callWrap.addArgument(SizeX64::qword, args);
1717
1718
if (nparams == LUA_MULTRET)
1719
{
1720
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
1721
ScopedRegX64 tmp{regs, SizeX64::qword};
1722
1723
// L->top - (ra + 1)
1724
build.mov(reg, qword[rState + offsetof(lua_State, top)]);
1725
build.lea(tmp.reg, addr[rBase + (ra + 1) * sizeof(TValue)]);
1726
build.sub(reg, tmp.reg);
1727
build.shr(reg, kTValueSizeLog2);
1728
1729
callWrap.addArgument(SizeX64::dword, dwordReg(reg));
1730
}
1731
else
1732
{
1733
callWrap.addArgument(SizeX64::dword, nparams);
1734
}
1735
1736
ScopedRegX64 func{regs, SizeX64::qword};
1737
build.mov(func.reg, qword[rNativeContext + offsetof(NativeContext, luauF_table) + bfid * sizeof(luau_FastFunction)]);
1738
1739
callWrap.call(func.release());
1740
inst.regX64 = regs.takeReg(eax, index); // Result of a builtin call is returned in eax
1741
// Skipping high register bits clear, only consumer is CHECK_FASTCALL_RES which doesn't read them
1742
break;
1743
}
1744
case IrCmd::CHECK_FASTCALL_RES:
1745
{
1746
RegisterX64 res = regOp(OP_A(inst));
1747
1748
build.test(res, res); // test here will set SF=1 for a negative number and it always sets OF to 0
1749
build.jcc(ConditionX64::Less, labelOp(OP_B(inst))); // jl jumps if SF != OF
1750
break;
1751
}
1752
case IrCmd::DO_ARITH:
1753
{
1754
OperandX64 opb = OP_B(inst).kind == IrOpKind::VmReg ? luauRegAddress(vmRegOp(OP_B(inst))) : luauConstantAddress(vmConstOp(OP_B(inst)));
1755
OperandX64 opc = OP_C(inst).kind == IrOpKind::VmReg ? luauRegAddress(vmRegOp(OP_C(inst))) : luauConstantAddress(vmConstOp(OP_C(inst)));
1756
callArithHelper(regs, build, vmRegOp(OP_A(inst)), opb, opc, TMS(intOp(OP_D(inst))));
1757
break;
1758
}
1759
case IrCmd::DO_LEN:
1760
callLengthHelper(regs, build, vmRegOp(OP_A(inst)), vmRegOp(OP_B(inst)));
1761
break;
1762
case IrCmd::GET_TABLE:
1763
if (OP_C(inst).kind == IrOpKind::VmReg)
1764
{
1765
callGetTable(regs, build, vmRegOp(OP_B(inst)), luauRegAddress(vmRegOp(OP_C(inst))), vmRegOp(OP_A(inst)));
1766
}
1767
else if (OP_C(inst).kind == IrOpKind::Constant)
1768
{
1769
TValue n = {};
1770
setnvalue(&n, uintOp(OP_C(inst)));
1771
callGetTable(regs, build, vmRegOp(OP_B(inst)), build.bytes(&n, sizeof(n)), vmRegOp(OP_A(inst)));
1772
}
1773
else
1774
{
1775
CODEGEN_ASSERT(!"Unsupported instruction form");
1776
}
1777
break;
1778
case IrCmd::SET_TABLE:
1779
if (OP_C(inst).kind == IrOpKind::VmReg)
1780
{
1781
callSetTable(regs, build, vmRegOp(OP_B(inst)), luauRegAddress(vmRegOp(OP_C(inst))), vmRegOp(OP_A(inst)));
1782
}
1783
else if (OP_C(inst).kind == IrOpKind::Constant)
1784
{
1785
TValue n = {};
1786
setnvalue(&n, uintOp(OP_C(inst)));
1787
callSetTable(regs, build, vmRegOp(OP_B(inst)), build.bytes(&n, sizeof(n)), vmRegOp(OP_A(inst)));
1788
}
1789
else
1790
{
1791
CODEGEN_ASSERT(!"Unsupported instruction form");
1792
}
1793
break;
1794
case IrCmd::GET_CACHED_IMPORT:
1795
{
1796
regs.assertAllFree();
1797
regs.assertNoSpills();
1798
1799
Label skip, exit;
1800
1801
// If the constant for the import is set, we will use it directly, otherwise we have to call an import path lookup function
1802
build.cmp(luauConstantTag(vmConstOp(OP_B(inst))), LUA_TNIL);
1803
build.jcc(ConditionX64::NotEqual, skip);
1804
1805
{
1806
ScopedSpills spillGuard(regs);
1807
1808
IrCallWrapperX64 callWrap(regs, build, index);
1809
callWrap.addArgument(SizeX64::qword, rState);
1810
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(OP_A(inst))));
1811
callWrap.addArgument(SizeX64::dword, importOp(OP_C(inst)));
1812
callWrap.addArgument(SizeX64::dword, uintOp(OP_D(inst)));
1813
callWrap.call(qword[rNativeContext + offsetof(NativeContext, getImport)]);
1814
}
1815
1816
emitUpdateBase(build);
1817
build.jmp(exit);
1818
1819
build.setLabel(skip);
1820
1821
ScopedRegX64 tmp1{regs, SizeX64::xmmword};
1822
1823
build.vmovups(tmp1.reg, luauConstant(vmConstOp(OP_B(inst))));
1824
build.vmovups(luauReg(vmRegOp(OP_A(inst))), tmp1.reg);
1825
build.setLabel(exit);
1826
break;
1827
}
1828
case IrCmd::CONCAT:
1829
{
1830
IrCallWrapperX64 callWrap(regs, build, index);
1831
callWrap.addArgument(SizeX64::qword, rState);
1832
callWrap.addArgument(SizeX64::dword, int32_t(uintOp(OP_B(inst))));
1833
callWrap.addArgument(SizeX64::dword, int32_t(vmRegOp(OP_A(inst)) + uintOp(OP_B(inst)) - 1));
1834
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaV_concat)]);
1835
1836
emitUpdateBase(build);
1837
break;
1838
}
1839
case IrCmd::GET_UPVALUE:
1840
{
1841
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
1842
1843
ScopedRegX64 tmp1{regs, SizeX64::qword};
1844
1845
build.mov(tmp1.reg, sClosure);
1846
build.add(tmp1.reg, offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(OP_A(inst)));
1847
1848
// uprefs[] is either an actual value, or it points to UpVal object which has a pointer to value
1849
Label skip;
1850
build.cmp(dword[tmp1.reg + offsetof(TValue, tt)], LUA_TUPVAL);
1851
build.jcc(ConditionX64::NotEqual, skip);
1852
1853
// UpVal.v points to the value (either on stack, or on heap inside each UpVal, but we can deref it unconditionally)
1854
build.mov(tmp1.reg, qword[tmp1.reg + offsetof(TValue, value.gc)]);
1855
build.mov(tmp1.reg, qword[tmp1.reg + offsetof(UpVal, v)]);
1856
1857
build.setLabel(skip);
1858
1859
build.vmovups(inst.regX64, xmmword[tmp1.reg]);
1860
break;
1861
}
1862
case IrCmd::SET_UPVALUE:
1863
{
1864
ScopedRegX64 tmp1{regs, SizeX64::qword};
1865
ScopedRegX64 tmp2{regs, SizeX64::qword};
1866
1867
build.mov(tmp1.reg, sClosure);
1868
build.mov(tmp2.reg, qword[tmp1.reg + offsetof(Closure, l.uprefs) + sizeof(TValue) * vmUpvalueOp(OP_A(inst)) + offsetof(TValue, value.gc)]);
1869
1870
build.mov(tmp1.reg, qword[tmp2.reg + offsetof(UpVal, v)]);
1871
build.vmovups(xmmword[tmp1.reg], regOp(OP_B(inst)));
1872
1873
tmp1.free();
1874
1875
if (OP_C(inst).kind == IrOpKind::Undef || isGCO(tagOp(OP_C(inst))))
1876
{
1877
callBarrierObject(
1878
regs, build, tmp2.release(), {}, regOp(OP_B(inst)), OP_B(inst), OP_C(inst).kind == IrOpKind::Undef ? -1 : tagOp(OP_C(inst))
1879
);
1880
}
1881
break;
1882
}
1883
case IrCmd::CHECK_TAG:
1884
build.cmp(memRegTagOp(OP_A(inst)), tagOp(OP_B(inst)));
1885
jumpOrAbortOnUndef(ConditionX64::NotEqual, OP_C(inst), next);
1886
break;
1887
case IrCmd::CHECK_TRUTHY:
1888
{
1889
// Constant tags which don't require boolean value check should've been removed in constant folding
1890
CODEGEN_ASSERT(OP_A(inst).kind != IrOpKind::Constant || tagOp(OP_A(inst)) == LUA_TBOOLEAN);
1891
1892
Label skip;
1893
1894
if (OP_A(inst).kind != IrOpKind::Constant)
1895
{
1896
// Fail to fallback on 'nil' (falsy)
1897
build.cmp(memRegTagOp(OP_A(inst)), LUA_TNIL);
1898
jumpOrAbortOnUndef(ConditionX64::Equal, OP_C(inst), next);
1899
1900
// Skip value test if it's not a boolean (truthy)
1901
build.cmp(memRegTagOp(OP_A(inst)), LUA_TBOOLEAN);
1902
build.jcc(ConditionX64::NotEqual, skip);
1903
}
1904
1905
// fail to fallback on 'false' boolean value (falsy)
1906
if (OP_B(inst).kind != IrOpKind::Constant)
1907
{
1908
build.cmp(memRegUintOp(OP_B(inst)), 0);
1909
jumpOrAbortOnUndef(ConditionX64::Equal, OP_C(inst), next);
1910
}
1911
else
1912
{
1913
if (intOp(OP_B(inst)) == 0)
1914
jumpOrAbortOnUndef(OP_C(inst), next);
1915
}
1916
1917
if (OP_A(inst).kind != IrOpKind::Constant)
1918
build.setLabel(skip);
1919
break;
1920
}
1921
case IrCmd::CHECK_READONLY:
1922
build.cmp(byte[regOp(OP_A(inst)) + offsetof(LuaTable, readonly)], 0);
1923
jumpOrAbortOnUndef(ConditionX64::NotEqual, OP_B(inst), next);
1924
break;
1925
case IrCmd::CHECK_NO_METATABLE:
1926
build.cmp(qword[regOp(OP_A(inst)) + offsetof(LuaTable, metatable)], 0);
1927
jumpOrAbortOnUndef(ConditionX64::NotEqual, OP_B(inst), next);
1928
break;
1929
case IrCmd::CHECK_SAFE_ENV:
1930
{
1931
if (FFlag::LuauCodegenBlockSafeEnv)
1932
{
1933
checkSafeEnv(OP_A(inst), next);
1934
}
1935
else
1936
{
1937
ScopedRegX64 tmp{regs, SizeX64::qword};
1938
1939
build.mov(tmp.reg, sClosure);
1940
build.mov(tmp.reg, qword[tmp.reg + offsetof(Closure, env)]);
1941
build.cmp(byte[tmp.reg + offsetof(LuaTable, safeenv)], 0);
1942
1943
jumpOrAbortOnUndef(ConditionX64::Equal, OP_A(inst), next);
1944
}
1945
break;
1946
}
1947
case IrCmd::CHECK_ARRAY_SIZE:
1948
if (OP_B(inst).kind == IrOpKind::Inst)
1949
build.cmp(dword[regOp(OP_A(inst)) + offsetof(LuaTable, sizearray)], regOp(OP_B(inst)));
1950
else if (OP_B(inst).kind == IrOpKind::Constant)
1951
build.cmp(dword[regOp(OP_A(inst)) + offsetof(LuaTable, sizearray)], intOp(OP_B(inst)));
1952
else
1953
CODEGEN_ASSERT(!"Unsupported instruction form");
1954
1955
jumpOrAbortOnUndef(ConditionX64::BelowEqual, OP_C(inst), next);
1956
break;
1957
case IrCmd::JUMP_SLOT_MATCH:
1958
case IrCmd::CHECK_SLOT_MATCH:
1959
{
1960
Label abort; // Used when guard aborts execution
1961
const IrOp& mismatchOp = inst.cmd == IrCmd::JUMP_SLOT_MATCH ? OP_D(inst) : OP_C(inst);
1962
Label& mismatch = mismatchOp.kind == IrOpKind::Undef ? abort : labelOp(mismatchOp);
1963
1964
ScopedRegX64 tmp{regs, SizeX64::qword};
1965
1966
// Check if node key tag is a string
1967
build.mov(dwordReg(tmp.reg), luauNodeKeyTag(regOp(OP_A(inst))));
1968
build.and_(dwordReg(tmp.reg), kTKeyTagMask);
1969
build.cmp(dwordReg(tmp.reg), LUA_TSTRING);
1970
build.jcc(ConditionX64::NotEqual, mismatch);
1971
1972
// Check that node key value matches the expected one
1973
build.mov(tmp.reg, luauConstantValue(vmConstOp(OP_B(inst))));
1974
build.cmp(tmp.reg, luauNodeKeyValue(regOp(OP_A(inst))));
1975
build.jcc(ConditionX64::NotEqual, mismatch);
1976
1977
// Check that node value is not nil
1978
build.cmp(dword[regOp(OP_A(inst)) + offsetof(LuaNode, val) + offsetof(TValue, tt)], LUA_TNIL);
1979
build.jcc(ConditionX64::Equal, mismatch);
1980
1981
if (inst.cmd == IrCmd::JUMP_SLOT_MATCH)
1982
{
1983
jumpOrFallthrough(blockOp(OP_C(inst)), next);
1984
}
1985
else if (mismatchOp.kind == IrOpKind::Undef)
1986
{
1987
Label skip;
1988
build.jmp(skip);
1989
build.setLabel(abort);
1990
build.ud2();
1991
build.setLabel(skip);
1992
}
1993
break;
1994
}
1995
case IrCmd::CHECK_NODE_NO_NEXT:
1996
{
1997
ScopedRegX64 tmp{regs, SizeX64::dword};
1998
1999
build.mov(tmp.reg, dword[regOp(OP_A(inst)) + offsetof(LuaNode, key) + kOffsetOfTKeyTagNext]);
2000
build.shr(tmp.reg, kTKeyTagBits);
2001
jumpOrAbortOnUndef(ConditionX64::NotZero, OP_B(inst), next);
2002
break;
2003
}
2004
case IrCmd::CHECK_NODE_VALUE:
2005
{
2006
build.cmp(dword[regOp(OP_A(inst)) + offsetof(LuaNode, val) + offsetof(TValue, tt)], LUA_TNIL);
2007
jumpOrAbortOnUndef(ConditionX64::Equal, OP_B(inst), next);
2008
break;
2009
}
2010
case IrCmd::CHECK_BUFFER_LEN:
2011
{
2012
if (FFlag::LuauCodegenBufferRangeMerge4)
2013
{
2014
int minOffset = intOp(OP_C(inst));
2015
int maxOffset = intOp(OP_D(inst));
2016
CODEGEN_ASSERT(minOffset < maxOffset);
2017
2018
int accessSize = maxOffset - minOffset;
2019
CODEGEN_ASSERT(accessSize > 0);
2020
2021
// Check if we are acting not only as a guard for the size, but as a guard that offset represents an exact integer
2022
if (OP_E(inst).kind != IrOpKind::Undef)
2023
{
2024
CODEGEN_ASSERT(getCmdValueKind(function.instOp(OP_B(inst)).cmd) == IrValueKind::Int);
2025
CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(OP_B(inst)).cmd)); // Ensure that high register bits are cleared
2026
2027
ScopedRegX64 tmp{regs, SizeX64::xmmword};
2028
2029
// Convert integer back to double
2030
build.vcvtsi2sd(tmp.reg, tmp.reg, regOp(OP_B(inst)));
2031
2032
build.vucomisd(tmp.reg, regOp(OP_E(inst))); // Sets ZF=1 if equal or NaN, PF=1 on NaN
2033
2034
// We don't allow non-integer values
2035
jumpOrAbortOnUndef(ConditionX64::NotZero, OP_F(inst), next); // exit on ZF=0
2036
jumpOrAbortOnUndef(ConditionX64::Parity, OP_F(inst), next); // exit on PF=1
2037
}
2038
2039
if (OP_B(inst).kind == IrOpKind::Inst)
2040
{
2041
CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(OP_B(inst)).cmd)); // Ensure that high register bits are cleared
2042
2043
if (accessSize == 1 && minOffset == 0)
2044
{
2045
// Simpler check for a single byte access
2046
build.cmp(dword[regOp(OP_A(inst)) + offsetof(Buffer, len)], regOp(OP_B(inst)));
2047
jumpOrAbortOnUndef(ConditionX64::BelowEqual, OP_F(inst), next);
2048
}
2049
else
2050
{
2051
ScopedRegX64 tmp1{regs, SizeX64::qword};
2052
ScopedRegX64 tmp2{regs, SizeX64::dword};
2053
2054
// To perform the bounds check using a single branch, we take index that is limited to a 32 bit int
2055
// Max offset is then added using a 64 bit addition
2056
// This will make sure that addition will not wrap around for values like 0xffffffff
2057
2058
if (minOffset >= 0)
2059
{
2060
build.lea(tmp1.reg, addr[qwordReg(regOp(OP_B(inst))) + maxOffset]);
2061
}
2062
else
2063
{
2064
// When the min offset is negative, we subtract it from offset first (in 32 bits)
2065
build.lea(dwordReg(tmp1.reg), addr[regOp(OP_B(inst)) + minOffset]);
2066
2067
// And then add the full access size like before
2068
build.lea(tmp1.reg, addr[tmp1.reg + accessSize]);
2069
}
2070
2071
build.mov(tmp2.reg, dword[regOp(OP_A(inst)) + offsetof(Buffer, len)]);
2072
build.cmp(qwordReg(tmp2.reg), tmp1.reg);
2073
2074
jumpOrAbortOnUndef(ConditionX64::Below, OP_F(inst), next);
2075
}
2076
}
2077
else if (OP_B(inst).kind == IrOpKind::Constant)
2078
{
2079
int offset = intOp(OP_B(inst));
2080
2081
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
2082
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
2083
jumpOrAbortOnUndef(OP_F(inst), next);
2084
else
2085
build.cmp(dword[regOp(OP_A(inst)) + offsetof(Buffer, len)], offset + accessSize);
2086
2087
jumpOrAbortOnUndef(ConditionX64::Below, OP_F(inst), next);
2088
}
2089
else
2090
{
2091
CODEGEN_ASSERT(!"Unsupported instruction form");
2092
}
2093
}
2094
else
2095
{
2096
int accessSize = intOp(OP_C(inst));
2097
CODEGEN_ASSERT(accessSize > 0);
2098
2099
if (OP_B(inst).kind == IrOpKind::Inst)
2100
{
2101
CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(OP_B(inst)).cmd)); // Ensure that high register bits are cleared
2102
2103
if (accessSize == 1)
2104
{
2105
// Simpler check for a single byte access
2106
build.cmp(dword[regOp(OP_A(inst)) + offsetof(Buffer, len)], regOp(OP_B(inst)));
2107
jumpOrAbortOnUndef(ConditionX64::BelowEqual, OP_D(inst), next);
2108
}
2109
else
2110
{
2111
ScopedRegX64 tmp1{regs, SizeX64::qword};
2112
ScopedRegX64 tmp2{regs, SizeX64::dword};
2113
2114
// To perform the bounds check using a single branch, we take index that is limited to 32 bit int
2115
// Access size is then added using a 64 bit addition
2116
// This will make sure that addition will not wrap around for values like 0xffffffff
2117
build.lea(tmp1.reg, addr[qwordReg(regOp(OP_B(inst))) + accessSize]);
2118
build.mov(tmp2.reg, dword[regOp(OP_A(inst)) + offsetof(Buffer, len)]);
2119
build.cmp(qwordReg(tmp2.reg), tmp1.reg);
2120
2121
jumpOrAbortOnUndef(ConditionX64::Below, OP_D(inst), next);
2122
}
2123
}
2124
else if (OP_B(inst).kind == IrOpKind::Constant)
2125
{
2126
int offset = intOp(OP_B(inst));
2127
2128
// Constant folding can take care of it, but for safety we avoid overflow/underflow cases here
2129
if (offset < 0 || unsigned(offset) + unsigned(accessSize) >= unsigned(INT_MAX))
2130
jumpOrAbortOnUndef(OP_D(inst), next);
2131
else
2132
build.cmp(dword[regOp(OP_A(inst)) + offsetof(Buffer, len)], offset + accessSize);
2133
2134
jumpOrAbortOnUndef(ConditionX64::Below, OP_D(inst), next);
2135
}
2136
else
2137
{
2138
CODEGEN_ASSERT(!"Unsupported instruction form");
2139
}
2140
}
2141
break;
2142
}
2143
case IrCmd::CHECK_USERDATA_TAG:
2144
{
2145
build.cmp(byte[regOp(OP_A(inst)) + offsetof(Udata, tag)], intOp(OP_B(inst)));
2146
jumpOrAbortOnUndef(ConditionX64::NotEqual, OP_C(inst), next);
2147
break;
2148
}
2149
case IrCmd::CHECK_CMP_INT:
2150
{
2151
IrCondition cond = conditionOp(OP_C(inst));
2152
2153
if ((cond == IrCondition::Equal || cond == IrCondition::NotEqual) && OP_B(inst).kind == IrOpKind::Constant && intOp(OP_B(inst)) == 0)
2154
{
2155
build.test(regOp(OP_A(inst)), regOp(OP_A(inst)));
2156
jumpOrAbortOnUndef(cond == IrCondition::Equal ? ConditionX64::NotZero : ConditionX64::Zero, OP_D(inst), next);
2157
}
2158
else if (OP_A(inst).kind == IrOpKind::Constant)
2159
{
2160
ScopedRegX64 tmp{regs, SizeX64::dword};
2161
build.mov(tmp.reg, memRegIntOp(OP_A(inst)));
2162
build.cmp(tmp.reg, memRegIntOp(OP_B(inst)));
2163
jumpOrAbortOnUndef(getConditionInt(getNegatedCondition(cond)), OP_D(inst), next);
2164
}
2165
else
2166
{
2167
build.cmp(regOp(OP_A(inst)), memRegIntOp(OP_B(inst)));
2168
jumpOrAbortOnUndef(getConditionInt(getNegatedCondition(cond)), OP_D(inst), next);
2169
}
2170
break;
2171
}
2172
case IrCmd::INTERRUPT:
2173
{
2174
unsigned pcpos = uintOp(OP_A(inst));
2175
2176
// We unconditionally spill values here because that allows us to ignore register state when we synthesize interrupt handler
2177
// This can be changed in the future if we can somehow record interrupt handler code separately
2178
// Since interrupts are loop edges or call/ret, we don't have a significant opportunity for register reuse here anyway
2179
regs.preserveAndFreeInstValues();
2180
2181
ScopedRegX64 tmp{regs, SizeX64::qword};
2182
2183
Label self;
2184
2185
build.mov(tmp.reg, qword[rState + offsetof(lua_State, global)]);
2186
build.cmp(qword[tmp.reg + offsetof(global_State, cb.interrupt)], 0);
2187
build.jcc(ConditionX64::NotEqual, self);
2188
2189
Label next = build.setLabel();
2190
2191
interruptHandlers.push_back({self, pcpos, next});
2192
break;
2193
}
2194
case IrCmd::CHECK_GC:
2195
callStepGc(regs, build);
2196
break;
2197
case IrCmd::BARRIER_OBJ:
2198
callBarrierObject(regs, build, regOp(OP_A(inst)), OP_A(inst), noreg, OP_B(inst), OP_C(inst).kind == IrOpKind::Undef ? -1 : tagOp(OP_C(inst)));
2199
break;
2200
case IrCmd::BARRIER_TABLE_BACK:
2201
callBarrierTableFast(regs, build, regOp(OP_A(inst)), OP_A(inst));
2202
break;
2203
case IrCmd::BARRIER_TABLE_FORWARD:
2204
{
2205
Label skip;
2206
2207
ScopedRegX64 tmp{regs, SizeX64::qword};
2208
2209
checkObjectBarrierConditions(
2210
build, tmp.reg, regOp(OP_A(inst)), noreg, OP_B(inst), OP_C(inst).kind == IrOpKind::Undef ? -1 : tagOp(OP_C(inst)), skip
2211
);
2212
2213
{
2214
ScopedSpills spillGuard(regs);
2215
2216
IrCallWrapperX64 callWrap(regs, build, index);
2217
callWrap.addArgument(SizeX64::qword, rState);
2218
callWrap.addArgument(SizeX64::qword, regOp(OP_A(inst)), OP_A(inst));
2219
callWrap.addArgument(SizeX64::qword, tmp);
2220
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaC_barriertable)]);
2221
}
2222
2223
build.setLabel(skip);
2224
break;
2225
}
2226
case IrCmd::SET_SAVEDPC:
2227
{
2228
ScopedRegX64 tmp1{regs, SizeX64::qword};
2229
ScopedRegX64 tmp2{regs, SizeX64::qword};
2230
2231
build.mov(tmp2.reg, sCode);
2232
build.add(tmp2.reg, uintOp(OP_A(inst)) * sizeof(Instruction));
2233
build.mov(tmp1.reg, qword[rState + offsetof(lua_State, ci)]);
2234
build.mov(qword[tmp1.reg + offsetof(CallInfo, savedpc)], tmp2.reg);
2235
break;
2236
}
2237
case IrCmd::CLOSE_UPVALS:
2238
{
2239
Label next;
2240
ScopedRegX64 tmp1{regs, SizeX64::qword};
2241
ScopedRegX64 tmp2{regs, SizeX64::qword};
2242
2243
// L->openupval != 0
2244
build.mov(tmp1.reg, qword[rState + offsetof(lua_State, openupval)]);
2245
build.test(tmp1.reg, tmp1.reg);
2246
build.jcc(ConditionX64::Zero, next);
2247
2248
// ra <= L->openupval->v
2249
build.lea(tmp2.reg, addr[rBase + vmRegOp(OP_A(inst)) * sizeof(TValue)]);
2250
build.cmp(tmp2.reg, qword[tmp1.reg + offsetof(UpVal, v)]);
2251
build.jcc(ConditionX64::Above, next);
2252
2253
tmp1.free();
2254
2255
{
2256
ScopedSpills spillGuard(regs);
2257
2258
IrCallWrapperX64 callWrap(regs, build, index);
2259
callWrap.addArgument(SizeX64::qword, rState);
2260
callWrap.addArgument(SizeX64::qword, tmp2);
2261
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_close)]);
2262
}
2263
2264
build.setLabel(next);
2265
break;
2266
}
2267
case IrCmd::CAPTURE:
2268
// No-op right now
2269
break;
2270
2271
// Fallbacks to non-IR instruction implementations
2272
case IrCmd::SETLIST:
2273
regs.assertAllFree();
2274
emitInstSetList(
2275
regs,
2276
build,
2277
vmRegOp(OP_B(inst)),
2278
vmRegOp(OP_C(inst)),
2279
intOp(OP_D(inst)),
2280
uintOp(OP_E(inst)),
2281
OP_F(inst).kind == IrOpKind::Undef ? -1 : int(uintOp(OP_F(inst)))
2282
);
2283
break;
2284
case IrCmd::CALL:
2285
regs.assertAllFree();
2286
regs.assertNoSpills();
2287
emitInstCall(regs, build, helpers, vmRegOp(OP_A(inst)), intOp(OP_B(inst)), intOp(OP_C(inst)));
2288
break;
2289
case IrCmd::RETURN:
2290
regs.assertAllFree();
2291
regs.assertNoSpills();
2292
emitInstReturn(build, helpers, vmRegOp(OP_A(inst)), intOp(OP_B(inst)), function.variadic);
2293
break;
2294
case IrCmd::FORGLOOP:
2295
regs.assertAllFree();
2296
emitInstForGLoop(regs, build, vmRegOp(OP_A(inst)), intOp(OP_B(inst)), labelOp(OP_C(inst)));
2297
jumpOrFallthrough(blockOp(OP_D(inst)), next);
2298
break;
2299
case IrCmd::FORGLOOP_FALLBACK:
2300
{
2301
IrCallWrapperX64 callWrap(regs, build, index);
2302
callWrap.addArgument(SizeX64::qword, rState);
2303
callWrap.addArgument(SizeX64::dword, vmRegOp(OP_A(inst)));
2304
callWrap.addArgument(SizeX64::dword, intOp(OP_B(inst)));
2305
callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNonTableFallback)]);
2306
2307
emitUpdateBase(build);
2308
2309
build.test(al, al);
2310
build.jcc(ConditionX64::NotZero, labelOp(OP_C(inst)));
2311
jumpOrFallthrough(blockOp(OP_D(inst)), next);
2312
break;
2313
}
2314
case IrCmd::FORGPREP_XNEXT_FALLBACK:
2315
{
2316
IrCallWrapperX64 callWrap(regs, build, index);
2317
callWrap.addArgument(SizeX64::qword, rState);
2318
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(OP_B(inst))));
2319
callWrap.addArgument(SizeX64::dword, uintOp(OP_A(inst)) + 1);
2320
callWrap.call(qword[rNativeContext + offsetof(NativeContext, forgPrepXnextFallback)]);
2321
jumpOrFallthrough(blockOp(OP_C(inst)), next);
2322
break;
2323
}
2324
case IrCmd::COVERAGE:
2325
{
2326
ScopedRegX64 tmp1{regs, SizeX64::qword};
2327
ScopedRegX64 tmp2{regs, SizeX64::dword};
2328
ScopedRegX64 tmp3{regs, SizeX64::dword};
2329
2330
build.mov(tmp1.reg, sCode);
2331
build.add(tmp1.reg, uintOp(OP_A(inst)) * sizeof(Instruction));
2332
2333
// hits = LUAU_INSN_E(*pc)
2334
build.mov(tmp2.reg, dword[tmp1.reg]);
2335
build.sar(tmp2.reg, 8);
2336
2337
// hits = (hits < (1 << 23) - 1) ? hits + 1 : hits;
2338
build.xor_(tmp3.reg, tmp3.reg);
2339
build.cmp(tmp2.reg, (1 << 23) - 1);
2340
build.setcc(ConditionX64::NotEqual, byteReg(tmp3.reg));
2341
build.add(tmp2.reg, tmp3.reg);
2342
2343
// VM_PATCH_E(pc, hits);
2344
build.sal(tmp2.reg, 8);
2345
build.movzx(tmp3.reg, byte[tmp1.reg]);
2346
build.or_(tmp3.reg, tmp2.reg);
2347
build.mov(dword[tmp1.reg], tmp3.reg);
2348
break;
2349
}
2350
2351
// Full instruction fallbacks
2352
case IrCmd::FALLBACK_GETGLOBAL:
2353
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2354
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmConst);
2355
2356
emitFallback(regs, build, offsetof(NativeContext, executeGETGLOBAL), uintOp(OP_A(inst)));
2357
break;
2358
case IrCmd::FALLBACK_SETGLOBAL:
2359
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2360
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmConst);
2361
2362
emitFallback(regs, build, offsetof(NativeContext, executeSETGLOBAL), uintOp(OP_A(inst)));
2363
break;
2364
case IrCmd::FALLBACK_GETTABLEKS:
2365
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2366
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmReg);
2367
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::VmConst);
2368
2369
emitFallback(regs, build, offsetof(NativeContext, executeGETTABLEKS), uintOp(OP_A(inst)));
2370
break;
2371
case IrCmd::FALLBACK_SETTABLEKS:
2372
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2373
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmReg);
2374
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::VmConst);
2375
2376
emitFallback(regs, build, offsetof(NativeContext, executeSETTABLEKS), uintOp(OP_A(inst)));
2377
break;
2378
case IrCmd::FALLBACK_NAMECALL:
2379
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2380
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmReg);
2381
CODEGEN_ASSERT(OP_D(inst).kind == IrOpKind::VmConst);
2382
2383
emitFallback(regs, build, offsetof(NativeContext, executeNAMECALL), uintOp(OP_A(inst)));
2384
break;
2385
case IrCmd::FALLBACK_PREPVARARGS:
2386
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::Constant);
2387
2388
emitFallback(regs, build, offsetof(NativeContext, executePREPVARARGS), uintOp(OP_A(inst)));
2389
break;
2390
case IrCmd::FALLBACK_GETVARARGS:
2391
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2392
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::Constant);
2393
2394
if (intOp(OP_C(inst)) == LUA_MULTRET)
2395
{
2396
IrCallWrapperX64 callWrap(regs, build);
2397
callWrap.addArgument(SizeX64::qword, rState);
2398
2399
RegisterX64 reg = callWrap.suggestNextArgumentRegister(SizeX64::qword);
2400
build.mov(reg, sCode);
2401
callWrap.addArgument(SizeX64::qword, addr[reg + uintOp(OP_A(inst)) * sizeof(Instruction)]);
2402
2403
callWrap.addArgument(SizeX64::qword, rBase);
2404
callWrap.addArgument(SizeX64::dword, vmRegOp(OP_B(inst)));
2405
callWrap.call(qword[rNativeContext + offsetof(NativeContext, executeGETVARARGSMultRet)]);
2406
2407
emitUpdateBase(build);
2408
}
2409
else
2410
{
2411
IrCallWrapperX64 callWrap(regs, build);
2412
callWrap.addArgument(SizeX64::qword, rState);
2413
callWrap.addArgument(SizeX64::qword, rBase);
2414
callWrap.addArgument(SizeX64::dword, vmRegOp(OP_B(inst)));
2415
callWrap.addArgument(SizeX64::dword, intOp(OP_C(inst)));
2416
callWrap.call(qword[rNativeContext + offsetof(NativeContext, executeGETVARARGSConst)]);
2417
}
2418
break;
2419
case IrCmd::NEWCLOSURE:
2420
{
2421
ScopedRegX64 tmp2{regs, SizeX64::qword};
2422
build.mov(tmp2.reg, sClosure);
2423
build.mov(tmp2.reg, qword[tmp2.reg + offsetof(Closure, l.p)]);
2424
build.mov(tmp2.reg, qword[tmp2.reg + offsetof(Proto, p)]);
2425
build.mov(tmp2.reg, qword[tmp2.reg + sizeof(Proto*) * uintOp(OP_C(inst))]);
2426
2427
IrCallWrapperX64 callWrap(regs, build, index);
2428
callWrap.addArgument(SizeX64::qword, rState);
2429
callWrap.addArgument(SizeX64::dword, uintOp(OP_A(inst)), OP_A(inst));
2430
callWrap.addArgument(SizeX64::qword, regOp(OP_B(inst)), OP_B(inst));
2431
callWrap.addArgument(SizeX64::qword, tmp2);
2432
2433
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_newLclosure)]);
2434
2435
inst.regX64 = regs.takeReg(rax, index);
2436
break;
2437
}
2438
case IrCmd::FALLBACK_DUPCLOSURE:
2439
CODEGEN_ASSERT(OP_B(inst).kind == IrOpKind::VmReg);
2440
CODEGEN_ASSERT(OP_C(inst).kind == IrOpKind::VmConst);
2441
2442
emitFallback(regs, build, offsetof(NativeContext, executeDUPCLOSURE), uintOp(OP_A(inst)));
2443
break;
2444
case IrCmd::FALLBACK_FORGPREP:
2445
emitFallback(regs, build, offsetof(NativeContext, executeFORGPREP), uintOp(OP_A(inst)));
2446
jumpOrFallthrough(blockOp(OP_C(inst)), next);
2447
break;
2448
case IrCmd::BITAND_UINT:
2449
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2450
2451
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2452
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2453
2454
build.and_(inst.regX64, memRegUintOp(OP_B(inst)));
2455
break;
2456
case IrCmd::BITXOR_UINT:
2457
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2458
2459
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2460
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2461
2462
build.xor_(inst.regX64, memRegUintOp(OP_B(inst)));
2463
break;
2464
case IrCmd::BITOR_UINT:
2465
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2466
2467
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2468
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2469
2470
build.or_(inst.regX64, memRegUintOp(OP_B(inst)));
2471
break;
2472
case IrCmd::BITNOT_UINT:
2473
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2474
2475
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2476
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2477
2478
build.not_(inst.regX64);
2479
break;
2480
case IrCmd::BITLSHIFT_UINT:
2481
{
2482
ScopedRegX64 shiftTmp{regs};
2483
2484
// Custom bit shift value can only be placed in cl
2485
// but we use it if the shift value is not a constant stored in b
2486
if (OP_B(inst).kind != IrOpKind::Constant)
2487
shiftTmp.take(ecx);
2488
2489
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2490
2491
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2492
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2493
2494
if (OP_B(inst).kind == IrOpKind::Constant)
2495
{
2496
// if shift value is a constant, we extract the byte-sized shift amount
2497
int8_t shift = int8_t(unsigned(intOp(OP_B(inst))));
2498
build.shl(inst.regX64, shift);
2499
}
2500
else
2501
{
2502
build.mov(shiftTmp.reg, memRegUintOp(OP_B(inst)));
2503
build.shl(inst.regX64, byteReg(shiftTmp.reg));
2504
}
2505
2506
break;
2507
}
2508
case IrCmd::BITRSHIFT_UINT:
2509
{
2510
ScopedRegX64 shiftTmp{regs};
2511
2512
// Custom bit shift value can only be placed in cl
2513
// but we use it if the shift value is not a constant stored in b
2514
if (OP_B(inst).kind != IrOpKind::Constant)
2515
shiftTmp.take(ecx);
2516
2517
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2518
2519
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2520
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2521
2522
if (OP_B(inst).kind == IrOpKind::Constant)
2523
{
2524
// if shift value is a constant, we extract the byte-sized shift amount
2525
int8_t shift = int8_t(unsigned(intOp(OP_B(inst))));
2526
build.shr(inst.regX64, shift);
2527
}
2528
else
2529
{
2530
build.mov(shiftTmp.reg, memRegUintOp(OP_B(inst)));
2531
build.shr(inst.regX64, byteReg(shiftTmp.reg));
2532
}
2533
2534
break;
2535
}
2536
case IrCmd::BITARSHIFT_UINT:
2537
{
2538
ScopedRegX64 shiftTmp{regs};
2539
2540
// Custom bit shift value can only be placed in cl
2541
// but we use it if the shift value is not a constant stored in b
2542
if (OP_B(inst).kind != IrOpKind::Constant)
2543
shiftTmp.take(ecx);
2544
2545
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2546
2547
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2548
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2549
2550
if (OP_B(inst).kind == IrOpKind::Constant)
2551
{
2552
// if shift value is a constant, we extract the byte-sized shift amount
2553
int8_t shift = int8_t(unsigned(intOp(OP_B(inst))));
2554
build.sar(inst.regX64, shift);
2555
}
2556
else
2557
{
2558
build.mov(shiftTmp.reg, memRegUintOp(OP_B(inst)));
2559
build.sar(inst.regX64, byteReg(shiftTmp.reg));
2560
}
2561
2562
break;
2563
}
2564
case IrCmd::BITLROTATE_UINT:
2565
{
2566
ScopedRegX64 shiftTmp{regs};
2567
2568
// Custom bit shift value can only be placed in cl
2569
// but we use it if the shift value is not a constant stored in b
2570
if (OP_B(inst).kind != IrOpKind::Constant)
2571
shiftTmp.take(ecx);
2572
2573
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2574
2575
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2576
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2577
2578
if (OP_B(inst).kind == IrOpKind::Constant)
2579
{
2580
// if shift value is a constant, we extract the byte-sized shift amount
2581
int8_t shift = int8_t(unsigned(intOp(OP_B(inst))));
2582
build.rol(inst.regX64, shift);
2583
}
2584
else
2585
{
2586
build.mov(shiftTmp.reg, memRegUintOp(OP_B(inst)));
2587
build.rol(inst.regX64, byteReg(shiftTmp.reg));
2588
}
2589
2590
break;
2591
}
2592
case IrCmd::BITRROTATE_UINT:
2593
{
2594
ScopedRegX64 shiftTmp{regs};
2595
2596
// Custom bit shift value can only be placed in cl
2597
// but we use it if the shift value is not a constant stored in b
2598
if (OP_B(inst).kind != IrOpKind::Constant)
2599
shiftTmp.take(ecx);
2600
2601
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2602
2603
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2604
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2605
2606
if (OP_B(inst).kind == IrOpKind::Constant)
2607
{
2608
// if shift value is a constant, we extract the byte-sized shift amount
2609
int8_t shift = int8_t(unsigned(intOp(OP_B(inst))));
2610
build.ror(inst.regX64, shift);
2611
}
2612
else
2613
{
2614
build.mov(shiftTmp.reg, memRegUintOp(OP_B(inst)));
2615
build.ror(inst.regX64, byteReg(shiftTmp.reg));
2616
}
2617
2618
break;
2619
}
2620
case IrCmd::BITCOUNTLZ_UINT:
2621
{
2622
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2623
2624
Label zero, exit;
2625
2626
build.test(regOp(OP_A(inst)), regOp(OP_A(inst)));
2627
build.jcc(ConditionX64::Equal, zero);
2628
2629
build.bsr(inst.regX64, regOp(OP_A(inst)));
2630
build.xor_(inst.regX64, 0x1f);
2631
build.jmp(exit);
2632
2633
build.setLabel(zero);
2634
build.mov(inst.regX64, 32);
2635
2636
build.setLabel(exit);
2637
break;
2638
}
2639
case IrCmd::BITCOUNTRZ_UINT:
2640
{
2641
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2642
2643
Label zero, exit;
2644
2645
build.test(regOp(OP_A(inst)), regOp(OP_A(inst)));
2646
build.jcc(ConditionX64::Equal, zero);
2647
2648
build.bsf(inst.regX64, regOp(OP_A(inst)));
2649
build.jmp(exit);
2650
2651
build.setLabel(zero);
2652
build.mov(inst.regX64, 32);
2653
2654
build.setLabel(exit);
2655
break;
2656
}
2657
case IrCmd::BYTESWAP_UINT:
2658
{
2659
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst)});
2660
2661
if (OP_A(inst).kind != IrOpKind::Inst || inst.regX64 != regOp(OP_A(inst)))
2662
build.mov(inst.regX64, memRegUintOp(OP_A(inst)));
2663
2664
build.bswap(inst.regX64);
2665
break;
2666
}
2667
case IrCmd::INVOKE_LIBM:
2668
{
2669
IrCallWrapperX64 callWrap(regs, build, index);
2670
callWrap.addArgument(SizeX64::xmmword, memRegDoubleOp(OP_B(inst)), OP_B(inst));
2671
2672
if (HAS_OP_C(inst))
2673
{
2674
bool isInt = (OP_C(inst).kind == IrOpKind::Constant) ? constOp(OP_C(inst)).kind == IrConstKind::Int
2675
: getCmdValueKind(function.instOp(OP_C(inst)).cmd) == IrValueKind::Int;
2676
2677
if (isInt)
2678
callWrap.addArgument(SizeX64::dword, memRegUintOp(OP_C(inst)), OP_C(inst));
2679
else
2680
callWrap.addArgument(SizeX64::xmmword, memRegDoubleOp(OP_C(inst)), OP_C(inst));
2681
}
2682
2683
callWrap.call(qword[rNativeContext + getNativeContextOffset(uintOp(OP_A(inst)))]);
2684
inst.regX64 = regs.takeReg(xmm0, index);
2685
break;
2686
}
2687
case IrCmd::GET_TYPE:
2688
{
2689
inst.regX64 = regs.allocReg(SizeX64::qword, index);
2690
2691
build.mov(inst.regX64, qword[rState + offsetof(lua_State, global)]);
2692
2693
if (OP_A(inst).kind == IrOpKind::Inst)
2694
build.mov(inst.regX64, qword[inst.regX64 + qwordReg(regOp(OP_A(inst))) * sizeof(TString*) + offsetof(global_State, ttname)]);
2695
else if (OP_A(inst).kind == IrOpKind::Constant)
2696
build.mov(inst.regX64, qword[inst.regX64 + tagOp(OP_A(inst)) * sizeof(TString*) + offsetof(global_State, ttname)]);
2697
else
2698
CODEGEN_ASSERT(!"Unsupported instruction form");
2699
break;
2700
}
2701
case IrCmd::GET_TYPEOF:
2702
{
2703
IrCallWrapperX64 callWrap(regs, build);
2704
callWrap.addArgument(SizeX64::qword, rState);
2705
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(OP_A(inst))));
2706
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaT_objtypenamestr)]);
2707
2708
inst.regX64 = regs.takeReg(rax, index);
2709
break;
2710
}
2711
2712
case IrCmd::FINDUPVAL:
2713
{
2714
IrCallWrapperX64 callWrap(regs, build);
2715
callWrap.addArgument(SizeX64::qword, rState);
2716
callWrap.addArgument(SizeX64::qword, luauRegAddress(vmRegOp(OP_A(inst))));
2717
callWrap.call(qword[rNativeContext + offsetof(NativeContext, luaF_findupval)]);
2718
2719
inst.regX64 = regs.takeReg(rax, index);
2720
break;
2721
}
2722
2723
case IrCmd::BUFFER_READI8:
2724
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst), OP_B(inst)});
2725
2726
if (FFlag::LuauCodegenBufNoDefTag)
2727
build.movsx(inst.regX64, byte[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_C(inst)))]);
2728
else
2729
build.movsx(inst.regX64, byte[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)))]);
2730
break;
2731
2732
case IrCmd::BUFFER_READU8:
2733
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst), OP_B(inst)});
2734
2735
if (FFlag::LuauCodegenBufNoDefTag)
2736
build.movzx(inst.regX64, byte[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_C(inst)))]);
2737
else
2738
build.movzx(inst.regX64, byte[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)))]);
2739
break;
2740
2741
case IrCmd::BUFFER_WRITEI8:
2742
{
2743
OperandX64 value = OP_C(inst).kind == IrOpKind::Inst ? byteReg(regOp(OP_C(inst))) : OperandX64(int8_t(intOp(OP_C(inst))));
2744
2745
if (FFlag::LuauCodegenBufNoDefTag)
2746
build.mov(byte[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_D(inst)))], value);
2747
else
2748
build.mov(byte[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)))], value);
2749
break;
2750
}
2751
2752
case IrCmd::BUFFER_READI16:
2753
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst), OP_B(inst)});
2754
2755
if (FFlag::LuauCodegenBufNoDefTag)
2756
build.movsx(inst.regX64, word[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_C(inst)))]);
2757
else
2758
build.movsx(inst.regX64, word[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)))]);
2759
break;
2760
2761
case IrCmd::BUFFER_READU16:
2762
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst), OP_B(inst)});
2763
2764
if (FFlag::LuauCodegenBufNoDefTag)
2765
build.movzx(inst.regX64, word[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_C(inst)))]);
2766
else
2767
build.movzx(inst.regX64, word[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)))]);
2768
break;
2769
2770
case IrCmd::BUFFER_WRITEI16:
2771
{
2772
OperandX64 value = OP_C(inst).kind == IrOpKind::Inst ? wordReg(regOp(OP_C(inst))) : OperandX64(int16_t(intOp(OP_C(inst))));
2773
2774
if (FFlag::LuauCodegenBufNoDefTag)
2775
build.mov(word[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_D(inst)))], value);
2776
else
2777
build.mov(word[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)))], value);
2778
break;
2779
}
2780
2781
case IrCmd::BUFFER_READI32:
2782
inst.regX64 = regs.allocRegOrReuse(SizeX64::dword, index, {OP_A(inst), OP_B(inst)});
2783
2784
if (FFlag::LuauCodegenBufNoDefTag)
2785
build.mov(inst.regX64, dword[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_C(inst)))]);
2786
else
2787
build.mov(inst.regX64, dword[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)))]);
2788
break;
2789
2790
case IrCmd::BUFFER_WRITEI32:
2791
{
2792
OperandX64 value = OP_C(inst).kind == IrOpKind::Inst ? regOp(OP_C(inst)) : OperandX64(intOp(OP_C(inst)));
2793
2794
if (FFlag::LuauCodegenBufNoDefTag)
2795
build.mov(dword[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_D(inst)))], value);
2796
else
2797
build.mov(dword[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)))], value);
2798
break;
2799
}
2800
2801
case IrCmd::BUFFER_READF32:
2802
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
2803
2804
if (FFlag::LuauCodegenBufNoDefTag)
2805
build.vmovss(inst.regX64, dword[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_C(inst)))]);
2806
else
2807
build.vmovss(inst.regX64, dword[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)))]);
2808
break;
2809
2810
case IrCmd::BUFFER_WRITEF32:
2811
if (FFlag::LuauCodegenBufNoDefTag)
2812
storeFloat(dword[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_D(inst)))], OP_C(inst));
2813
else
2814
storeFloat(dword[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)))], OP_C(inst));
2815
break;
2816
2817
case IrCmd::BUFFER_READF64:
2818
inst.regX64 = regs.allocReg(SizeX64::xmmword, index);
2819
2820
if (FFlag::LuauCodegenBufNoDefTag)
2821
build.vmovsd(inst.regX64, qword[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_C(inst)))]);
2822
else
2823
build.vmovsd(inst.regX64, qword[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_C(inst) ? LUA_TBUFFER : tagOp(OP_C(inst)))]);
2824
break;
2825
2826
case IrCmd::BUFFER_WRITEF64:
2827
if (OP_C(inst).kind == IrOpKind::Constant)
2828
{
2829
ScopedRegX64 tmp{regs, SizeX64::xmmword};
2830
build.vmovsd(tmp.reg, build.f64(doubleOp(OP_C(inst))));
2831
2832
if (FFlag::LuauCodegenBufNoDefTag)
2833
build.vmovsd(qword[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_D(inst)))], tmp.reg);
2834
else
2835
build.vmovsd(qword[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)))], tmp.reg);
2836
}
2837
else if (OP_C(inst).kind == IrOpKind::Inst)
2838
{
2839
if (FFlag::LuauCodegenBufNoDefTag)
2840
build.vmovsd(qword[bufferAddrOp(OP_A(inst), OP_B(inst), tagOp(OP_D(inst)))], regOp(OP_C(inst)));
2841
else
2842
build.vmovsd(qword[bufferAddrOp(OP_A(inst), OP_B(inst), !HAS_OP_D(inst) ? LUA_TBUFFER : tagOp(OP_D(inst)))], regOp(OP_C(inst)));
2843
}
2844
else
2845
{
2846
CODEGEN_ASSERT(!"Unsupported instruction form");
2847
}
2848
break;
2849
2850
// Pseudo instructions
2851
case IrCmd::NOP:
2852
case IrCmd::SUBSTITUTE:
2853
case IrCmd::MARK_USED:
2854
case IrCmd::MARK_DEAD:
2855
CODEGEN_ASSERT(!"Pseudo instructions should not be lowered");
2856
break;
2857
}
2858
2859
valueTracker.afterInstLowering(inst, index);
2860
2861
regs.currInstIdx = kInvalidInstIdx;
2862
2863
regs.freeLastUseRegs(inst, index);
2864
}
2865
2866
void IrLoweringX64::startBlock(const IrBlock& curr)
2867
{
2868
if (curr.startpc != kBlockNoStartPc)
2869
allocAndIncrementCounterAt(
2870
curr.kind == IrBlockKind::Fallback ? CodeGenCounter::FallbackBlockExecuted : CodeGenCounter::RegularBlockExecuted, curr.startpc
2871
);
2872
}
2873
2874
void IrLoweringX64::finishBlock(const IrBlock& curr, const IrBlock& next)
2875
{
2876
if (!regs.spills.empty())
2877
{
2878
// If we have spills remaining, we have to immediately lower the successor block
2879
for (uint32_t predIdx : predecessors(function.cfg, function.getBlockIndex(next)))
2880
CODEGEN_ASSERT(predIdx == function.getBlockIndex(curr) || function.blocks[predIdx].kind == IrBlockKind::Dead);
2881
2882
// And the next block cannot be a join block in cfg
2883
CODEGEN_ASSERT(next.useCount == 1);
2884
}
2885
}
2886
2887
void IrLoweringX64::finishFunction()
2888
{
2889
if (build.logText)
2890
build.logAppend("; interrupt handlers\n");
2891
2892
for (InterruptHandler& handler : interruptHandlers)
2893
{
2894
build.setLabel(handler.self);
2895
build.mov(eax, handler.pcpos + 1);
2896
build.lea(rbx, handler.next);
2897
build.jmp(helpers.interrupt);
2898
}
2899
2900
if (build.logText)
2901
build.logAppend("; exit handlers\n");
2902
2903
for (ExitHandler& handler : exitHandlers)
2904
{
2905
if (handler.pcpos == kVmExitEntryGuardPc)
2906
{
2907
build.setLabel(handler.self);
2908
2909
allocAndIncrementCounterAt(CodeGenCounter::VmExitTaken, ~0u);
2910
2911
build.jmp(helpers.exitContinueVmClearNativeFlag);
2912
}
2913
else
2914
{
2915
build.setLabel(handler.self);
2916
2917
allocAndIncrementCounterAt(CodeGenCounter::VmExitTaken, handler.pcpos);
2918
2919
build.mov(edx, handler.pcpos * sizeof(Instruction));
2920
build.jmp(helpers.updatePcAndContinueInVm);
2921
}
2922
}
2923
2924
// An undefined instruction is placed after the function to be used as an aborting jump offset
2925
function.endLocation = build.getLabelOffset(build.setLabel());
2926
build.ud2();
2927
2928
if (stats)
2929
{
2930
if (regs.maxUsedSlot > kSpillSlots_NEW + kExtraSpillSlots)
2931
stats->regAllocErrors++;
2932
2933
if (regs.maxUsedSlot > stats->maxSpillSlotsUsed)
2934
stats->maxSpillSlotsUsed = regs.maxUsedSlot;
2935
}
2936
}
2937
2938
bool IrLoweringX64::hasError() const
2939
{
2940
// If register allocator had to use more stack slots than we have available, this function can't run natively
2941
if (regs.maxUsedSlot > kSpillSlots_NEW + kExtraSpillSlots)
2942
return true;
2943
2944
return false;
2945
}
2946
2947
// Returns true when 'target' is the block that will be lowered immediately after the
// current one, in which case an explicit jump to it can be omitted
bool IrLoweringX64::isFallthroughBlock(const IrBlock& target, const IrBlock& next)
{
    return target.start == next.start;
}
2951
2952
// Resolves a jump target operand to a label: an existing block label, a previously
// registered VM exit handler label, or the caller-provided 'fresh' label otherwise.
Label& IrLoweringX64::getTargetLabel(IrOp op, Label& fresh)
{
    switch (op.kind)
    {
    case IrOpKind::Undef:
        return fresh;
    case IrOpKind::VmExit:
    {
        // Reuse the handler already created for this exit point when one exists
        uint32_t* handlerIdx = exitHandlerMap.find(vmExitOp(op));

        return handlerIdx ? exitHandlers[*handlerIdx].self : fresh;
    }
    default:
        return labelOp(op);
    }
}
2967
2968
// Registers a new VM exit handler when the 'fresh' label handed to getTargetLabel
// was actually used (non-zero id) for a VmExit target.
void IrLoweringX64::finalizeTargetLabel(IrOp op, Label& fresh)
{
    if (op.kind != IrOpKind::VmExit || fresh.id == 0)
        return;

    exitHandlerMap[vmExitOp(op)] = uint32_t(exitHandlers.size());
    exitHandlers.push_back({fresh, vmExitOp(op)});
}
2976
2977
// Emits a jump to 'target' unless it is the next block to be lowered.
void IrLoweringX64::jumpOrFallthrough(IrBlock& target, const IrBlock& next)
{
    if (isFallthroughBlock(target, next))
        return;

    build.jmp(target.label);
}
2982
2983
// Emits a conditional jump to 'target'; ConditionX64::Count encodes 'unconditional'.
// A target of IrOpKind::Undef means the taken path aborts execution (ud2) instead of branching.
void IrLoweringX64::jumpOrAbortOnUndef(ConditionX64 cond, IrOp target, const IrBlock& next)
{
    Label fresh;
    Label& label = getTargetLabel(target, fresh);

    if (target.kind == IrOpKind::Undef)
    {
        if (cond == ConditionX64::Count)
        {
            build.ud2(); // Unconditional jump to abort is just an abort
        }
        else
        {
            // Negate the condition so the non-taken path skips over the abort
            build.jcc(getNegatedCondition(cond), label);
            build.ud2();
            build.setLabel(label);
        }
    }
    else if (cond == ConditionX64::Count)
    {
        // Unconditional jump can be skipped if it's a fallthrough
        if (target.kind == IrOpKind::VmExit || !isFallthroughBlock(blockOp(target), next))
            build.jmp(label);
    }
    else
    {
        build.jcc(cond, label);
    }

    // If 'fresh' was used for a VM exit, register it as a new exit handler
    finalizeTargetLabel(target, fresh);
}
3014
3015
// Unconditional variant: ConditionX64::Count acts as the 'always' condition
void IrLoweringX64::jumpOrAbortOnUndef(IrOp target, const IrBlock& next)
{
    jumpOrAbortOnUndef(ConditionX64::Count, target, next);
}
3019
3020
// Stores a 32-bit float value described by 'src' into the memory operand 'dst'
void IrLoweringX64::storeFloat(OperandX64 dst, IrOp src)
{
    switch (src.kind)
    {
    case IrOpKind::Constant:
    {
        // A constant has to be staged through an xmm register: there is no immediate float store
        ScopedRegX64 tmp{regs, SizeX64::xmmword};

        build.vmovss(tmp.reg, build.f32(float(doubleOp(src))));
        build.vmovss(dst, tmp.reg);
        break;
    }
    case IrOpKind::Inst:
        CODEGEN_ASSERT(getCmdValueKind(function.instOp(src).cmd) == IrValueKind::Float);
        build.vmovss(dst, regOp(src));
        break;
    default:
        CODEGEN_ASSERT(!"Unsupported instruction form");
        break;
    }
}
3038
3039
// Narrows a double value described by 'src' to float and stores it into 'dst'
void IrLoweringX64::storeDoubleAsFloat(OperandX64 dst, IrOp src)
{
    ScopedRegX64 tmp{regs, SizeX64::xmmword};

    switch (src.kind)
    {
    case IrOpKind::Constant:
        // Constants are narrowed at compile time
        build.vmovss(tmp.reg, build.f32(float(doubleOp(src))));
        break;
    case IrOpKind::Inst:
        // Runtime values are converted with cvtsd2ss
        build.vcvtsd2ss(tmp.reg, regOp(src), regOp(src));
        break;
    default:
        CODEGEN_ASSERT(!"Unsupported instruction form");
        break;
    }

    build.vmovss(dst, tmp.reg);
}
3057
3058
// Emits a check of the current closure's environment 'safeenv' flag;
// jumps to 'target' (or aborts when it is Undef) if the flag is zero
void IrLoweringX64::checkSafeEnv(IrOp target, const IrBlock& next)
{
    ScopedRegX64 tmp{regs, SizeX64::qword};

    // Load Closure::env and compare its safeenv byte against 0
    build.mov(tmp.reg, sClosure);
    build.mov(tmp.reg, qword[tmp.reg + offsetof(Closure, env)]);
    build.cmp(byte[tmp.reg + offsetof(LuaTable, safeenv)], 0);

    jumpOrAbortOnUndef(ConditionX64::Equal, target, next);
}
3068
3069
// Reserves a counter record in extraNativeData and emits code incrementing it.
// No-op when counter recording is disabled for the function.
void IrLoweringX64::allocAndIncrementCounterAt(CodeGenCounter kind, uint32_t pcpos)
{
    if (!function.recordCounters)
        return;

    if (build.logText)
        build.logAppend("; counter kind %u at pcpos %d\n", unsigned(kind), pcpos);

    // Record layout: {uint32_t, uint32_t, uint64_t}
    function.extraNativeData.push_back(unsigned(kind));
    function.extraNativeData.push_back(pcpos);
    // The increment is emitted before the counter's two zero words are pushed,
    // so that extraNativeData.size() is the offset of the counter itself
    incrementCounterAt(function.extraNativeData.size());
    function.extraNativeData.push_back(0);
    function.extraNativeData.push_back(0);
}
3084
3085
// Emits code incrementing a 64-bit counter located in Proto::execdata at word offset
// 'offset' past the first 'sizecode' words
void IrLoweringX64::incrementCounterAt(size_t offset)
{
    ScopedRegX64 tmp{regs, SizeX64::qword};

    // Get counter slot: Closure -> Proto -> execdata
    build.mov(tmp.reg, sClosure);
    build.mov(tmp.reg, qword[tmp.reg + offsetof(Closure, l.p)]);
    build.mov(tmp.reg, qword[tmp.reg + offsetof(Proto, execdata)]);

    // Increment
    build.inc(qword[tmp.reg + uint32_t(function.proto->sizecode + uint32_t(offset)) * 4]);
}
3097
3098
// Builds an x64 operand for a double-typed IR operand: the register holding it,
// an embedded constant, or the VM register/constant memory location
OperandX64 IrLoweringX64::memRegDoubleOp(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);

    if (op.kind == IrOpKind::Constant)
        return build.f64(doubleOp(op));

    if (op.kind == IrOpKind::VmReg)
        return luauRegValue(vmRegOp(op));

    if (op.kind == IrOpKind::VmConst)
        return luauConstantValue(vmConstOp(op));

    CODEGEN_ASSERT(!"Unsupported operand kind");
    return noreg;
}
3116
3117
// Builds an x64 operand for a float-typed IR operand: a register or an embedded constant
OperandX64 IrLoweringX64::memRegFloatOp(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
    {
        CODEGEN_ASSERT(getCmdValueKind(function.instructions[op.index].cmd) == IrValueKind::Float);
        return regOp(op);
    }

    if (op.kind == IrOpKind::Constant)
        return build.f32(float(doubleOp(op)));

    CODEGEN_ASSERT(!"Unsupported operand kind");
    return noreg;
}
3132
3133
// Builds an x64 operand for an unsigned-integer IR operand: a register, an immediate,
// or the VM register integer payload
OperandX64 IrLoweringX64::memRegUintOp(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);

    if (op.kind == IrOpKind::Constant)
        return OperandX64(unsigned(intOp(op)));

    if (op.kind == IrOpKind::VmReg)
        return luauRegValueInt(vmRegOp(op));

    CODEGEN_ASSERT(!"Unsupported operand kind");
    return noreg;
}
3149
3150
// Builds an x64 operand for a signed-integer IR operand: a register, an immediate,
// or the VM register integer payload
OperandX64 IrLoweringX64::memRegIntOp(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);

    if (op.kind == IrOpKind::Constant)
        return OperandX64(intOp(op));

    if (op.kind == IrOpKind::VmReg)
        return luauRegValueInt(vmRegOp(op));

    CODEGEN_ASSERT(!"Unsupported operand kind");
    return noreg;
}
3166
3167
// Builds an x64 operand for a tag value: a register holding it, or the tag field
// of a VM register/constant
OperandX64 IrLoweringX64::memRegTagOp(IrOp op)
{
    if (op.kind == IrOpKind::Inst)
        return regOp(op);

    if (op.kind == IrOpKind::VmReg)
        return luauRegTag(vmRegOp(op));

    if (op.kind == IrOpKind::VmConst)
        return luauConstantTag(vmConstOp(op));

    CODEGEN_ASSERT(!"Unsupported operand kind");
    return noreg;
}
3183
3184
// Returns the register assigned to an instruction operand, restoring its value
// from the spill area first when required
RegisterX64 IrLoweringX64::regOp(IrOp op)
{
    IrInst& target = function.instOp(op);

    bool needsRestore = target.spilled || target.needsReload;

    if (needsRestore)
        regs.restore(target, false);

    CODEGEN_ASSERT(target.regX64 != noreg);
    return target.regX64;
}
3194
3195
// Computes the address of a buffer/userdata payload element:
// object base register + index (register or constant) + payload data offset
OperandX64 IrLoweringX64::bufferAddrOp(IrOp bufferOp, IrOp indexOp, uint8_t tag)
{
    CODEGEN_ASSERT(tag == LUA_TUSERDATA || tag == LUA_TBUFFER);

    int dataOffset = tag == LUA_TBUFFER ? offsetof(Buffer, data) : offsetof(Udata, data);

    switch (indexOp.kind)
    {
    case IrOpKind::Inst:
        CODEGEN_ASSERT(!producesDirtyHighRegisterBits(function.instOp(indexOp).cmd)); // Ensure that high register bits are cleared
        return regOp(bufferOp) + qwordReg(regOp(indexOp)) + dataOffset;
    case IrOpKind::Constant:
        return regOp(bufferOp) + intOp(indexOp) + dataOffset;
    default:
        CODEGEN_ASSERT(!"Unsupported instruction form");
        return noreg;
    }
}
3214
3215
// Returns a register holding the vector operand, masking out the .w component
// through 'tmp' when the source may carry the denormal TVECTOR tag in .w
RegisterX64 IrLoweringX64::vecOp(IrOp op, ScopedRegX64& tmp)
{
    // Bind by const reference: the instruction is only inspected here, and copying
    // the whole IrInst by value on every call is an unnecessary cost
    const IrInst& source = function.instOp(op);
    CODEGEN_ASSERT(source.cmd != IrCmd::SUBSTITUTE); // we don't process substitutions

    // source that comes from memory or from tag instruction has .w = TVECTOR, which is denormal
    // to avoid performance degradation on some CPUs we mask this component to produce zero
    // otherwise we conservatively assume the vector is a result of a well formed math op so .w is a normal number or zero
    if (source.cmd != IrCmd::LOAD_TVALUE && source.cmd != IrCmd::GET_UPVALUE && source.cmd != IrCmd::TAG_VECTOR)
        return regOp(op);

    tmp.alloc(SizeX64::xmmword);
    build.vandps(tmp.reg, regOp(op), vectorAndMaskOp());
    return tmp.reg;
}
3230
3231
// Fetches the IR constant referenced by the operand
IrConst IrLoweringX64::constOp(IrOp op) const
{
    return function.constOp(op);
}
3235
3236
// Fetches the tag constant referenced by the operand
uint8_t IrLoweringX64::tagOp(IrOp op) const
{
    return function.tagOp(op);
}
3240
3241
// Fetches the signed integer constant referenced by the operand
int IrLoweringX64::intOp(IrOp op) const
{
    return function.intOp(op);
}
3245
3246
// Fetches the unsigned integer constant referenced by the operand
unsigned IrLoweringX64::uintOp(IrOp op) const
{
    return function.uintOp(op);
}
3250
3251
// Fetches the import id constant referenced by the operand
unsigned IrLoweringX64::importOp(IrOp op) const
{
    return function.importOp(op);
}
3255
3256
// Fetches the double constant referenced by the operand
double IrLoweringX64::doubleOp(IrOp op) const
{
    return function.doubleOp(op);
}
3260
3261
// Fetches the IR block referenced by the operand
IrBlock& IrLoweringX64::blockOp(IrOp op) const
{
    return function.blockOp(op);
}
3265
3266
// Fetches the assembly label of the block referenced by the operand
Label& IrLoweringX64::labelOp(IrOp op) const
{
    return blockOp(op).label;
}
3270
3271
// Returns a lazily-created {~0u, ~0u, ~0u, 0} xmm constant used to zero out the
// .w component of a vector value
OperandX64 IrLoweringX64::vectorAndMaskOp()
{
    // 'base == noreg' marks the cached constant as not yet emitted
    if (vectorAndMask.base == noreg)
        vectorAndMask = build.u32x4(~0u, ~0u, ~0u, 0);

    return vectorAndMask;
}
3278
3279
} // namespace X64
3280
} // namespace CodeGen
3281
} // namespace Luau
3282
3283