Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/CodeGen/src/EmitCommonX64.cpp
2725 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#include "EmitCommonX64.h"
3
4
#include "Luau/AssemblyBuilderX64.h"
5
#include "Luau/IrCallWrapperX64.h"
6
#include "Luau/IrData.h"
7
#include "Luau/IrRegAllocX64.h"
8
#include "Luau/IrUtils.h"
9
10
#include "NativeState.h"
11
12
#include "lgc.h"
13
#include "lstate.h"
14
15
#include <utility>
16
17
LUAU_DYNAMIC_FASTFLAGVARIABLE(AddReturnExectargetCheck, false)
18
19
namespace Luau
20
{
21
namespace CodeGen
22
{
23
namespace X64
24
{
25
26
// Emits a scalar floating-point comparison between lhs and rhs and a conditional jump
// to 'label' taken when 'cond' holds. 'floatPrecision' selects single-precision
// (vucomiss) vs double-precision (vucomisd) compares; 'tmp' is a scratch xmm register
// used only when rhs is not already in a register. NaN operands take the jump only for
// the 'Not*' conditions and NotEqual.
void jumpOnNumberCmp(AssemblyBuilderX64& build, RegisterX64 tmp, OperandX64 lhs, OperandX64 rhs, IrCondition cond, Label& label, bool floatPrecision)
{
    // EFLAGS produced by comi/ucomi:
    //   greater:         ZF=0 CF=0 PF=0
    //   less:            CF=1 only
    //   equal:           ZF=1 only
    //   unordered (NaN): ZF=1 CF=1 PF=1

    // There is no conditional jump that tests 'greater' in an IEEE 754 compliant way,
    // so the 'greater' family is rewritten in terms of 'less' by exchanging operands
    bool greaterFamily = cond == IrCondition::Greater || cond == IrCondition::GreaterEqual || cond == IrCondition::NotGreater ||
                         cond == IrCondition::NotGreaterEqual;

    if (greaterFamily)
        std::swap(lhs, rhs);

    if (rhs.cat == CategoryX64::reg)
    {
        if (floatPrecision)
            build.vucomiss(rhs, lhs);
        else
            build.vucomisd(rhs, lhs);
    }
    else
    {
        // a memory operand has to be loaded into a register to be the first compare operand
        if (floatPrecision)
        {
            build.vmovss(tmp, rhs);
            build.vucomiss(tmp, lhs);
        }
        else
        {
            build.vmovsd(tmp, rhs);
            build.vucomisd(tmp, lhs);
        }
    }

    // 'Not' conditions have to treat comparisons involving NaN as 'true'
    // Because of NaN, integer identities like 'not less or equal' <-> 'greater' do not hold
    switch (cond)
    {
    case IrCondition::Less:
    case IrCondition::Greater:
        // (a < b) tests as (b > a); ja requires CF=0 and ZF=0, i.e. > and ordered
        build.jcc(ConditionX64::Above, label);
        break;
    case IrCondition::NotLess:
    case IrCondition::NotGreater:
        // !(a < b) tests as (b <= a); jna requires CF=1 or ZF=1, i.e. <= or NaN
        build.jcc(ConditionX64::NotAbove, label);
        break;
    case IrCondition::LessEqual:
    case IrCondition::GreaterEqual:
        // (a <= b) tests as (b >= a); jae requires CF=0, i.e. >= and not NaN
        build.jcc(ConditionX64::AboveEqual, label);
        break;
    case IrCondition::NotLessEqual:
    case IrCondition::NotGreaterEqual:
        // !(a <= b) tests as (b < a); jnae requires CF=1, i.e. < or NaN
        build.jcc(ConditionX64::NotAboveEqual, label);
        break;
    case IrCondition::NotEqual:
        // != is ZF=0; NaN additionally reports PF=1
        build.jcc(ConditionX64::NotZero, label);
        build.jcc(ConditionX64::Parity, label);
        break;
    default:
        CODEGEN_ASSERT(!"Unsupported condition");
    }
}
96
97
// Maps an integer IR comparison condition onto the x64 condition code that implements
// it. The unsigned variants use the below/above family; the signed and equality
// variants map one-to-one by name. Unsupported conditions assert and yield Zero.
ConditionX64 getConditionInt(IrCondition cond)
{
    switch (cond)
    {
    case IrCondition::UnsignedLess:
        return ConditionX64::Below;
    case IrCondition::UnsignedLessEqual:
        return ConditionX64::BelowEqual;
    case IrCondition::UnsignedGreater:
        return ConditionX64::Above;
    case IrCondition::UnsignedGreaterEqual:
        return ConditionX64::AboveEqual;
    case IrCondition::Equal:
        return ConditionX64::Equal;
    case IrCondition::NotEqual:
        return ConditionX64::NotEqual;
    case IrCondition::Less:
        return ConditionX64::Less;
    case IrCondition::NotLess:
        return ConditionX64::NotLess;
    case IrCondition::LessEqual:
        return ConditionX64::LessEqual;
    case IrCondition::NotLessEqual:
        return ConditionX64::NotLessEqual;
    case IrCondition::Greater:
        return ConditionX64::Greater;
    case IrCondition::NotGreater:
        return ConditionX64::NotGreater;
    case IrCondition::GreaterEqual:
        return ConditionX64::GreaterEqual;
    case IrCondition::NotGreaterEqual:
        return ConditionX64::NotGreaterEqual;
    default:
        CODEGEN_ASSERT(!"Unsupported condition");
        return ConditionX64::Zero;
    }
}
134
135
// Computes the address of the hash-part node selected by the slot cached in the C
// operand of the instruction at 'pcpos', leaving it in 'node'.
//  tmp   - scratch GPR, clobbered
//  node  - receives LuaNode* (&table->node[C & table->nodemask8])
//  table - register holding the LuaTable*
void getTableNodeAtCachedSlot(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 node, RegisterX64 table, int pcpos)
{
    CODEGEN_ASSERT(tmp != node);
    CODEGEN_ASSERT(table != node);

    build.mov(node, qword[table + offsetof(LuaTable, node)]);

    // compute cached slot: slot = C operand of the instruction, masked by nodemask8
    build.mov(tmp, sCode);
    build.movzx(dwordReg(tmp), byte[tmp + pcpos * sizeof(Instruction) + kOffsetOfInstructionC]);
    build.and_(byteReg(tmp), byte[table + offsetof(LuaTable, nodemask8)]);

    // LuaNode* n = &h->node[slot];
    build.shl(dwordReg(tmp), kLuaNodeSizeLog2);
    build.add(node, tmp);
}
151
152
// Converts the double in 'numd' into a 32-bit integer in 'numi', jumping to 'label'
// unless the double was an exact integer value (round-trips through the conversion).
//  tmp  - scratch xmm register, clobbered
//  numi - must be a dword-sized GPR
void convertNumberToIndexOrJump(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 numd, RegisterX64 numi, Label& label)
{
    CODEGEN_ASSERT(numi.size == SizeX64::dword);

    // Convert to integer, NaN is converted into 0x80000000
    build.vcvttsd2si(numi, numd);

    // Convert that integer back to double
    build.vcvtsi2sd(tmp, numd, numi);

    build.vucomisd(tmp, numd); // Sets ZF=1 if equal or NaN
    // We don't need non-integer values
    // But to skip the PF=1 check, we proceed with NaN because 0x80000000 index is out of bounds
    build.jcc(ConditionX64::NotZero, label);
}
167
168
// Emits a call to the out-of-line arithmetic helper selected by 'tm'
// (luaV_doarith* family), passing (L, &base[ra], b, c). The helper may reallocate the
// Lua stack, so the cached base pointer is re-read afterwards.
void callArithHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, OperandX64 b, OperandX64 c, TMS tm)
{
    IrCallWrapperX64 arithCall(regs, build);
    arithCall.addArgument(SizeX64::qword, rState);
    arithCall.addArgument(SizeX64::qword, luauRegAddress(ra));
    arithCall.addArgument(SizeX64::qword, b);
    arithCall.addArgument(SizeX64::qword, c);

    // every tag method gets its own specialized helper in the native context
    switch (tm)
    {
    case TM_ADD:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithadd)]);
        break;
    case TM_SUB:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithsub)]);
        break;
    case TM_MUL:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithmul)]);
        break;
    case TM_DIV:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithdiv)]);
        break;
    case TM_IDIV:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithidiv)]);
        break;
    case TM_MOD:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithmod)]);
        break;
    case TM_POW:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithpow)]);
        break;
    case TM_UNM:
        arithCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_doarithunm)]);
        break;
    default:
        CODEGEN_ASSERT(!"Invalid doarith helper operation tag");
        break;
    }

    emitUpdateBase(build);
}
209
210
// Emits a call to luaV_dolen with (L, &base[ra], &base[rb]); refreshes the cached base
// pointer afterwards since the helper may reallocate the Lua stack.
void callLengthHelper(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb)
{
    IrCallWrapperX64 lenCall(regs, build);
    lenCall.addArgument(SizeX64::qword, rState);
    lenCall.addArgument(SizeX64::qword, luauRegAddress(ra));
    lenCall.addArgument(SizeX64::qword, luauRegAddress(rb));
    lenCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_dolen)]);

    emitUpdateBase(build);
}
220
221
// Emits a call to luaV_gettable with (L, &base[rb], c, &base[ra]); refreshes the cached
// base pointer afterwards since the helper may reallocate the Lua stack.
void callGetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
    IrCallWrapperX64 getCall(regs, build);
    getCall.addArgument(SizeX64::qword, rState);
    getCall.addArgument(SizeX64::qword, luauRegAddress(rb));
    getCall.addArgument(SizeX64::qword, c);
    getCall.addArgument(SizeX64::qword, luauRegAddress(ra));
    getCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_gettable)]);

    emitUpdateBase(build);
}
232
233
// Emits a call to luaV_settable with (L, &base[rb], c, &base[ra]); refreshes the cached
// base pointer afterwards since the helper may reallocate the Lua stack.
void callSetTable(IrRegAllocX64& regs, AssemblyBuilderX64& build, int rb, OperandX64 c, int ra)
{
    IrCallWrapperX64 setCall(regs, build);
    setCall.addArgument(SizeX64::qword, rState);
    setCall.addArgument(SizeX64::qword, luauRegAddress(rb));
    setCall.addArgument(SizeX64::qword, c);
    setCall.addArgument(SizeX64::qword, luauRegAddress(ra));
    setCall.call(qword[rNativeContext + offsetof(NativeContext, luaV_settable)]);

    emitUpdateBase(build);
}
244
245
// Emits the checks guarding a GC write barrier; jumps to 'skip' whenever the barrier
// call is unnecessary. A barrier is required only when a collectable value is stored
// into a black object while the value itself is still white.
//  tmp    - scratch qword GPR, clobbered
//  object - GPR holding the GC object being stored into
//  ra     - when raOp is IrOpKind::Inst, an xmm register holding the packed TValue being
//           stored (lane 3 = tag, low qword = value); otherwise unused and the TValue is
//           re-read via raOp (VM register or constant)
//  ratag  - statically known tag of the stored value, or -1 when unknown
void checkObjectBarrierConditions(AssemblyBuilderX64& build, RegisterX64 tmp, RegisterX64 object, RegisterX64 ra, IrOp raOp, int ratag, Label& skip)
{
    // Barrier should've been optimized away if we know that it's not collectable, checking for correctness
    if (ratag == -1 || !isGCO(ratag))
    {
        // iscollectable(ra)
        if (raOp.kind == IrOpKind::Inst)
        {
            // tag lives in dword lane 3 of the packed TValue
            build.vpextrd(dwordReg(tmp), ra, 3);
            build.cmp(dwordReg(tmp), LUA_TSTRING);
        }
        else
        {
            OperandX64 tag = (raOp.kind == IrOpKind::VmReg) ? luauRegTag(vmRegOp(raOp)) : luauConstantTag(vmConstOp(raOp));
            build.cmp(tag, LUA_TSTRING);
        }

        // tags below LUA_TSTRING are non-collectable, so no barrier is needed
        build.jcc(ConditionX64::Less, skip);
    }

    // isblack(obj2gco(o))
    build.test(byte[object + offsetof(GCheader, marked)], bitmask(BLACKBIT));
    build.jcc(ConditionX64::Zero, skip);

    // iswhite(gcvalue(ra)): load the GC object pointer of the stored value into tmp
    if (raOp.kind == IrOpKind::Inst)
    {
        // value pointer lives in the low qword of the packed TValue
        build.vmovq(tmp, ra);
    }
    else
    {
        OperandX64 value = (raOp.kind == IrOpKind::VmReg) ? luauRegValue(vmRegOp(raOp)) : luauConstantValue(vmConstOp(raOp));
        build.mov(tmp, value);
    }
    build.test(byte[tmp + offsetof(GCheader, marked)], bit2mask(WHITE0BIT, WHITE1BIT));
    build.jcc(ConditionX64::Zero, skip);
}
282
283
// Emits a conditional GC write barrier for storing the value described by ra/raOp/ratag
// into 'object'. When checkObjectBarrierConditions determines a barrier is needed, live
// registers are spilled and luaC_barrierf(L, object, value) is called.
void callBarrierObject(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 object, IrOp objectOp, RegisterX64 ra, IrOp raOp, int ratag)
{
    Label skipBarrier;

    // scratch register also ends up holding the stored value's GC pointer (third argument)
    ScopedRegX64 valueTmp{regs, SizeX64::qword};
    checkObjectBarrierConditions(build, valueTmp.reg, object, ra, raOp, ratag, skipBarrier);

    {
        ScopedSpills spills(regs);

        IrCallWrapperX64 barrierCall(regs, build);
        barrierCall.addArgument(SizeX64::qword, rState);
        barrierCall.addArgument(SizeX64::qword, object, objectOp);
        barrierCall.addArgument(SizeX64::qword, valueTmp);
        barrierCall.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierf)]);
    }

    build.setLabel(skipBarrier);
}
302
303
// Emits a conditional backward GC barrier for a table write: when the table is black,
// spills live registers and calls luaC_barrierback(L, table, &table->gclist).
void callBarrierTableFast(IrRegAllocX64& regs, AssemblyBuilderX64& build, RegisterX64 table, IrOp tableOp)
{
    Label skipBarrier;

    // isblack(obj2gco(t)); white/gray tables need no barrier
    build.test(byte[table + offsetof(GCheader, marked)], bitmask(BLACKBIT));
    build.jcc(ConditionX64::Zero, skipBarrier);

    {
        ScopedSpills spills(regs);

        IrCallWrapperX64 barrierCall(regs, build);
        barrierCall.addArgument(SizeX64::qword, rState);
        barrierCall.addArgument(SizeX64::qword, table, tableOp);
        barrierCall.addArgument(SizeX64::qword, addr[table + offsetof(LuaTable, gclist)]);
        barrierCall.call(qword[rNativeContext + offsetof(NativeContext, luaC_barrierback)]);
    }

    build.setLabel(skipBarrier);
}
323
324
// Emits an inline GC trigger check: when g->totalbytes has reached g->GCthreshold,
// spills live registers and calls luaC_step(L, 1). The cached base pointer is re-read
// afterwards in case the collector moved the stack.
void callStepGc(IrRegAllocX64& regs, AssemblyBuilderX64& build)
{
    Label skipStep;

    {
        ScopedRegX64 globalState{regs, SizeX64::qword};
        ScopedRegX64 totalBytes{regs, SizeX64::qword};

        // skip the call while g->totalbytes < g->GCthreshold
        build.mov(globalState.reg, qword[rState + offsetof(lua_State, global)]);
        build.mov(totalBytes.reg, qword[globalState.reg + offsetof(global_State, totalbytes)]);
        build.cmp(totalBytes.reg, qword[globalState.reg + offsetof(global_State, GCthreshold)]);
        build.jcc(ConditionX64::Below, skipStep);
    }

    {
        ScopedSpills spills(regs);

        IrCallWrapperX64 stepCall(regs, build);
        stepCall.addArgument(SizeX64::qword, rState);
        stepCall.addArgument(SizeX64::dword, 1);
        stepCall.call(qword[rNativeContext + offsetof(NativeContext, luaC_step)]);
        emitUpdateBase(build);
    }

    build.setLabel(skipStep);
}
350
351
// Clears LUA_CALLINFO_NATIVE from the current call frame's flags (L->ci->flags).
// Clobbers rax.
void emitClearNativeFlag(AssemblyBuilderX64& build)
{
    build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
    build.and_(dword[rax + offsetof(CallInfo, flags)], ~LUA_CALLINFO_NATIVE);
}
356
357
// Emits a tail jump out of native code through the exit gate.
// eax carries the continueInVm flag for the gate: 1 when execution should continue in
// the interpreter, 0 otherwise.
void emitExit(AssemblyBuilderX64& build, bool continueInVm)
{
    if (continueInVm)
        build.mov(eax, 1);
    else
        build.xor_(eax, eax); // xor is the compact way to zero eax

    build.jmp(qword[rNativeContext + offsetof(NativeContext, gateExit)]);
}
366
367
// Reloads the cached base register from L->base; emitted after any call that may have
// reallocated the Lua stack.
void emitUpdateBase(AssemblyBuilderX64& build)
{
    build.mov(rBase, qword[rState + offsetof(lua_State, base)]);
}
371
372
// Emits the out-of-line interrupt servicing sequence.
// Caller contract: rax = pcpos + 1, rbx = return address in native code.
// Invokes the global interrupt callback if one is installed; if the callback set
// L->status, savedpc is rewound by one instruction and execution exits to the VM,
// otherwise control resumes at the address in rbx.
void emitInterrupt(AssemblyBuilderX64& build)
{
    // rax = pcpos + 1
    // rbx = return address in native code

    // note: rbx is non-volatile so it will be saved across interrupt call automatically

    RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
    RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;

    Label skip;

    // Update L->ci->savedpc; required in case interrupt errors
    build.mov(rcx, sCode);
    build.lea(rcx, addr[rcx + rax * sizeof(Instruction)]);
    build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
    build.mov(qword[rax + offsetof(CallInfo, savedpc)], rcx);

    // Load interrupt handler; it may be nullptr in case the update raced with the check before we got here
    build.mov(rax, qword[rState + offsetof(lua_State, global)]);
    build.mov(rax, qword[rax + offsetof(global_State, cb.interrupt)]);
    build.test(rax, rax);
    build.jcc(ConditionX64::Zero, skip);

    // Call interrupt with (L, -1)
    build.mov(rArg1, rState);
    build.mov(dwordReg(rArg2), -1);
    build.call(rax);

    // Check if we need to exit; nonzero L->status means the interrupt requested it
    build.mov(al, byte[rState + offsetof(lua_State, status)]);
    build.test(al, al);
    build.jcc(ConditionX64::Zero, skip);

    // Point savedpc back at the interrupted instruction before leaving native code
    build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
    build.sub(qword[rax + offsetof(CallInfo, savedpc)], sizeof(Instruction));
    emitExit(build, /* continueInVm */ false);

    build.setLabel(skip);

    emitUpdateBase(build); // interrupt may have reallocated stack

    build.jmp(rbx);
}
416
417
// Emits a call to the VM fallback handler stored at byte 'offset' inside NativeContext:
// fallback(L, instruction, base, k), where 'instruction' points at the bytecode
// instruction at 'pcpos'. The cached base pointer is re-read afterwards.
void emitFallback(IrRegAllocX64& regs, AssemblyBuilderX64& build, int offset, int pcpos)
{
    // fallback(L, instruction, base, k)
    IrCallWrapperX64 fallbackCall(regs, build);
    fallbackCall.addArgument(SizeX64::qword, rState);

    // materialize a pointer to the current instruction in the next argument register
    RegisterX64 insnReg = fallbackCall.suggestNextArgumentRegister(SizeX64::qword);
    build.mov(insnReg, sCode);
    fallbackCall.addArgument(SizeX64::qword, addr[insnReg + pcpos * sizeof(Instruction)]);

    fallbackCall.addArgument(SizeX64::qword, rBase);
    fallbackCall.addArgument(SizeX64::qword, rConstants);
    fallbackCall.call(qword[rNativeContext + offset]);

    emitUpdateBase(build);
}
433
434
// Stores the current program counter into L->ci->savedpc before exiting to the VM.
// Caller contract: edx = pcpos * sizeof(Instruction) on entry. Clobbers rax and rdx.
void emitUpdatePcForExit(AssemblyBuilderX64& build)
{
    // edx = pcpos * sizeof(Instruction)
    build.add(rdx, sCode);
    build.mov(rax, qword[rState + offsetof(lua_State, ci)]);
    build.mov(qword[rax + offsetof(CallInfo, savedpc)], rdx);
}
441
442
// Emits the shared return sequence for a native frame.
// Caller contract: rdi ('res') points at the first unwritten result slot, ecx
// ('written') holds how many results were already written.
// Pads missing expected results with nil, pops the CallInfo (L->ci = cip), restores
// base/top, and either exits to the VM (last frame, or caller has no native code) or
// jumps directly into the caller's native code at the saved pc.
void emitReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers)
{
    // input: res in rdi, number of written values in ecx
    RegisterX64 res = rdi;
    RegisterX64 written = ecx;

    RegisterX64 ci = r8;
    RegisterX64 cip = r9;
    RegisterX64 nresults = esi;

    build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
    build.lea(cip, addr[ci - sizeof(CallInfo)]); // cip = previous CallInfo

    // nresults = ci->nresults
    build.mov(nresults, dword[ci + offsetof(CallInfo, nresults)]);

    Label skipResultCopy;

    // Fill the rest of the expected results (nresults - written) with 'nil'
    RegisterX64 counter = written;
    build.sub(counter, nresults); // counter = -(nresults - written)
    build.jcc(ConditionX64::GreaterEqual, skipResultCopy);

    // write nil TValues until counter increments back to zero
    Label repeatNilLoop = build.setLabel();
    build.mov(dword[res + offsetof(TValue, tt)], LUA_TNIL);
    build.add(res, sizeof(TValue));
    build.inc(counter);
    build.jcc(ConditionX64::NotZero, repeatNilLoop);

    build.setLabel(skipResultCopy);

    build.mov(qword[rState + offsetof(lua_State, ci)], cip); // L->ci = cip
    build.mov(rBase, qword[cip + offsetof(CallInfo, base)]); // sync base = L->base while we have a chance
    build.mov(qword[rState + offsetof(lua_State, base)], rBase); // L->base = cip->base

    // With a fixed result count (nresults >= 0), top is restored to the frame's top;
    // a negative nresults (multret) keeps top right past the written results
    Label skipFixedRetTop;
    build.test(nresults, nresults); // test here will set SF=1 for a negative number and it always sets OF to 0
    build.jcc(ConditionX64::Less, skipFixedRetTop); // jl jumps if SF != OF
    build.mov(res, qword[cip + offsetof(CallInfo, top)]); // res = cip->top
    build.setLabel(skipFixedRetTop);

    build.mov(qword[rState + offsetof(lua_State, top)], res); // L->top = res

    // Unlikely, but this might be the last return from VM
    build.test(byte[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_RETURN);
    build.jcc(ConditionX64::NotZero, helpers.exitNoContinueVm);

    // Returning back to the previous function is a bit tricky
    // Registers alive: r9 (cip)
    RegisterX64 proto = rcx;
    RegisterX64 execdata = rbx;
    RegisterX64 exectarget = r10;

    // Change closure: sClosure = cip->func->value.gc
    build.mov(rax, qword[cip + offsetof(CallInfo, func)]);
    build.mov(rax, qword[rax + offsetof(TValue, value.gc)]);
    build.mov(sClosure, rax);

    build.mov(proto, qword[rax + offsetof(Closure, l.p)]);

    build.mov(execdata, qword[proto + offsetof(Proto, execdata)]);

    build.test(byte[cip + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE);
    build.jcc(ConditionX64::Zero, helpers.exitContinueVm); // Continue in interpreter if function has no native data

    if (DFFlag::AddReturnExectargetCheck)
    {
        // A null exectarget means there is no native entry point to jump to
        build.mov(exectarget, qword[proto + offsetof(Proto, exectarget)]);
        build.test(exectarget, exectarget);
        build.jcc(ConditionX64::Zero, helpers.exitContinueVmClearNativeFlag);
    }

    // Change constants
    build.mov(rConstants, qword[proto + offsetof(Proto, k)]);

    // Change code
    build.mov(rdx, qword[proto + offsetof(Proto, code)]);
    build.mov(sCode, rdx);

    build.mov(rax, qword[cip + offsetof(CallInfo, savedpc)]);

    // To get instruction index from instruction pointer, we need to divide byte offset by 4
    // But we will actually need to scale instruction index by 4 back to byte offset later so it cancels out
    build.sub(rax, rdx);

    // Get new instruction location and jump to it
    build.mov(edx, dword[execdata + rax]);

    if (DFFlag::AddReturnExectargetCheck)
    {
        build.add(rdx, exectarget);
    }
    else
    {
        build.add(rdx, qword[proto + offsetof(Proto, exectarget)]);
    }
    build.jmp(rdx);
}
540
541
542
} // namespace X64
543
} // namespace CodeGen
544
} // namespace Luau
545
546