Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/CodeGen/src/EmitInstructionX64.cpp
2725 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#include "EmitInstructionX64.h"
3
4
#include "Luau/AssemblyBuilderX64.h"
5
#include "Luau/IrCallWrapperX64.h"
6
#include "Luau/IrRegAllocX64.h"
7
#include "Luau/RegisterX64.h"
8
9
#include "EmitCommonX64.h"
10
#include "NativeState.h"
11
12
#include "lstate.h"
13
14
LUAU_FASTFLAGVARIABLE(LuauCodeGenCallWrapperEmitInst)
15
16
namespace Luau
17
{
18
namespace CodeGen
19
{
20
namespace X64
21
{
22
23
// Emits native code for the CALL instruction: calls the closure stored in Luau register 'ra'
// with 'nparams' arguments (LUA_MULTRET to pass everything up to L->top) and 'nresults'
// expected results (LUA_MULTRET for a variable number).
// Lua closures with a native entry point are continued natively; C closures are invoked
// directly and their results are repositioned inline for the common 0/1-result cases.
void emitInstCall(IrRegAllocX64& regs, AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
{
    if (FFlag::LuauCodeGenCallWrapperEmitInst)
    {
        // callProlog(L, ra, argtop, nresults): sets up the callee frame and returns the Closure* being called
        IrCallWrapperX64 callWrapper(regs, build);

        callWrapper.addArgument(SizeX64::qword, rState);
        callWrapper.addArgument(SizeX64::qword, luauRegAddress(ra));
        if (nparams == LUA_MULTRET)
            callWrapper.addArgument(SizeX64::qword, qword[rState + offsetof(lua_State, top)]);
        else
            callWrapper.addArgument(SizeX64::qword, luauRegAddress(ra + 1 + nparams));

        callWrapper.addArgument(SizeX64::dword, nresults);
        callWrapper.call(qword[rNativeContext + offsetof(NativeContext, callProlog)]);
    }
    else
    {
        // Manual argument setup following the platform calling convention (Windows vs System V)
        RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
        RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
        RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
        RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx;

        build.mov(rArg1, rState);
        build.lea(rArg2, luauRegAddress(ra));

        // Argument top: L->top for MULTRET, otherwise one past the last fixed argument
        if (nparams == LUA_MULTRET)
            build.mov(rArg3, qword[rState + offsetof(lua_State, top)]);
        else
            build.lea(rArg3, luauRegAddress(ra + 1 + nparams));

        build.mov(dwordReg(rArg4), nresults);
        build.call(qword[rNativeContext + offsetof(NativeContext, callProlog)]);
    }
    RegisterX64 ccl = rax; // Returned from callProlog

    // callProlog may have reallocated the stack; refresh cached L->base
    emitUpdateBase(build);

    Label cFuncCall;

    build.test(byte[ccl + offsetof(Closure, isC)], 1);
    build.jcc(ConditionX64::NotZero, cFuncCall);

    // Lua closure path: fill in missing parameters, set up frame state, and jump to native entry
    {
        RegisterX64 proto = rcx; // Sync with emitContinueCallInVm
        RegisterX64 ci = rdx;
        RegisterX64 argi = rsi;
        RegisterX64 argend = rdi;

        build.mov(proto, qword[ccl + offsetof(Closure, l.p)]);

        // Switch current Closure
        build.mov(sClosure, ccl); // Last use of 'ccl'

        build.mov(ci, qword[rState + offsetof(lua_State, ci)]);

        Label fillnil, exitfillnil;

        // argi = L->top
        build.mov(argi, qword[rState + offsetof(lua_State, top)]);

        // argend = L->base + p->numparams
        build.movzx(eax, byte[proto + offsetof(Proto, numparams)]);
        build.shl(eax, kTValueSizeLog2);
        build.lea(argend, addr[rBase + rax]);

        // Fill unsupplied fixed parameters with nil:
        // while (argi < argend) setnilvalue(argi++);
        build.setLabel(fillnil);
        build.cmp(argi, argend);
        build.jcc(ConditionX64::NotBelow, exitfillnil);

        build.mov(dword[argi + offsetof(TValue, tt)], LUA_TNIL);
        build.add(argi, sizeof(TValue));
        build.jmp(fillnil); // This loop rarely runs so it's not worth repeating cmp/jcc

        build.setLabel(exitfillnil);

        // Set L->top to ci->top as most function expect (no vararg)
        build.mov(rax, qword[ci + offsetof(CallInfo, top)]);

        // But if it is vararg, update it to 'argi'
        Label skipVararg;

        build.test(byte[proto + offsetof(Proto, is_vararg)], 1);
        build.jcc(ConditionX64::Zero, skipVararg);
        build.mov(rax, argi);

        build.setLabel(skipVararg);

        build.mov(qword[rState + offsetof(lua_State, top)], rax);

        // Switch current code
        // ci->savedpc = p->code;
        build.mov(rax, qword[proto + offsetof(Proto, code)]);
        build.mov(sCode, rax); // note: this needs to be before the next store for optimal performance
        build.mov(qword[ci + offsetof(CallInfo, savedpc)], rax);

        // Switch current constants
        build.mov(rConstants, qword[proto + offsetof(Proto, k)]);

        // Get native function entry; a zero exectarget means no native code, so fall back to the VM
        build.mov(rax, qword[proto + offsetof(Proto, exectarget)]);
        build.test(rax, rax);
        build.jcc(ConditionX64::Zero, helpers.exitContinueVm);

        // Mark call frame as native
        build.mov(dword[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE);

        // Tail-jump into the callee's native code
        build.jmp(rax);
    }

    build.setLabel(cFuncCall);

    // C closure path: call the C function and place its results
    {
        // results = ccl->c.f(L);
        if (FFlag::LuauCodeGenCallWrapperEmitInst)
        {
            regs.takeReg(ccl, kInvalidInstIdx); // ccl = rax, returned from callProlog, have to take ownership so the wrapper can free it
            IrCallWrapperX64 callWrapper(regs, build);
            callWrapper.addArgument(SizeX64::qword, rState);
            callWrapper.call(qword[ccl + offsetof(Closure, c.f)]); // Last use of 'ccl'
        }
        else
        {
            RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
            build.mov(rArg1, rState);
            build.call(qword[ccl + offsetof(Closure, c.f)]); // Last use of 'ccl'
        }
        RegisterX64 results = eax;

        // A negative result count signals an error/yield; exit without continuing in the VM
        build.test(results, results); // test here will set SF=1 for a negative number and it always sets OF to 0
        build.jcc(ConditionX64::Less, helpers.exitNoContinueVm); // jl jumps if SF != OF

        // We have special handling for small number of expected results below
        if (nresults != 0 && nresults != 1)
        {
            // General case: delegate result placement to callEpilogC(L, nresults, results)
            if (FFlag::LuauCodeGenCallWrapperEmitInst)
            {
                regs.takeReg(results, kInvalidInstIdx); // results = eax, returned from c.f, have to take ownership so the wrapper can free it
                IrCallWrapperX64 callWrapper(regs, build);
                callWrapper.addArgument(SizeX64::qword, rState);
                callWrapper.addArgument(SizeX64::dword, nresults);
                callWrapper.addArgument(SizeX64::dword, results);
                callWrapper.call(qword[rNativeContext + offsetof(NativeContext, callEpilogC)]);
            }
            else
            {
                RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
                RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
                RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;

                build.mov(rArg1, rState);
                build.mov(dwordReg(rArg2), nresults);
                build.mov(dwordReg(rArg3), results);
                build.call(qword[rNativeContext + offsetof(NativeContext, callEpilogC)]);
            }

            emitUpdateBase(build);
            return;
        }

        // Fast path for nresults == 0 or 1: pop the frame inline
        RegisterX64 ci = rdx;
        RegisterX64 cip = rcx;
        RegisterX64 vali = rsi;

        build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
        build.lea(cip, addr[ci - sizeof(CallInfo)]);

        // L->base = cip->base
        build.mov(rBase, qword[cip + offsetof(CallInfo, base)]);
        build.mov(qword[rState + offsetof(lua_State, base)], rBase);

        if (nresults == 1)
        {
            // Opportunistically copy the result we expected from (L->top - results)
            build.mov(vali, qword[rState + offsetof(lua_State, top)]);
            build.shl(results, kTValueSizeLog2);
            build.sub(vali, qwordReg(results));
            build.vmovups(xmm0, xmmword[vali]);
            build.vmovups(luauReg(ra), xmm0);

            Label skipnil;

            // If there was no result, override the value with 'nil'
            build.test(results, results);
            build.jcc(ConditionX64::NotZero, skipnil);
            build.mov(luauRegTag(ra), LUA_TNIL);
            build.setLabel(skipnil);
        }

        // L->ci = cip
        build.mov(qword[rState + offsetof(lua_State, ci)], cip);

        // L->top = cip->top
        build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
        build.mov(qword[rState + offsetof(lua_State, top)], rax);
    }
}
221
222
// Emits native code for the RETURN instruction: copies 'actualResults' values starting at
// Luau register 'ra' (LUA_MULTRET to copy everything up to L->top) to the caller's expected
// location, then jumps to the shared return helper.
// On entry to helpers.return_: 'res' (rdi) points one past the last value written and
// 'written' (ecx) holds the number of values written.
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults, bool functionVariadic)
{
    RegisterX64 res = rdi;
    RegisterX64 written = ecx;

    if (functionVariadic)
    {
        // Variadic frames: results go to ci->func, which is not adjacent to the base
        build.mov(res, qword[rState + offsetof(lua_State, ci)]);
        build.mov(res, qword[res + offsetof(CallInfo, func)]);
    }
    else if (actualResults != 1)
        build.lea(res, addr[rBase - sizeof(TValue)]); // invariant: ci->func + 1 == ci->base for non-variadic frames

    if (actualResults == 0)
    {
        build.xor_(written, written);
        build.jmp(helpers.return_);
    }
    else if (actualResults == 1 && !functionVariadic)
    {
        // fast path: minimizes res adjustments
        // note that we skipped res computation for this specific case above
        build.vmovups(xmm0, luauReg(ra));
        build.vmovups(xmmword[rBase - sizeof(TValue)], xmm0);
        build.mov(res, rBase);
        build.mov(written, 1);
        build.jmp(helpers.return_);
    }
    else if (actualResults >= 1 && actualResults <= 3)
    {
        // Small static result counts: fully unrolled copy (one 16-byte TValue per vmovups pair)
        for (int r = 0; r < actualResults; ++r)
        {
            build.vmovups(xmm0, luauReg(ra + r));
            build.vmovups(xmmword[res + r * sizeof(TValue)], xmm0);
        }
        build.add(res, actualResults * sizeof(TValue));
        build.mov(written, actualResults);
        build.jmp(helpers.return_);
    }
    else
    {
        // General case: loop over the value range [vali, valend)
        RegisterX64 vali = rax;
        RegisterX64 valend = rdx;

        // vali = ra
        build.lea(vali, luauRegAddress(ra));

        // Copy as much as possible for MULTRET calls, and only as much as needed otherwise
        if (actualResults == LUA_MULTRET)
            build.mov(valend, qword[rState + offsetof(lua_State, top)]); // valend = L->top
        else
            build.lea(valend, luauRegAddress(ra + actualResults)); // valend = ra + actualResults

        build.xor_(written, written);

        Label repeatValueLoop, exitValueLoop;

        // Only MULTRET can produce an empty range, so only then is a pre-loop check needed
        if (actualResults == LUA_MULTRET)
        {
            build.cmp(vali, valend);
            build.jcc(ConditionX64::NotBelow, exitValueLoop);
        }

        build.setLabel(repeatValueLoop);
        build.vmovups(xmm0, xmmword[vali]);
        build.vmovups(xmmword[res], xmm0);
        build.add(vali, sizeof(TValue));
        build.add(res, sizeof(TValue));
        build.inc(written);
        build.cmp(vali, valend);
        build.jcc(ConditionX64::Below, repeatValueLoop);

        build.setLabel(exitValueLoop);
        build.jmp(helpers.return_);
    }
}
298
299
// Emits native code for the SETLIST instruction: stores 'count' values starting at Luau
// register 'rb' (LUA_MULTRET to store everything up to L->top) into the array part of the
// table in register 'ra', starting at array index 'index' (1-based).
// 'knownSize' is the statically-known array capacity (< 0 when unknown); when it already
// covers the target range, the resize call is skipped entirely.
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index, int knownSize)
{
    OperandX64 last = index + count - 1;

    // Using non-volatile 'rbx' for dynamic 'count' value (for LUA_MULTRET) to skip later recomputation
    // We also keep 'count' scaled by sizeof(TValue) here as it helps in the loop below
    RegisterX64 cscaled = rbx;

    if (count == LUA_MULTRET)
    {
        RegisterX64 tmp = rax;

        // count = L->top - rb
        build.mov(cscaled, qword[rState + offsetof(lua_State, top)]);
        build.lea(tmp, luauRegAddress(rb));
        build.sub(cscaled, tmp); // Using byte difference

        // L->top = L->ci->top
        build.mov(tmp, qword[rState + offsetof(lua_State, ci)]);
        build.mov(tmp, qword[tmp + offsetof(CallInfo, top)]);
        build.mov(qword[rState + offsetof(lua_State, top)], tmp);

        // last = index + count - 1;
        last = edx;
        build.mov(last, dwordReg(cscaled));
        build.shr(last, kTValueSizeLog2); // Convert byte difference back to an element count
        build.add(last, index - 1);
    }

    RegisterX64 table = regs.takeReg(rax, kInvalidInstIdx);

    build.mov(table, luauRegValue(ra));

    // Resize is only needed when the final index may exceed the known array capacity
    if (count == LUA_MULTRET || knownSize < 0 || knownSize < int(index + count - 1))
    {
        Label skipResize;

        // Resize if h->sizearray < last
        build.cmp(dword[table + offsetof(LuaTable, sizearray)], last);
        build.jcc(ConditionX64::NotBelow, skipResize);

        if (FFlag::LuauCodeGenCallWrapperEmitInst)
        {
            // luaH_resizearray(L, table, last)
            if (count == LUA_MULTRET)
                regs.takeReg(last.base, kInvalidInstIdx); // last = edx, preloaded above, have to take ownership so the wrapper can free it
            IrCallWrapperX64 callWrapper(regs, build);
            callWrapper.addArgument(SizeX64::qword, rState);
            callWrapper.addArgument(SizeX64::qword, table);
            callWrapper.addArgument(SizeX64::dword, last);
            callWrapper.call(qword[rNativeContext + offsetof(NativeContext, luaH_resizearray)]);
            // InstCallWrapperX64 freed table's register (rax) as a consumed source
            // we need to retake it so that the subsequent build.mov reload and callBarrierTableFast can track ownership correctly
            table = regs.takeReg(rax, kInvalidInstIdx);
        }
        else
        {
            RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
            RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
            RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;

            // Argument setup reordered to avoid conflicts
            CODEGEN_ASSERT(rArg3 != table);
            build.mov(dwordReg(rArg3), last);
            build.mov(rArg2, table);
            build.mov(rArg1, rState);
            build.call(qword[rNativeContext + offsetof(NativeContext, luaH_resizearray)]);
        }
        build.mov(table, luauRegValue(ra)); // Reload clobbered register value

        build.setLabel(skipResize);
    }

    RegisterX64 arrayDst = rdx;
    RegisterX64 offset = rcx;

    build.mov(arrayDst, qword[table + offsetof(LuaTable, array)]);

    // Small static counts are copied with an unrolled sequence instead of a loop
    const int kUnrollSetListLimit = 4;

    if (count != LUA_MULTRET && count <= kUnrollSetListLimit)
    {
        for (int i = 0; i < count; ++i)
        {
            // setobj2t(L, &array[index + i - 1], rb + i);
            build.vmovups(xmm0, luauRegValue(rb + i));
            build.vmovups(xmmword[arrayDst + (index + i - 1) * sizeof(TValue)], xmm0);
        }
    }
    else
    {
        CODEGEN_ASSERT(count != 0);

        // 'offset' walks in bytes from 0 to 'limit' over both source registers and destination array
        build.xor_(offset, offset);
        if (index != 1)
            build.add(arrayDst, (index - 1) * sizeof(TValue));

        Label repeatLoop, endLoop;
        OperandX64 limit = count == LUA_MULTRET ? cscaled : OperandX64(count * sizeof(TValue));

        // If c is static, we will always do at least one iteration
        if (count == LUA_MULTRET)
        {
            build.cmp(offset, limit);
            build.jcc(ConditionX64::NotBelow, endLoop);
        }

        build.setLabel(repeatLoop);

        // setobj2t(L, &array[index + i - 1], rb + i);
        build.vmovups(xmm0, xmmword[offset + rBase + rb * sizeof(TValue)]); // luauReg(rb) unwrapped to add offset
        build.vmovups(xmmword[offset + arrayDst], xmm0);

        build.add(offset, sizeof(TValue));
        build.cmp(offset, limit);
        build.jcc(ConditionX64::Below, repeatLoop);

        build.setLabel(endLoop);
    }

    // GC write barrier for the table after storing new values into it
    callBarrierTableFast(regs, build, table, {});
}
420
421
// Emits native code for the builtin-table fast path of the FORGLOOP instruction:
// advances the iteration state in 'ra + 2' over the table in 'ra + 1', producing 'aux'
// loop variables starting at 'ra + 3', and jumps to 'loopRepeat' to continue the loop.
// The array portion is traversed inline; the node (hash) portion is handled by the
// forgLoopNodeIter helper.
void emitInstForGLoop(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
{
    // ipairs-style traversal is handled in IR
    CODEGEN_ASSERT(aux >= 0);

    // This is a fast-path for builtin table iteration, tag check for 'ra' has to be performed before emitting this instruction

    // Registers are chosen in this way to simplify fallback code for the node part
    RegisterX64 table = (build.abi == ABIX64::Windows) ? rdx : rsi;
    RegisterX64 index = (build.abi == ABIX64::Windows) ? r8 : rdx;
    RegisterX64 elemPtr = rax;

    build.mov(table, luauRegValue(ra + 1));
    build.mov(index, luauRegValue(ra + 2));

    // &array[index]
    build.mov(dwordReg(elemPtr), dwordReg(index));
    build.shl(dwordReg(elemPtr), kTValueSizeLog2);
    build.add(elemPtr, qword[table + offsetof(LuaTable, array)]);

    // Clear extra variables since we might have more than two
    for (int i = 2; i < aux; ++i)
        build.mov(luauRegTag(ra + 3 + i), LUA_TNIL);

    Label skipArray, skipArrayNil;

    // First we advance index through the array portion
    // while (unsigned(index) < unsigned(sizearray))
    Label arrayLoop = build.setLabel();
    build.cmp(dwordReg(index), dword[table + offsetof(LuaTable, sizearray)]);
    build.jcc(ConditionX64::NotBelow, skipArray);

    // If element is nil, we increment the index; if it's not, we still need 'index + 1' inside
    build.inc(index);

    build.cmp(dword[elemPtr + offsetof(TValue, tt)], LUA_TNIL);
    build.jcc(ConditionX64::Equal, skipArrayNil);

    // Found a non-nil array element: publish the new iteration state and the key/value pair
    // setpvalue(ra + 2, reinterpret_cast<void*>(uintptr_t(index + 1)), LU_TAG_ITERATOR);
    build.mov(luauRegValue(ra + 2), index);
    // Extra should already be set to LU_TAG_ITERATOR
    // Tag should already be set to lightuserdata

    // setnvalue(ra + 3, double(index + 1));
    build.vcvtsi2sd(xmm0, xmm0, dwordReg(index));
    build.vmovsd(luauRegValue(ra + 3), xmm0);
    build.mov(luauRegTag(ra + 3), LUA_TNUMBER);

    // setobj2s(L, ra + 4, e);
    setLuauReg(build, xmm2, ra + 4, xmmword[elemPtr]);

    build.jmp(loopRepeat);

    build.setLabel(skipArrayNil);

    // Index already incremented, advance to next array element
    build.add(elemPtr, sizeof(TValue));
    build.jmp(arrayLoop);

    build.setLabel(skipArray);

    // Array portion exhausted: fall back to the node iteration helper
    // forgLoopNodeIter(L, table, index, ra)
    if (FFlag::LuauCodeGenCallWrapperEmitInst)
    {
        regs.takeReg(table, kInvalidInstIdx); // table/index are preloaded above, have to take ownership so the wrapper can free them
        regs.takeReg(index, kInvalidInstIdx);
        IrCallWrapperX64 callWrapper(regs, build);
        callWrapper.addArgument(SizeX64::qword, rState);
        callWrapper.addArgument(SizeX64::qword, table);
        callWrapper.addArgument(SizeX64::qword, index);
        callWrapper.addArgument(SizeX64::qword, luauRegAddress(ra));
        callWrapper.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNodeIter)]);
    }
    else
    {
        RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
        RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx;

        // Call helper to assign next node value or to signal loop exit
        build.mov(rArg1, rState);
        // rArg2 and rArg3 are already set
        build.lea(rArg4, luauRegAddress(ra));
        build.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNodeIter)]);
    }

    // Non-zero result in 'al' means iteration continues; zero means the loop is done
    build.test(al, al);
    build.jcc(ConditionX64::NotZero, loopRepeat);
}
507
508
} // namespace X64
509
} // namespace CodeGen
510
} // namespace Luau
511
512