#include "EmitInstructionX64.h"
#include "Luau/AssemblyBuilderX64.h"
#include "Luau/IrCallWrapperX64.h"
#include "Luau/IrRegAllocX64.h"
#include "Luau/RegisterX64.h"
#include "EmitCommonX64.h"
#include "NativeState.h"
#include "lstate.h"
LUAU_FASTFLAGVARIABLE(LuauCodeGenCallWrapperEmitInst)
namespace Luau
{
namespace CodeGen
{
namespace X64
{
// Emits the x64 code for a call instruction.
// The generated code first calls the callProlog helper, then inspects the closure it returned:
//  - for a non-C closure it sets up the new frame state (nil-fills missing parameters, updates L->top,
//    switches code/constants) and jumps to proto->exectarget, or to helpers.exitContinueVm when that target is null;
//  - for a C closure it calls closure->c.f(L) and then either calls callEpilogC or emits an inline epilog.
// Parameters:
//  - regs: register allocator, only used on the LuauCodeGenCallWrapperEmitInst paths
//  - build: assembly builder that receives the instructions
//  - helpers: provides the exitContinueVm / exitNoContinueVm jump targets
//  - ra: base VM register of the call; its address is handed to callProlog and a single result is written back to it
//  - nparams: argument count (arguments end at ra + 1 + nparams), or LUA_MULTRET to use L->top as the end
//  - nresults: expected result count; 0 and 1 use the inline C-function epilog, anything else goes through callEpilogC
void emitInstCall(IrRegAllocX64& regs, AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int nparams, int nresults)
{
// Both paths pass the same four arguments to callProlog: (L, address of ra, end of arguments, nresults)
if (FFlag::LuauCodeGenCallWrapperEmitInst)
{
IrCallWrapperX64 callWrapper(regs, build);
callWrapper.addArgument(SizeX64::qword, rState);
callWrapper.addArgument(SizeX64::qword, luauRegAddress(ra));
if (nparams == LUA_MULTRET)
callWrapper.addArgument(SizeX64::qword, qword[rState + offsetof(lua_State, top)]);
else
callWrapper.addArgument(SizeX64::qword, luauRegAddress(ra + 1 + nparams));
callWrapper.addArgument(SizeX64::dword, nresults);
callWrapper.call(qword[rNativeContext + offsetof(NativeContext, callProlog)]);
}
else
{
// Manual setup: the first four integer argument registers of the Windows x64 ABI, or of the other supported ABI
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
RegisterX64 rArg4 = (build.abi == ABIX64::Windows) ? r9 : rcx;
build.mov(rArg1, rState);
build.lea(rArg2, luauRegAddress(ra));
if (nparams == LUA_MULTRET)
build.mov(rArg3, qword[rState + offsetof(lua_State, top)]);
else
build.lea(rArg3, luauRegAddress(ra + 1 + nparams));
build.mov(dwordReg(rArg4), nresults);
build.call(qword[rNativeContext + offsetof(NativeContext, callProlog)]);
}
// The value callProlog leaves in rax is used as a Closure* from here on
RegisterX64 ccl = rax;
// NOTE(review): emitUpdateBase is defined elsewhere; presumably it reloads rBase after the helper call - confirm
emitUpdateBase(build);
Label cFuncCall;
// Take the C function path when the low bit of closure->isC is set
build.test(byte[ccl + offsetof(Closure, isC)], 1);
build.jcc(ConditionX64::NotZero, cFuncCall);
{
// Non-C closure path; fixed scratch registers are used directly here
RegisterX64 proto = rcx;
RegisterX64 ci = rdx;
RegisterX64 argi = rsi;
RegisterX64 argend = rdi;
// proto = ccl->l.p
build.mov(proto, qword[ccl + offsetof(Closure, l.p)]);
// sClosure = ccl; this is the last read of 'ccl', rax is reused as scratch below
build.mov(sClosure, ccl);
// ci = L->ci
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
Label fillnil, exitfillnil;
// argi = L->top
build.mov(argi, qword[rState + offsetof(lua_State, top)]);
// argend = rBase + (proto->numparams << kTValueSizeLog2)
build.movzx(eax, byte[proto + offsetof(Proto, numparams)]);
build.shl(eax, kTValueSizeLog2);
build.lea(argend, addr[rBase + rax]);
// while (argi < argend) { argi->tt = LUA_TNIL; argi++; }
build.setLabel(fillnil);
build.cmp(argi, argend);
build.jcc(ConditionX64::NotBelow, exitfillnil);
build.mov(dword[argi + offsetof(TValue, tt)], LUA_TNIL);
build.add(argi, sizeof(TValue));
build.jmp(fillnil);
build.setLabel(exitfillnil);
// L->top defaults to ci->top ...
build.mov(rax, qword[ci + offsetof(CallInfo, top)]);
Label skipVararg;
// ... but when the low bit of proto->is_vararg is set, 'argi' is stored instead
build.test(byte[proto + offsetof(Proto, is_vararg)], 1);
build.jcc(ConditionX64::Zero, skipVararg);
build.mov(rax, argi);
build.setLabel(skipVararg);
build.mov(qword[rState + offsetof(lua_State, top)], rax);
// sCode = proto->code; ci->savedpc = proto->code
build.mov(rax, qword[proto + offsetof(Proto, code)]);
build.mov(sCode, rax);
build.mov(qword[ci + offsetof(CallInfo, savedpc)], rax);
// rConstants = proto->k
build.mov(rConstants, qword[proto + offsetof(Proto, k)]);
// rax = proto->exectarget; a null target leaves through helpers.exitContinueVm
build.mov(rax, qword[proto + offsetof(Proto, exectarget)]);
build.test(rax, rax);
build.jcc(ConditionX64::Zero, helpers.exitContinueVm);
// ci->flags = LUA_CALLINFO_NATIVE (the whole dword is overwritten), then jump to the target
build.mov(dword[ci + offsetof(CallInfo, flags)], LUA_CALLINFO_NATIVE);
build.jmp(rax);
}
build.setLabel(cFuncCall);
{
// C closure path: results = ccl->c.f(L)
if (FFlag::LuauCodeGenCallWrapperEmitInst)
{
// NOTE(review): presumably rax is registered with the allocator so the call wrapper knows the call target operand uses it - confirm
regs.takeReg(ccl, kInvalidInstIdx);
IrCallWrapperX64 callWrapper(regs, build);
callWrapper.addArgument(SizeX64::qword, rState);
callWrapper.call(qword[ccl + offsetof(Closure, c.f)]);
}
else
{
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
build.mov(rArg1, rState);
build.call(qword[ccl + offsetof(Closure, c.f)]);
}
// The 32-bit value left in eax by the C function is its result count
RegisterX64 results = eax;
// 'test' sets SF from the sign bit and clears OF, so 'Less' (SF != OF) is taken exactly when results is negative
build.test(results, results);
build.jcc(ConditionX64::Less, helpers.exitNoContinueVm);
if (nresults != 0 && nresults != 1)
{
// General case: callEpilogC(L, nresults, results) followed by a base update; nothing else is emitted
if (FFlag::LuauCodeGenCallWrapperEmitInst)
{
regs.takeReg(results, kInvalidInstIdx);
IrCallWrapperX64 callWrapper(regs, build);
callWrapper.addArgument(SizeX64::qword, rState);
callWrapper.addArgument(SizeX64::dword, nresults);
callWrapper.addArgument(SizeX64::dword, results);
callWrapper.call(qword[rNativeContext + offsetof(NativeContext, callEpilogC)]);
}
else
{
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
build.mov(rArg1, rState);
build.mov(dwordReg(rArg2), nresults);
build.mov(dwordReg(rArg3), results);
build.call(qword[rNativeContext + offsetof(NativeContext, callEpilogC)]);
}
emitUpdateBase(build);
return;
}
// Inline epilog for nresults == 0 or nresults == 1
RegisterX64 ci = rdx;
RegisterX64 cip = rcx;
RegisterX64 vali = rsi;
// ci = L->ci; cip = ci - 1 (the preceding CallInfo entry)
build.mov(ci, qword[rState + offsetof(lua_State, ci)]);
build.lea(cip, addr[ci - sizeof(CallInfo)]);
// rBase = cip->base; L->base = rBase
build.mov(rBase, qword[cip + offsetof(CallInfo, base)]);
build.mov(qword[rState + offsetof(lua_State, base)], rBase);
if (nresults == 1)
{
// Copy the value at (L->top - results) into ra unconditionally ...
build.mov(vali, qword[rState + offsetof(lua_State, top)]);
build.shl(results, kTValueSizeLog2);
build.sub(vali, qwordReg(results));
build.vmovups(xmm0, xmmword[vali]);
build.vmovups(luauReg(ra), xmm0);
Label skipnil;
// ... and overwrite its tag with nil when there were no results (the scaled count is zero iff the count was zero)
build.test(results, results);
build.jcc(ConditionX64::NotZero, skipnil);
build.mov(luauRegTag(ra), LUA_TNIL);
build.setLabel(skipnil);
}
// L->ci = cip
build.mov(qword[rState + offsetof(lua_State, ci)], cip);
// L->top = cip->top
build.mov(rax, qword[cip + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], rax);
}
}
// Emits the x64 code for a return instruction.
// Every variant finishes by jumping to helpers.return_ with two registers prepared:
//  - rdi: destination cursor, positioned just past the last value that was copied
//  - ecx: number of values that were copied
// Parameters:
//  - ra: first VM register holding the values to return
//  - actualResults: number of values to return, or LUA_MULTRET to return everything up to L->top
//  - functionVariadic: when true the destination starts at L->ci->func, otherwise at rBase - sizeof(TValue)
void emitInstReturn(AssemblyBuilderX64& build, ModuleHelpers& helpers, int ra, int actualResults, bool functionVariadic)
{
    RegisterX64 dest = rdi;
    RegisterX64 copied = ecx;

    // The single-result, non-variadic case writes its value directly and never needs the initial 'dest'
    const bool singleFixedResult = !functionVariadic && actualResults == 1;

    if (functionVariadic)
    {
        // dest = L->ci->func
        build.mov(dest, qword[rState + offsetof(lua_State, ci)]);
        build.mov(dest, qword[dest + offsetof(CallInfo, func)]);
    }
    else if (!singleFixedResult)
    {
        // dest = rBase - 1 slot
        build.lea(dest, addr[rBase - sizeof(TValue)]);
    }

    // Nothing to copy
    if (actualResults == 0)
    {
        build.xor_(copied, copied);
        build.jmp(helpers.return_);
        return;
    }

    // One value from a non-variadic function: store it and set the final 'dest' in one go
    if (singleFixedResult)
    {
        build.vmovups(xmm0, luauReg(ra));
        build.vmovups(xmmword[rBase - sizeof(TValue)], xmm0);
        build.mov(dest, rBase);
        build.mov(copied, 1);
        build.jmp(helpers.return_);
        return;
    }

    // One to three values: unrolled copy, then advance 'dest' once
    if (1 <= actualResults && actualResults <= 3)
    {
        int r = 0;
        while (r < actualResults)
        {
            build.vmovups(xmm0, luauReg(ra + r));
            build.vmovups(xmmword[dest + r * sizeof(TValue)], xmm0);
            ++r;
        }

        build.add(dest, actualResults * sizeof(TValue));
        build.mov(copied, actualResults);
        build.jmp(helpers.return_);
        return;
    }

    // Remaining cases (LUA_MULTRET or more than three values): copy with a runtime loop
    const bool untilTop = actualResults == LUA_MULTRET;

    RegisterX64 src = rax;
    RegisterX64 srcEnd = rdx;

    // src = address of ra
    build.lea(src, luauRegAddress(ra));

    // srcEnd = L->top for LUA_MULTRET, otherwise the address of ra + actualResults
    if (untilTop)
        build.mov(srcEnd, qword[rState + offsetof(lua_State, top)]);
    else
        build.lea(srcEnd, luauRegAddress(ra + actualResults));

    build.xor_(copied, copied);

    Label copyLoop, copyDone;

    // Only the LUA_MULTRET range can be empty, so only it gets a check ahead of the loop
    if (untilTop)
    {
        build.cmp(src, srcEnd);
        build.jcc(ConditionX64::NotBelow, copyDone);
    }

    // do { *dest++ = *src++; copied++; } while (src < srcEnd);
    build.setLabel(copyLoop);
    build.vmovups(xmm0, xmmword[src]);
    build.vmovups(xmmword[dest], xmm0);
    build.add(src, sizeof(TValue));
    build.add(dest, sizeof(TValue));
    build.inc(copied);
    build.cmp(src, srcEnd);
    build.jcc(ConditionX64::Below, copyLoop);

    build.setLabel(copyDone);
    build.jmp(helpers.return_);
}
// Emits the x64 code that copies a run of VM registers into the array part of a table.
// The table is read from VM register ra, the source values start at VM register rb, and the first value is
// stored at array offset (index - 1) slots. The array is resized through luaH_resizearray when the emitted
// size check finds it smaller than the last written position.
// Parameters:
//  - regs: register allocator (used for rax and, on the call wrapper path, for the resize call)
//  - build: assembly builder that receives the instructions
//  - ra: VM register holding the destination table
//  - rb: first VM register holding the values to store
//  - count: number of values, or LUA_MULTRET to store everything from rb up to L->top
//  - index: array position of the first stored value (the first slot is index 1)
//  - knownSize: when non-negative and at least index + count - 1 (and count is not LUA_MULTRET), the resize check is not emitted
void emitInstSetList(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int rb, int count, uint32_t index, int knownSize)
{
// For a static count the last written position is an immediate; for LUA_MULTRET it is replaced by a register below
OperandX64 last = index + count - 1;
// Holds the dynamic byte size of the source range for LUA_MULTRET; it is read again in the copy loop after the resize call.
// rbx is a callee-saved register in both supported ABIs, so the value is expected to survive that call
RegisterX64 cscaled = rbx;
if (count == LUA_MULTRET)
{
RegisterX64 tmp = rax;
// cscaled = L->top - address of rb (a byte difference)
build.mov(cscaled, qword[rState + offsetof(lua_State, top)]);
build.lea(tmp, luauRegAddress(rb));
build.sub(cscaled, tmp);
// L->top = L->ci->top
build.mov(tmp, qword[rState + offsetof(lua_State, ci)]);
build.mov(tmp, qword[tmp + offsetof(CallInfo, top)]);
build.mov(qword[rState + offsetof(lua_State, top)], tmp);
// last = (cscaled >> kTValueSizeLog2) + index - 1, computed in edx
last = edx;
build.mov(last, dwordReg(cscaled));
build.shr(last, kTValueSizeLog2);
build.add(last, index - 1);
}
// rax was only a temporary above; from here on it holds the table pointer
RegisterX64 table = regs.takeReg(rax, kInvalidInstIdx);
build.mov(table, luauRegValue(ra));
if (count == LUA_MULTRET || knownSize < 0 || knownSize < int(index + count - 1))
{
Label skipResize;
// Skip the resize when table->sizearray >= last (unsigned comparison)
build.cmp(dword[table + offsetof(LuaTable, sizearray)], last);
build.jcc(ConditionX64::NotBelow, skipResize);
if (FFlag::LuauCodeGenCallWrapperEmitInst)
{
// For LUA_MULTRET 'last' lives in a register; it is taken from the allocator before being passed as an argument
if (count == LUA_MULTRET)
regs.takeReg(last.base, kInvalidInstIdx);
// luaH_resizearray(L, table, last)
IrCallWrapperX64 callWrapper(regs, build);
callWrapper.addArgument(SizeX64::qword, rState);
callWrapper.addArgument(SizeX64::qword, table);
callWrapper.addArgument(SizeX64::dword, last);
callWrapper.call(qword[rNativeContext + offsetof(NativeContext, luaH_resizearray)]);
// NOTE(review): rax is taken again here, presumably because the call wrapper released it - confirm
table = regs.takeReg(rax, kInvalidInstIdx);
}
else
{
RegisterX64 rArg1 = (build.abi == ABIX64::Windows) ? rcx : rdi;
RegisterX64 rArg2 = (build.abi == ABIX64::Windows) ? rdx : rsi;
RegisterX64 rArg3 = (build.abi == ABIX64::Windows) ? r8 : rdx;
// The third argument is written first: 'last' may be in edx, which is the second argument register on Windows,
// so it has to be read before rArg2 is overwritten with the table pointer
CODEGEN_ASSERT(rArg3 != table);
build.mov(dwordReg(rArg3), last);
build.mov(rArg2, table);
build.mov(rArg1, rState);
build.call(qword[rNativeContext + offsetof(NativeContext, luaH_resizearray)]);
}
// Reload the table pointer from ra; rax does not keep its value across the call
build.mov(table, luauRegValue(ra));
build.setLabel(skipResize);
}
RegisterX64 arrayDst = rdx;
RegisterX64 offset = rcx;
// arrayDst = table->array
build.mov(arrayDst, qword[table + offsetof(LuaTable, array)]);
// Static counts up to this limit are copied with unrolled loads and stores instead of a loop
const int kUnrollSetListLimit = 4;
if (count != LUA_MULTRET && count <= kUnrollSetListLimit)
{
for (int i = 0; i < count; ++i)
{
// array[index + i - 1] = value of register rb + i
build.vmovups(xmm0, luauRegValue(rb + i));
build.vmovups(xmmword[arrayDst + (index + i - 1) * sizeof(TValue)], xmm0);
}
}
else
{
CODEGEN_ASSERT(count != 0);
// offset counts bytes from 0 up to 'limit'; arrayDst is advanced to the first destination slot
build.xor_(offset, offset);
if (index != 1)
build.add(arrayDst, (index - 1) * sizeof(TValue));
Label repeatLoop, endLoop;
OperandX64 limit = count == LUA_MULTRET ? cscaled : OperandX64(count * sizeof(TValue));
// A static count always runs at least one iteration; only the dynamic range can be empty and needs a check up front
if (count == LUA_MULTRET)
{
build.cmp(offset, limit);
build.jcc(ConditionX64::NotBelow, endLoop);
}
build.setLabel(repeatLoop);
// Copy one slot: source is rBase + rb * sizeof(TValue) + offset, destination is arrayDst + offset
build.vmovups(xmm0, xmmword[offset + rBase + rb * sizeof(TValue)]);
build.vmovups(xmmword[offset + arrayDst], xmm0);
build.add(offset, sizeof(TValue));
build.cmp(offset, limit);
build.jcc(ConditionX64::Below, repeatLoop);
build.setLabel(endLoop);
}
// NOTE(review): callBarrierTableFast is defined elsewhere; presumably it emits the GC write barrier for 'table' - confirm
callBarrierTableFast(regs, build, table, {});
}
// Emits the x64 code for one step of a generic 'for' loop over a table.
// The table pointer is read from VM register ra + 1 and the current position from ra + 2.
// The array part is scanned first, skipping nil elements; on a non-nil element the new position is written
// back to ra + 2, the position as a number goes to ra + 3, the element goes to ra + 4, and control jumps to
// loopRepeat. Once the position is past the array part, the forgLoopNodeIter helper is called and the loop
// repeats when it returns a non-zero byte in al; otherwise execution falls through.
// Parameters:
//  - regs: register allocator, only used on the LuauCodeGenCallWrapperEmitInst path
//  - build: assembly builder that receives the instructions
//  - ra: base VM register of the loop
//  - aux: must be non-negative; the tags of registers ra + 3 + i for i in [2, aux) are set to nil up front
//  - loopRepeat: label to jump to when another iteration should run
void emitInstForGLoop(IrRegAllocX64& regs, AssemblyBuilderX64& build, int ra, int aux, Label& loopRepeat)
{
    CODEGEN_ASSERT(aux >= 0);

    const bool windowsAbi = build.abi == ABIX64::Windows;

    // The table and position live in the 2nd and 3rd argument registers of the active ABI,
    // so the manual helper call below only has to fill in the 1st and 4th
    RegisterX64 tablePtr = windowsAbi ? rdx : rsi;
    RegisterX64 position = windowsAbi ? r8 : rdx;
    RegisterX64 slotPtr = rax;

    build.mov(tablePtr, luauRegValue(ra + 1));
    build.mov(position, luauRegValue(ra + 2));

    // slotPtr = table->array + (position << kTValueSizeLog2)
    build.mov(dwordReg(slotPtr), dwordReg(position));
    build.shl(dwordReg(slotPtr), kTValueSizeLog2);
    build.add(slotPtr, qword[tablePtr + offsetof(LuaTable, array)]);

    // Set the tags of registers ra + 5 .. ra + 2 + aux to nil
    for (int reg = ra + 5; reg < ra + 3 + aux; ++reg)
        build.mov(luauRegTag(reg), LUA_TNIL);

    Label arrayDone, nilElement;

    // while (unsigned(position) < unsigned(table->sizearray))
    Label arrayScan = build.setLabel();
    build.cmp(dwordReg(position), dword[tablePtr + offsetof(LuaTable, sizearray)]);
    build.jcc(ConditionX64::NotBelow, arrayDone);

    // The incremented position is needed whether or not the element is nil
    build.inc(position);

    build.cmp(dword[slotPtr + offsetof(TValue, tt)], LUA_TNIL);
    build.jcc(ConditionX64::Equal, nilElement);

    // Found an element: store the new position into the value field of ra + 2
    build.mov(luauRegValue(ra + 2), position);

    // ra + 3 = position converted to a double, tagged as a number
    build.vcvtsi2sd(xmm0, xmm0, dwordReg(position));
    build.vmovsd(luauRegValue(ra + 3), xmm0);
    build.mov(luauRegTag(ra + 3), LUA_TNUMBER);

    // NOTE(review): setLuauReg is defined elsewhere; presumably it copies the element into ra + 4 through xmm2 - confirm
    setLuauReg(build, xmm2, ra + 4, xmmword[slotPtr]);

    build.jmp(loopRepeat);

    // Nil element: the position is already incremented, so just step to the next slot and rescan
    build.setLabel(nilElement);
    build.add(slotPtr, sizeof(TValue));
    build.jmp(arrayScan);

    // Past the array part: forgLoopNodeIter(L, table, position, address of ra)
    build.setLabel(arrayDone);

    if (FFlag::LuauCodeGenCallWrapperEmitInst)
    {
        regs.takeReg(tablePtr, kInvalidInstIdx);
        regs.takeReg(position, kInvalidInstIdx);

        IrCallWrapperX64 callWrapper(regs, build);
        callWrapper.addArgument(SizeX64::qword, rState);
        callWrapper.addArgument(SizeX64::qword, tablePtr);
        callWrapper.addArgument(SizeX64::qword, position);
        callWrapper.addArgument(SizeX64::qword, luauRegAddress(ra));
        callWrapper.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNodeIter)]);
    }
    else
    {
        // The 2nd and 3rd argument registers already hold the table and position
        RegisterX64 firstArg = windowsAbi ? rcx : rdi;
        RegisterX64 fourthArg = windowsAbi ? r9 : rcx;

        build.mov(firstArg, rState);
        build.lea(fourthArg, luauRegAddress(ra));
        build.call(qword[rNativeContext + offsetof(NativeContext, forgLoopNodeIter)]);
    }

    // Repeat the loop when the helper returned a non-zero byte
    build.test(al, al);
    build.jcc(ConditionX64::NotZero, loopRepeat);
}
}
}
}