Path: blob/master/CodeGen/include/Luau/AssemblyBuilderA64.h
2727 views
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details1#pragma once23#include "Luau/RegisterA64.h"4#include "Luau/AddressA64.h"5#include "Luau/ConditionA64.h"6#include "Luau/Label.h"78#include <string>9#include <vector>1011namespace Luau12{13namespace CodeGen14{15namespace A6416{1718enum FeaturesA6419{20Feature_JSCVT = 1 << 0,21Feature_AdvSIMD = 1 << 122};2324class AssemblyBuilderA6425{26public:27explicit AssemblyBuilderA64(bool logText, unsigned int features = 0);28~AssemblyBuilderA64();2930// Moves31void mov(RegisterA64 dst, RegisterA64 src);32void mov(RegisterA64 dst, int src); // macro3334// Moves of 32-bit immediates get decomposed into one or more of these35void movz(RegisterA64 dst, uint16_t src, int shift = 0);36void movn(RegisterA64 dst, uint16_t src, int shift = 0);37void movk(RegisterA64 dst, uint16_t src, int shift = 0);3839// Arithmetics40void add(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);41void add(RegisterA64 dst, RegisterA64 src1, uint16_t src2);42void sub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);43void sub(RegisterA64 dst, RegisterA64 src1, uint16_t src2);44void neg(RegisterA64 dst, RegisterA64 src);4546// Prevent implicit conversions from happening47template<typename T>48void add(RegisterA64 dst, RegisterA64 src1, T src2) = delete;49template<typename T>50void sub(RegisterA64 dst, RegisterA64 src1, T src2) = delete;5152// Comparisons53// Note: some arithmetic instructions also have versions that update flags (ADDS etc) but we aren't using them atm54void cmp(RegisterA64 src1, RegisterA64 src2);55void cmp(RegisterA64 src1, uint16_t src2);5657template<typename T>58void cmp(RegisterA64 src1, T src2) = delete; // Prevent implicit conversions from happening5960void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);61void cset(RegisterA64 dst, ConditionA64 cond);6263// Bitwise64void and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);65void orr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);66void eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);67void bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);68void tst(RegisterA64 src1, RegisterA64 src2, int shift = 0);69void mvn_(RegisterA64 dst, RegisterA64 src);7071// Bitwise with immediate72// Note: immediate must have a single contiguous sequence of 1 bits set of length 1..3173void and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2);74void orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2);75void eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2);76void tst(RegisterA64 src1, uint32_t src2);7778// Shifts79void lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);80void lsr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);81void asr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);82void ror(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);83void clz(RegisterA64 dst, RegisterA64 src);84void rbit(RegisterA64 dst, RegisterA64 src);85void rev(RegisterA64 dst, RegisterA64 src);8687// Shifts with immediates88// Note: immediate value must be in [0, 31] or [0, 63] range based on register type89void lsl(RegisterA64 dst, RegisterA64 src1, uint8_t src2);90void lsr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);91void asr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);92void ror(RegisterA64 dst, RegisterA64 src1, uint8_t src2);9394// Bitfields95void ubfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);96void ubfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);97void sbfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);98void sbfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);99100// Load101// Note: paired loads are currently omitted for simplicity102void ldr(RegisterA64 dst, AddressA64 src);103void ldrb(RegisterA64 dst, AddressA64 src);104void ldrh(RegisterA64 dst, AddressA64 src);105void ldrsb(RegisterA64 dst, AddressA64 src);106void ldrsh(RegisterA64 dst, AddressA64 src);107void ldrsw(RegisterA64 dst, AddressA64 src);108void ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);109110// Store111void str(RegisterA64 src, AddressA64 dst);112void strb(RegisterA64 src, AddressA64 dst);113void strh(RegisterA64 src, AddressA64 dst);114void stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst);115116// Control flow117void b(Label& label);118void bl(Label& label);119void br(RegisterA64 src);120void blr(RegisterA64 src);121void ret();122123// Conditional control flow124void b(ConditionA64 cond, Label& label);125void cbz(RegisterA64 src, Label& label);126void cbnz(RegisterA64 src, Label& label);127void tbz(RegisterA64 src, uint8_t bit, Label& label);128void tbnz(RegisterA64 src, uint8_t bit, Label& label);129130// Address of embedded data131void adr(RegisterA64 dst, const void* ptr, size_t size);132void adr(RegisterA64 dst, uint64_t value);133void adr(RegisterA64 dst, float value);134void adr(RegisterA64 dst, double value);135136template<typename T>137void adr(RegisterA64 dst, T value) = delete; // Prevent implicit conversions from happening138139// Address of code (label)140void adr(RegisterA64 dst, Label& label);141142// Floating-point scalar/vector moves143// Note: constant must be compatible with immediate floating point moves (see isFmovSupportedFp64/isFmovSupportedFp32)144void fmov(RegisterA64 dst, RegisterA64 src);145void fmov(RegisterA64 dst, double src);146void fmov(RegisterA64 dst, float src);147148template<typename T>149void fmov(RegisterA64 dst, T src) = delete; // Prevent implicit conversions from happening150151// Floating-point scalar/vector math152void fabs(RegisterA64 dst, RegisterA64 src);153void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);154void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);155void fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);156void fneg(RegisterA64 dst, RegisterA64 src);157void fsqrt(RegisterA64 dst, RegisterA64 src);158void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);159void faddp(RegisterA64 dst, RegisterA64 src);160void fmla(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);161162// Vector component manipulation163void ins_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);164void ins_4s(RegisterA64 dst, uint8_t dstIndex, RegisterA64 src, uint8_t srcIndex);165void dup_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);166void umov_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);167168void fcmeq_4s(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);169void fcmgt_4s(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);170void bit(RegisterA64 dst, RegisterA64 src, RegisterA64 mask);171void bif(RegisterA64 dst, RegisterA64 src, RegisterA64 mask);172173// Floating-point rounding and conversions174void frinta(RegisterA64 dst, RegisterA64 src);175void frintm(RegisterA64 dst, RegisterA64 src);176void frintp(RegisterA64 dst, RegisterA64 src);177void fcvt(RegisterA64 dst, RegisterA64 src);178void fcvtzs(RegisterA64 dst, RegisterA64 src);179void fcvtzu(RegisterA64 dst, RegisterA64 src);180void scvtf(RegisterA64 dst, RegisterA64 src);181void ucvtf(RegisterA64 dst, RegisterA64 src);182183// Floating-point conversion to integer using JS rules (wrap around 2^32) and set Z flag184// note: this is part of ARM8.3 (JSCVT feature); support of this instruction needs to be checked at runtime185void fjcvtzs(RegisterA64 dst, RegisterA64 src);186187// Floating-point comparisons188void fcmp(RegisterA64 src1, RegisterA64 src2);189void fcmpz(RegisterA64 src);190void fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);191192void udf();193194// Run final checks195bool finalize();196197// Places a label at current location and returns it198Label setLabel();199200// Assigns label position to the current location201void setLabel(Label& label);202203// Extracts code offset (in bytes) from label204uint32_t getLabelOffset(const Label& label)205{206CODEGEN_ASSERT(label.location != ~0u);207return label.location * 4;208}209210void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);211212// Code size is measured in 'code' array units - uint8_t on x64 and uint32_t on arm64213uint32_t getCodeSize() const;214215unsigned getInstructionCount() const;216217// Resulting data and code that need to be copied over one after the other218// The *end* of 'data' has to be aligned to 16 bytes, this will also align 'code'219std::vector<uint8_t> data;220std::vector<uint32_t> code;221222std::string text;223224const bool logText = false;225const unsigned int features = 0;226227// Maximum immediate argument to functions like add/sub/cmp228static constexpr size_t kMaxImmediate = (1 << 12) - 1;229230// Check if immediate mode mask is supported for bitwise operations (and/or/xor)231static bool isMaskSupported(uint32_t mask);232233// Check if fmov can be used to synthesize a constant234static bool isFmovSupportedFp64(double value);235static bool isFmovSupportedFp32(float value);236237private:238// Instruction archetypes239void place0(const char* name, uint32_t word);240void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0, int N = 0);241void placeSR2(const char* name, RegisterA64 dst, RegisterA64 src, uint8_t op, uint8_t op2 = 0);242void placeR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t op2);243void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op);244void placeI12(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op);245void placeI16(const char* name, RegisterA64 dst, int src, uint8_t op, int shift = 0);246void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint16_t opsize, int sizelog);247void placeB(const char* name, Label& label, uint8_t op);248void placeBC(const char* name, Label& label, uint8_t op, uint8_t cond);249void placeBCR(const char* name, Label& label, uint8_t op, RegisterA64 cond);250void placeBR(const char* name, RegisterA64 src, uint32_t op);251void placeBTR(const char* name, Label& label, uint8_t op, RegisterA64 cond, uint8_t bit);252void placeADR(const char* name, RegisterA64 src, uint8_t op);253void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label);254void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog);255void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert = 0);256void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc);257void placeFMOV(const char* name, RegisterA64 dst, double src, uint32_t op);258void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op);259void placeBFM(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op, int immr, int imms);260void placeER(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift);261void placeVR(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint16_t op, uint8_t op2);262263void place(uint32_t word);264265struct Patch266{267enum Kind268{269Imm26,270Imm19,271Imm14,272};273274Kind kind : 2;275uint32_t label : 30;276uint32_t location;277};278279void patchLabel(Label& label, Patch::Kind kind);280void patchOffset(uint32_t location, int value, Patch::Kind kind);281282void commit();283LUAU_NOINLINE void extend();284285// Data286size_t allocateData(size_t size, size_t align);287288// Logging of assembly in text form289LUAU_NOINLINE void log(const char* opcode);290LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);291LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, int src2);292LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src);293LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, int src, int shift = 0);294LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, double src);295LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, AddressA64 src);296LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);297LUAU_NOINLINE void log(const char* opcode, RegisterA64 src, Label label, int imm = -1);298LUAU_NOINLINE void log(const char* opcode, RegisterA64 src);299LUAU_NOINLINE void log(const char* opcode, Label label);300LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);301LUAU_NOINLINE void log(Label label);302LUAU_NOINLINE void log(RegisterA64 reg);303LUAU_NOINLINE void log(AddressA64 addr);304305uint32_t nextLabel = 1;306std::vector<Patch> pendingLabels;307std::vector<uint32_t> labelLocations;308309bool finalized = false;310bool overflowed = false;311312size_t dataPos = 0;313314uint32_t* codePos = nullptr;315uint32_t* codeEnd = nullptr;316};317318} // namespace A64319} // namespace CodeGen320} // namespace Luau321322323