Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
Roblox
GitHub Repository: Roblox/luau
Path: blob/master/CodeGen/include/Luau/AssemblyBuilderA64.h
2727 views
1
// This file is part of the Luau programming language and is licensed under MIT License; see LICENSE.txt for details
2
#pragma once
3
4
#include "Luau/RegisterA64.h"
5
#include "Luau/AddressA64.h"
6
#include "Luau/ConditionA64.h"
7
#include "Luau/Label.h"
8
9
#include <string>
10
#include <vector>
11
12
namespace Luau
13
{
14
namespace CodeGen
15
{
16
namespace A64
17
{
18
19
// Optional CPU capabilities, expressed as single-bit flags so that they can be OR-ed together.
// AssemblyBuilderA64's constructor takes an 'unsigned int features' argument; presumably that is a mask of these values.
enum FeaturesA64
{
    // JSCVT feature: the fjcvtzs comment in AssemblyBuilderA64 states that support needs to be checked at runtime
    Feature_JSCVT = 1 << 0,
    // NOTE(review): presumably Advanced SIMD availability; nothing in this header shows where the flag is consulted
    Feature_AdvSIMD = 1 << 1
};
24
25
class AssemblyBuilderA64
26
{
27
public:
28
explicit AssemblyBuilderA64(bool logText, unsigned int features = 0);
29
~AssemblyBuilderA64();
30
31
// Moves
32
void mov(RegisterA64 dst, RegisterA64 src);
33
void mov(RegisterA64 dst, int src); // macro
34
35
// Moves of 32-bit immediates get decomposed into one or more of these
36
void movz(RegisterA64 dst, uint16_t src, int shift = 0);
37
void movn(RegisterA64 dst, uint16_t src, int shift = 0);
38
void movk(RegisterA64 dst, uint16_t src, int shift = 0);
39
40
// Arithmetics
41
void add(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
42
void add(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
43
void sub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
44
void sub(RegisterA64 dst, RegisterA64 src1, uint16_t src2);
45
void neg(RegisterA64 dst, RegisterA64 src);
46
47
// Prevent implicit conversions from happening
48
template<typename T>
49
void add(RegisterA64 dst, RegisterA64 src1, T src2) = delete;
50
template<typename T>
51
void sub(RegisterA64 dst, RegisterA64 src1, T src2) = delete;
52
53
// Comparisons
54
// Note: some arithmetic instructions also have versions that update flags (ADDS etc) but we aren't using them atm
55
void cmp(RegisterA64 src1, RegisterA64 src2);
56
void cmp(RegisterA64 src1, uint16_t src2);
57
58
template<typename T>
59
void cmp(RegisterA64 src1, T src2) = delete; // Prevent implicit conversions from happening
60
61
void csel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
62
void cset(RegisterA64 dst, ConditionA64 cond);
63
64
// Bitwise
65
void and_(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
66
void orr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
67
void eor(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
68
void bic(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
69
void tst(RegisterA64 src1, RegisterA64 src2, int shift = 0);
70
void mvn_(RegisterA64 dst, RegisterA64 src);
71
72
// Bitwise with immediate
73
// Note: immediate must have a single contiguous sequence of 1 bits set of length 1..31
74
void and_(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
75
void orr(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
76
void eor(RegisterA64 dst, RegisterA64 src1, uint32_t src2);
77
void tst(RegisterA64 src1, uint32_t src2);
78
79
// Shifts
80
void lsl(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
81
void lsr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
82
void asr(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
83
void ror(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
84
void clz(RegisterA64 dst, RegisterA64 src);
85
void rbit(RegisterA64 dst, RegisterA64 src);
86
void rev(RegisterA64 dst, RegisterA64 src);
87
88
// Shifts with immediates
89
// Note: immediate value must be in [0, 31] or [0, 63] range based on register type
90
void lsl(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
91
void lsr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
92
void asr(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
93
void ror(RegisterA64 dst, RegisterA64 src1, uint8_t src2);
94
95
// Bitfields
96
void ubfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
97
void ubfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
98
void sbfiz(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
99
void sbfx(RegisterA64 dst, RegisterA64 src, uint8_t f, uint8_t w);
100
101
// Load
102
// Note: paired loads are currently omitted for simplicity
103
void ldr(RegisterA64 dst, AddressA64 src);
104
void ldrb(RegisterA64 dst, AddressA64 src);
105
void ldrh(RegisterA64 dst, AddressA64 src);
106
void ldrsb(RegisterA64 dst, AddressA64 src);
107
void ldrsh(RegisterA64 dst, AddressA64 src);
108
void ldrsw(RegisterA64 dst, AddressA64 src);
109
void ldp(RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);
110
111
// Store
112
void str(RegisterA64 src, AddressA64 dst);
113
void strb(RegisterA64 src, AddressA64 dst);
114
void strh(RegisterA64 src, AddressA64 dst);
115
void stp(RegisterA64 src1, RegisterA64 src2, AddressA64 dst);
116
117
// Control flow
118
void b(Label& label);
119
void bl(Label& label);
120
void br(RegisterA64 src);
121
void blr(RegisterA64 src);
122
void ret();
123
124
// Conditional control flow
125
void b(ConditionA64 cond, Label& label);
126
void cbz(RegisterA64 src, Label& label);
127
void cbnz(RegisterA64 src, Label& label);
128
void tbz(RegisterA64 src, uint8_t bit, Label& label);
129
void tbnz(RegisterA64 src, uint8_t bit, Label& label);
130
131
// Address of embedded data
132
void adr(RegisterA64 dst, const void* ptr, size_t size);
133
void adr(RegisterA64 dst, uint64_t value);
134
void adr(RegisterA64 dst, float value);
135
void adr(RegisterA64 dst, double value);
136
137
template<typename T>
138
void adr(RegisterA64 dst, T value) = delete; // Prevent implicit conversions from happening
139
140
// Address of code (label)
141
void adr(RegisterA64 dst, Label& label);
142
143
// Floating-point scalar/vector moves
144
// Note: constant must be compatible with immediate floating point moves (see isFmovSupportedFp64/isFmovSupportedFp32)
145
void fmov(RegisterA64 dst, RegisterA64 src);
146
void fmov(RegisterA64 dst, double src);
147
void fmov(RegisterA64 dst, float src);
148
149
template<typename T>
150
void fmov(RegisterA64 dst, T src) = delete; // Prevent implicit conversions from happening
151
152
// Floating-point scalar/vector math
153
void fabs(RegisterA64 dst, RegisterA64 src);
154
void fadd(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
155
void fdiv(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
156
void fmul(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
157
void fneg(RegisterA64 dst, RegisterA64 src);
158
void fsqrt(RegisterA64 dst, RegisterA64 src);
159
void fsub(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
160
void faddp(RegisterA64 dst, RegisterA64 src);
161
void fmla(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
162
163
// Vector component manipulation
164
void ins_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);
165
void ins_4s(RegisterA64 dst, uint8_t dstIndex, RegisterA64 src, uint8_t srcIndex);
166
void dup_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);
167
void umov_4s(RegisterA64 dst, RegisterA64 src, uint8_t index);
168
169
void fcmeq_4s(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
170
void fcmgt_4s(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2);
171
void bit(RegisterA64 dst, RegisterA64 src, RegisterA64 mask);
172
void bif(RegisterA64 dst, RegisterA64 src, RegisterA64 mask);
173
174
// Floating-point rounding and conversions
175
void frinta(RegisterA64 dst, RegisterA64 src);
176
void frintm(RegisterA64 dst, RegisterA64 src);
177
void frintp(RegisterA64 dst, RegisterA64 src);
178
void fcvt(RegisterA64 dst, RegisterA64 src);
179
void fcvtzs(RegisterA64 dst, RegisterA64 src);
180
void fcvtzu(RegisterA64 dst, RegisterA64 src);
181
void scvtf(RegisterA64 dst, RegisterA64 src);
182
void ucvtf(RegisterA64 dst, RegisterA64 src);
183
184
// Floating-point conversion to integer using JS rules (wrap around 2^32) and set Z flag
185
// note: this is part of ARM8.3 (JSCVT feature); support of this instruction needs to be checked at runtime
186
void fjcvtzs(RegisterA64 dst, RegisterA64 src);
187
188
// Floating-point comparisons
189
void fcmp(RegisterA64 src1, RegisterA64 src2);
190
void fcmpz(RegisterA64 src);
191
void fcsel(RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
192
193
void udf();
194
195
// Run final checks
196
bool finalize();
197
198
// Places a label at current location and returns it
199
Label setLabel();
200
201
// Assigns label position to the current location
202
void setLabel(Label& label);
203
204
// Extracts code offset (in bytes) from label
205
uint32_t getLabelOffset(const Label& label)
206
{
207
CODEGEN_ASSERT(label.location != ~0u);
208
return label.location * 4;
209
}
210
211
void logAppend(const char* fmt, ...) LUAU_PRINTF_ATTR(2, 3);
212
213
// Code size is measured in 'code' array units - uint8_t on x64 and uint32_t on arm64
214
uint32_t getCodeSize() const;
215
216
unsigned getInstructionCount() const;
217
218
// Resulting data and code that need to be copied over one after the other
219
// The *end* of 'data' has to be aligned to 16 bytes, this will also align 'code'
220
std::vector<uint8_t> data;
221
std::vector<uint32_t> code;
222
223
std::string text;
224
225
const bool logText = false;
226
const unsigned int features = 0;
227
228
// Maximum immediate argument to functions like add/sub/cmp
229
static constexpr size_t kMaxImmediate = (1 << 12) - 1;
230
231
// Check if immediate mode mask is supported for bitwise operations (and/or/xor)
232
static bool isMaskSupported(uint32_t mask);
233
234
// Check if fmov can be used to synthesize a constant
235
static bool isFmovSupportedFp64(double value);
236
static bool isFmovSupportedFp32(float value);
237
238
private:
239
// Instruction archetypes
240
void place0(const char* name, uint32_t word);
241
void placeSR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift = 0, int N = 0);
242
void placeSR2(const char* name, RegisterA64 dst, RegisterA64 src, uint8_t op, uint8_t op2 = 0);
243
void placeR3(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t op2);
244
void placeR1(const char* name, RegisterA64 dst, RegisterA64 src, uint32_t op);
245
void placeI12(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op);
246
void placeI16(const char* name, RegisterA64 dst, int src, uint8_t op, int shift = 0);
247
void placeA(const char* name, RegisterA64 dst, AddressA64 src, uint16_t opsize, int sizelog);
248
void placeB(const char* name, Label& label, uint8_t op);
249
void placeBC(const char* name, Label& label, uint8_t op, uint8_t cond);
250
void placeBCR(const char* name, Label& label, uint8_t op, RegisterA64 cond);
251
void placeBR(const char* name, RegisterA64 src, uint32_t op);
252
void placeBTR(const char* name, Label& label, uint8_t op, RegisterA64 cond, uint8_t bit);
253
void placeADR(const char* name, RegisterA64 src, uint8_t op);
254
void placeADR(const char* name, RegisterA64 src, uint8_t op, Label& label);
255
void placeP(const char* name, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src, uint8_t op, uint8_t opc, int sizelog);
256
void placeCS(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond, uint8_t op, uint8_t opc, int invert = 0);
257
void placeFCMP(const char* name, RegisterA64 src1, RegisterA64 src2, uint8_t op, uint8_t opc);
258
void placeFMOV(const char* name, RegisterA64 dst, double src, uint32_t op);
259
void placeBM(const char* name, RegisterA64 dst, RegisterA64 src1, uint32_t src2, uint8_t op);
260
void placeBFM(const char* name, RegisterA64 dst, RegisterA64 src1, int src2, uint8_t op, int immr, int imms);
261
void placeER(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint8_t op, int shift);
262
void placeVR(const char* name, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, uint16_t op, uint8_t op2);
263
264
void place(uint32_t word);
265
266
struct Patch
267
{
268
enum Kind
269
{
270
Imm26,
271
Imm19,
272
Imm14,
273
};
274
275
Kind kind : 2;
276
uint32_t label : 30;
277
uint32_t location;
278
};
279
280
void patchLabel(Label& label, Patch::Kind kind);
281
void patchOffset(uint32_t location, int value, Patch::Kind kind);
282
283
void commit();
284
LUAU_NOINLINE void extend();
285
286
// Data
287
size_t allocateData(size_t size, size_t align);
288
289
// Logging of assembly in text form
290
LUAU_NOINLINE void log(const char* opcode);
291
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, int shift = 0);
292
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, int src2);
293
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src);
294
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, int src, int shift = 0);
295
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, double src);
296
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, AddressA64 src);
297
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst1, RegisterA64 dst2, AddressA64 src);
298
LUAU_NOINLINE void log(const char* opcode, RegisterA64 src, Label label, int imm = -1);
299
LUAU_NOINLINE void log(const char* opcode, RegisterA64 src);
300
LUAU_NOINLINE void log(const char* opcode, Label label);
301
LUAU_NOINLINE void log(const char* opcode, RegisterA64 dst, RegisterA64 src1, RegisterA64 src2, ConditionA64 cond);
302
LUAU_NOINLINE void log(Label label);
303
LUAU_NOINLINE void log(RegisterA64 reg);
304
LUAU_NOINLINE void log(AddressA64 addr);
305
306
uint32_t nextLabel = 1;
307
std::vector<Patch> pendingLabels;
308
std::vector<uint32_t> labelLocations;
309
310
bool finalized = false;
311
bool overflowed = false;
312
313
size_t dataPos = 0;
314
315
uint32_t* codePos = nullptr;
316
uint32_t* codeEnd = nullptr;
317
};
318
319
} // namespace A64
320
} // namespace CodeGen
321
} // namespace Luau
322
323