// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <functional>

#include "Common/ArmCommon.h"
#include "Common/BitSet.h"
#include "Common/CodeBlock.h"
#include "Common/CommonTypes.h"
#include "Common/Log.h"

#define DYNA_REC JIT

#ifdef FMAX
#undef FMAX
#endif
#ifdef FMIN
#undef FMIN
#endif

namespace Arm64Gen
{

// X30 serves a dual purpose as a link register
// Encoded as <u3:type><u5:reg>
// Types:
// 000 - 32bit GPR
// 001 - 64bit GPR
// 010 - VFP single precision
// 100 - VFP double precision
// 110 - VFP quad precision
enum ARM64Reg
{
	// 32bit registers
	W0 = 0, W1, W2, W3, W4, W5, W6,
	W7, W8, W9, W10, W11, W12, W13, W14,
	W15, W16, W17, W18, W19, W20, W21, W22,
	W23, W24, W25, W26, W27, W28, W29, W30,

	WSP, // 32bit stack pointer

	// 64bit registers
	X0 = 0x20, X1, X2, X3, X4, X5, X6,
	X7, X8, X9, X10, X11, X12, X13, X14,
	X15, X16, X17, X18, X19, X20, X21, X22,
	X23, X24, X25, X26, X27, X28, X29, X30,

	SP, // 64bit stack pointer

	// VFP single precision registers
	S0 = 0x40, S1, S2, S3, S4, S5, S6,
	S7, S8, S9, S10, S11, S12, S13,
	S14, S15, S16, S17, S18, S19, S20,
	S21, S22, S23, S24, S25, S26, S27,
	S28, S29, S30, S31,

	// VFP Double Precision registers
	D0 = 0x80, D1, D2, D3, D4, D5, D6, D7,
	D8, D9, D10, D11, D12, D13, D14, D15,
	D16, D17, D18, D19, D20, D21, D22, D23,
	D24, D25, D26, D27, D28, D29, D30, D31,

	// ASIMD Quad-Word registers
	Q0 = 0xC0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
	Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23,
	Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31,

	// For PRFM (prefetch memory) encoding
	// This is encoded in the Rt register
	// Data preload
	PLDL1KEEP = 0, PLDL1STRM,
	PLDL2KEEP, PLDL2STRM,
	PLDL3KEEP, PLDL3STRM,
	// Instruction preload
	PLIL1KEEP = 8, PLIL1STRM,
	PLIL2KEEP, PLIL2STRM,
	PLIL3KEEP, PLIL3STRM,
	// Prepare for store
	PLTL1KEEP = 16, PLTL1STRM,
	PLTL2KEEP, PLTL2STRM,
	PLTL3KEEP, PLTL3STRM,

	WZR = WSP,
	ZR = SP,
	FP = X29,
	LR = X30,

	INVALID_REG = 0xFFFFFFFF
};

// R19-R28. R29 (FP), R30 (LR) are always saved and FP updated appropriately.
const u32 ALL_CALLEE_SAVED = 0x1FF80000;
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // q8-q15

inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
inline bool IsScalar(ARM64Reg reg) { return IsSingle(reg) || IsDouble(reg); }
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; }

int CountLeadingZeros(uint64_t value, int width);

inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
inline ARM64Reg EncodeRegToSingle(ARM64Reg reg) { return (ARM64Reg)(DecodeReg(reg) + S0); }
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }

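// Illustrative examples of the encoding helpers above (not part of the original header).
// The low 5 bits hold the register number and the upper bits hold the type, so:
//   DecodeReg(X3)         == W3    (type bits stripped, register number 3 remains)
//   EncodeRegTo64(W3)     == X3
//   EncodeRegToDouble(Q5) == D5
//   EncodeRegToSingle(D7) == S7
//   Is64Bit(X0) == true, IsGPR(Q0) == false, IsQuad(Q0) == true
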
// For AND/TST/ORR/EOR etc
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r);
// For ADD/SUB
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift);

float FPImm8ToFloat(uint8_t bits);
bool FPImm8FromFloat(float value, uint8_t *immOut);

enum OpType
{
	TYPE_IMM = 0,
	TYPE_REG,
	TYPE_IMMSREG,
	TYPE_RSR,
	TYPE_MEM
};

enum ShiftType
{
	ST_LSL = 0,
	ST_LSR = 1,
	ST_ASR = 2,
	ST_ROR = 3,
};

enum IndexType
{
	INDEX_UNSIGNED = 0,
	INDEX_POST = 1,
	INDEX_PRE = 2,
	INDEX_SIGNED = 3, // used in LDP/STP
};

enum ShiftAmount
{
	SHIFT_0 = 0,
	SHIFT_16 = 1,
	SHIFT_32 = 2,
	SHIFT_48 = 3,
};

enum RoundingMode {
	ROUND_A, // round to nearest, ties to away
	ROUND_M, // round towards -inf
	ROUND_N, // round to nearest, ties to even
	ROUND_P, // round towards +inf
	ROUND_Z, // round towards zero
};

struct FixupBranch
{
	// Pointer to executable code address.
	const u8 *ptr;
	// Type defines
	// 0 = CBZ (32bit)
	// 1 = CBNZ (32bit)
	// 2 = B (conditional)
	// 3 = TBZ
	// 4 = TBNZ
	// 5 = B (unconditional)
	// 6 = BL (unconditional)
	u32 type;

	// Used with B.cond
	CCFlags cond;

	// Used with TBZ/TBNZ
	u8 bit;

	// Used with Test/Compare and Branch
	ARM64Reg reg;
};

enum PStateField
{
	FIELD_SPSel = 0,
	FIELD_DAIFSet,
	FIELD_DAIFClr,
	FIELD_NZCV, // The only system registers accessible from EL0 (user space)
	FIELD_FPCR = 0x340,
	FIELD_FPSR = 0x341,
};

enum SystemHint
{
	HINT_NOP = 0,
	HINT_YIELD,
	HINT_WFE,
	HINT_WFI,
	HINT_SEV,
	HINT_SEVL,
};

enum BarrierType
{
	OSHLD = 1,
	OSHST = 2,
	OSH = 3,
	NSHLD = 5,
	NSHST = 6,
	NSH = 7,
	ISHLD = 9,
	ISHST = 10,
	ISH = 11,
	LD = 13,
	ST = 14,
	SY = 15,
};

class ArithOption
{
public:
	enum WidthSpecifier
	{
		WIDTH_DEFAULT,
		WIDTH_32BIT,
		WIDTH_64BIT,
	};

	enum ExtendSpecifier
	{
		EXTEND_UXTB = 0x0,
		EXTEND_UXTH = 0x1,
		EXTEND_UXTW = 0x2, /* Also LSL on 32bit width */
		EXTEND_UXTX = 0x3, /* Also LSL on 64bit width */
		EXTEND_SXTB = 0x4,
		EXTEND_SXTH = 0x5,
		EXTEND_SXTW = 0x6,
		EXTEND_SXTX = 0x7,
	};

	enum TypeSpecifier
	{
		TYPE_EXTENDEDREG,
		TYPE_IMM,
		TYPE_SHIFTEDREG,
	};

private:
	ARM64Reg m_destReg;
	WidthSpecifier m_width;
	ExtendSpecifier m_extend = EXTEND_UXTB;
	TypeSpecifier m_type;
	ShiftType m_shifttype;
	u32 m_shift;

public:
	ArithOption(ARM64Reg Rd, bool index = false)
	{
		// Indexed registers are a feature of AArch64: load/store instructions that take a
		// register offset can use that register as an index. When indexing, the offset
		// register is shifted left so that we index in multiples of the access size:
		// 8-bit: no shift
		// 16-bit: index LSL 1
		// 32-bit: index LSL 2
		// 64-bit: index LSL 3
		if (index)
			m_shift = 4;
		else
			m_shift = 0;

		m_destReg = Rd;
		m_type = TYPE_EXTENDEDREG;
		if (Is64Bit(Rd))
		{
			m_width = WIDTH_64BIT;
			m_extend = EXTEND_UXTX;
		}
		else
		{
			m_width = WIDTH_32BIT;
			m_extend = EXTEND_UXTW;
		}
		m_shifttype = ST_LSL;
	}
	ArithOption(ARM64Reg Rd, bool index, bool signExtend) {
		if (index)
			m_shift = 4;
		else
			m_shift = 0;

		m_destReg = Rd;
		m_type = TYPE_EXTENDEDREG;
		if (Is64Bit(Rd)) {
			m_width = WIDTH_64BIT;
			m_extend = EXTEND_UXTX;
		} else {
			m_width = WIDTH_32BIT;
			m_extend = signExtend ? EXTEND_SXTW : EXTEND_UXTW;
		}
		m_shifttype = ST_LSL;
	}
	ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
	{
		m_destReg = Rd;
		m_shift = shift;
		m_shifttype = shift_type;
		m_type = TYPE_SHIFTEDREG;
		if (Is64Bit(Rd))
		{
			m_width = WIDTH_64BIT;
			if (shift == 64)
				m_shift = 0;
		}
		else
		{
			m_width = WIDTH_32BIT;
			if (shift == 32)
				m_shift = 0;
		}
	}
	TypeSpecifier GetType() const
	{
		return m_type;
	}
	ARM64Reg GetReg() const
	{
		return m_destReg;
	}
	u32 GetData() const
	{
		switch (m_type)
		{
		case TYPE_EXTENDEDREG:
			return (m_extend << 13) |
			       (m_shift << 10);
			break;
		case TYPE_SHIFTEDREG:
			return (m_shifttype << 22) |
			       (m_shift << 10);
			break;
		default:
			_dbg_assert_msg_(false, "Invalid type in GetData");
			break;
		}
		return 0;
	}
};

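// Illustrative ArithOption usage with the emitter methods declared below
// (a sketch, not part of the original header):
//   ADD(X0, X1, X2, ArithOption(X2, ST_LSL, 4));   // X0 = X1 + (X2 << 4), shifted-register form
//   LDR(W0, X1, ArithOption(W2, true));            // register-offset load, index scaled by the access size
//   LDR(W0, X1, ArithOption(W2, true, true));      // as above, with the 32-bit index sign-extended (SXTW)
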
class ARM64XEmitter
{
	friend class ARM64FloatEmitter;
	friend class ARM64CodeBlock;

private:
	const u8 *m_code = nullptr;
	u8 *m_writable = nullptr;
	const u8 *m_lastCacheFlushEnd = nullptr;

	void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
	void EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr);
	void EncodeUnconditionalBranchInst(u32 op, const void* ptr);
	void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn);
	void EncodeExceptionInst(u32 instenc, u32 imm);
	void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt);
	void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
	void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
	void EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn);
	void EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm);
	void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
	void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
	void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
	void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
	void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
	void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
	void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
	void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

protected:
	inline void Write32(u32 value)
	{
		*(u32 *)m_writable = value;
		m_code += 4;
		m_writable += 4;
	}

public:
	ARM64XEmitter()
	{
	}

	ARM64XEmitter(const u8 *codePtr, u8 *writablePtr);

	virtual ~ARM64XEmitter()
	{
	}

	void SetCodePointer(const u8 *ptr, u8 *writePtr);
	const u8* GetCodePointer() const;

	void ReserveCodeSpace(u32 bytes);
	const u8* AlignCode16();
	const u8* AlignCodePage();
	const u8 *NopAlignCode16();
	void FlushIcache();
	void FlushIcacheSection(const u8* start, const u8* end);
	u8* GetWritableCodePtr();

	// FixupBranch branching
	void SetJumpTarget(FixupBranch const& branch);
	FixupBranch CBZ(ARM64Reg Rt);
	FixupBranch CBNZ(ARM64Reg Rt);
	FixupBranch B(CCFlags cond);
	FixupBranch TBZ(ARM64Reg Rt, u8 bit);
	FixupBranch TBNZ(ARM64Reg Rt, u8 bit);
	FixupBranch B();
	FixupBranch BL();

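	// Illustrative forward-branch pattern (a sketch, not part of the original header):
	//   FixupBranch skip = CBZ(W0);   // branch emitted with the target still unknown
	//   ...                           // code that is jumped over
	//   SetJumpTarget(skip);          // patch the branch to land at the current code pointer
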
	// Compare and Branch
	void CBZ(ARM64Reg Rt, const void* ptr);
	void CBNZ(ARM64Reg Rt, const void* ptr);

	// Conditional Branch
	void B(CCFlags cond, const void* ptr);

	// Test and Branch
	void TBZ(ARM64Reg Rt, u8 bits, const void* ptr);
	void TBNZ(ARM64Reg Rt, u8 bits, const void* ptr);

	// Unconditional Branch
	void B(const void* ptr);
	void BL(const void* ptr);

	// Unconditional Branch (register)
	void BR(ARM64Reg Rn);
	void BLR(ARM64Reg Rn);
	void RET(ARM64Reg Rn = X30);
	void ERET();
	void DRPS();

	// Exception generation
	void SVC(u32 imm);
	void HVC(u32 imm);
	void SMC(u32 imm);
	void BRK(u32 imm);
	void HLT(u32 imm);
	void DCPS1(u32 imm);
	void DCPS2(u32 imm);
	void DCPS3(u32 imm);

	// System
	void _MSR(PStateField field, u8 imm);

	void _MSR(PStateField field, ARM64Reg Rt);
	void MRS(ARM64Reg Rt, PStateField field);

	void HINT(SystemHint op);
	void CLREX();
	void DSB(BarrierType type);
	void DMB(BarrierType type);
	void ISB(BarrierType type);

	// Add/Subtract (Extended/Shifted register)
	void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void CMN(ARM64Reg Rn, ARM64Reg Rm);
	void CMN(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void CMP(ARM64Reg Rn, ARM64Reg Rm);
	void CMP(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);

	// Add/Subtract (with carry)
	void ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Conditional Compare (immediate)
	void CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
	void CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);

	// Conditional Compare (register)
	void CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
	void CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);

	// Conditional Select
	void CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);

	// Aliases
	void CSET(ARM64Reg Rd, CCFlags cond) {
		ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
		CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
	}
	void NEG(ARM64Reg Rd, ARM64Reg Rs) {
		SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs);
	}

	// Data-Processing 1 source
	void RBIT(ARM64Reg Rd, ARM64Reg Rn);
	void REV16(ARM64Reg Rd, ARM64Reg Rn);
	void REV32(ARM64Reg Rd, ARM64Reg Rn);
	void REV64(ARM64Reg Rd, ARM64Reg Rn);
	void CLZ(ARM64Reg Rd, ARM64Reg Rn);
	void CLS(ARM64Reg Rd, ARM64Reg Rn);

	// Data-Processing 2 source
	void UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Data-Processing 3 source
	void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Logical (shifted register)
	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void TST(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);

	// Wrap the above for saner syntax
	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BIC(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORN(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EOR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void TST(ARM64Reg Rn, ARM64Reg Rm) { TST(Rn, Rm, ArithOption(Is64Bit(Rn) ? ZR : WZR, ST_LSL, 0)); }

	// Convenience wrappers around ORR. These match the official convenience syntax.
	void MOV(ARM64Reg Rd, ARM64Reg Rm, const ArithOption &Shift);
	void MOV(ARM64Reg Rd, ARM64Reg Rm);
	void MVN(ARM64Reg Rd, ARM64Reg Rm);

	// Wrapper around ADD reg, reg, imm.
	void MOVfromSP(ARM64Reg Rd);
	void MOVtoSP(ARM64Reg Rn);

	// TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
	void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
	void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
	void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
	void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);

	// Logical (immediate)
	void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);

	// Add/subtract (immediate)
	void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
	void CMN(ARM64Reg Rn, u32 imm, bool shift = false);

	// Data Processing (Immediate)
	void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
	void MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
	void MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);

	// Bitfield move
	void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
	void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);

	// Extract register (ROR with two inputs, if same then faster on A67)
	void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);

	// Aliases
	void SXTB(ARM64Reg Rd, ARM64Reg Rn);
	void SXTH(ARM64Reg Rd, ARM64Reg Rn);
	void SXTW(ARM64Reg Rd, ARM64Reg Rn);
	void UXTB(ARM64Reg Rd, ARM64Reg Rn);
	void UXTH(ARM64Reg Rd, ARM64Reg Rn);

	void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width) {
		UBFM(Rd, Rn, lsb, lsb + width - 1);
	}

	// Load Register (Literal)
	void LDR(ARM64Reg Rt, u32 imm);
	void LDRSW(ARM64Reg Rt, u32 imm);
	void PRFM(ARM64Reg Rt, u32 imm);

	// Load/Store Exclusive
	void STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void LDXRB(ARM64Reg Rt, ARM64Reg Rn);
	void LDAXRB(ARM64Reg Rt, ARM64Reg Rn);
	void STLRB(ARM64Reg Rt, ARM64Reg Rn);
	void LDARB(ARM64Reg Rt, ARM64Reg Rn);
	void STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void LDXRH(ARM64Reg Rt, ARM64Reg Rn);
	void LDAXRH(ARM64Reg Rt, ARM64Reg Rn);
	void STLRH(ARM64Reg Rt, ARM64Reg Rn);
	void LDARH(ARM64Reg Rt, ARM64Reg Rn);
	void STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void LDXR(ARM64Reg Rt, ARM64Reg Rn);
	void LDAXR(ARM64Reg Rt, ARM64Reg Rn);
	void LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void STLR(ARM64Reg Rt, ARM64Reg Rn);
	void LDAR(ARM64Reg Rt, ARM64Reg Rn);

	// Load/Store no-allocate pair (offset)
	void STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
	void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);

	// Load/Store register (immediate indexed)
	void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Load/Store register (register offset)
	void STRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRSB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void STRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRSH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void STR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRSW(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void PRFM(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);

	// Load/Store register (unscaled offset)
	void STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Load/Store pair
	void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);

	// Address of label/page PC-relative
	void ADR(ARM64Reg Rd, s32 imm);
	void ADRP(ARM64Reg Rd, s32 imm);

	// Wrapper around MOVZ+MOVK
	void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
	template <class P>
	void MOVP2R(ARM64Reg Rd, P *ptr) {
		_assert_msg_(Is64Bit(Rd), "Can't store pointers in 32-bit registers");
		MOVI2R(Rd, (uintptr_t)ptr);
	}

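	// Illustrative constant/pointer materialization (a sketch, not part of the original header):
	//   MOVI2R(X0, 0x123456789ULL);   // builds the constant with MOVZ/MOVK (the optimize flag allows shorter sequences where possible)
	//   MOVP2R(X1, &g_some_table);    // requires a 64-bit destination; "g_some_table" is a hypothetical pointer for illustration
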
	// Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch register.
	void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG) { ANDSI2R(Is64Bit(Rn) ? ZR : WZR, Rn, imm, scratch); }
	void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);

	void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);

	bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TryCMPI2R(ARM64Reg Rn, u64 imm);

	bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);

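	// Illustrative use of the *I2R helpers above (a sketch, not part of the original header):
	//   ANDI2R(W0, W1, 0xFF);             // encodable as a logical immediate, no scratch register needed
	//   ANDI2R(W0, W1, 0x12345678, W9);   // not directly encodable, so W9 is used to materialize the constant
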
	// Pseudo-instruction for convenience. PUSH pushes 16 bytes even though we only push a single register.
	// This is so the stack pointer is always 16-byte aligned, which is checked by hardware!
	void PUSH(ARM64Reg Rd);
	void POP(ARM64Reg Rd);
	void PUSH2(ARM64Reg Rd, ARM64Reg Rn);
	void POP2(ARM64Reg Rd, ARM64Reg Rn);

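	// Illustrative usage (a sketch, not part of the original header):
	//   PUSH2(X19, X20);   // push two registers as a pair
	//   ...
	//   POP2(X19, X20);    // restore them with the same call order
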
	// Utility to generate a call to a std::function object.
	//
	// Unfortunately, calling operator() directly is undefined behavior in C++
	// (this method might be a thunk in the case of multi-inheritance) so we
	// have to go through a trampoline function.
	template <typename T, typename... Args>
	static void CallLambdaTrampoline(const std::function<T(Args...)>* f,
	                                 Args... args)
	{
		(*f)(args...);
	}

	// This function expects you to have set up the state.
	// Overwrites X0 and X30
	template <typename T, typename... Args>
	ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
	{
		auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
		MOVI2R(X30, (uintptr_t)trampoline);
		MOVI2R(X0, (uintptr_t)const_cast<void*>((const void*)f));
		return X30;
	}

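	// Illustrative call sequence (a sketch, not part of the original header); the std::function
	// object must outlive the generated code. "func" is a hypothetical callee:
	//   static std::function<void(int)> func = ...;
	//   MOVI2R(W1, 42);                   // the lambda's own arguments follow the std::function pointer in X0
	//   BLR(ABI_SetupLambda(&func));      // X30 holds the trampoline address, so branch-and-link through it
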
	// Plain function call
	void QuickCallFunction(ARM64Reg scratchreg, const void *func);
	template <typename T> void QuickCallFunction(ARM64Reg scratchreg, T func) {
		QuickCallFunction(scratchreg, (const void *)func);
	}
};

class ARM64FloatEmitter
{
public:
	ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) {}

	void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Loadstore unscaled
	void LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Loadstore single structure
	void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
	void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
	void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
	void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
	void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
	void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
	void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
	void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);

	// Loadstore multiple structure
	void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
	void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
	void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
	void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);

	// Loadstore paired
	void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);

	// Loadstore register offset
	void STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);

	// Scalar - 1 Source
	void FABS(ARM64Reg Rd, ARM64Reg Rn);
	void FNEG(ARM64Reg Rd, ARM64Reg Rn);
	void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
	void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP

	// Scalar - pairwise
	void FADDP(ARM64Reg Rd, ARM64Reg Rn);
	void FMAXP(ARM64Reg Rd, ARM64Reg Rn);
	void FMINP(ARM64Reg Rd, ARM64Reg Rn);
	void FMAXNMP(ARM64Reg Rd, ARM64Reg Rn);
	void FMINNMP(ARM64Reg Rd, ARM64Reg Rn);

	// Scalar - 2 Source
	void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Scalar - 3 Source. Note - the accumulator is last on ARM!
	void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);

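	// Operand-order note (added for clarity): since the accumulator comes last,
	// FMADD(Rd, Rn, Rm, Ra) computes Rd = Ra + Rn * Rm, and FMSUB computes Rd = Ra - Rn * Rm.
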
	// Scalar floating point immediate
	void FMOV(ARM64Reg Rd, uint8_t imm8);

	// Vector
	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
	void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FADDP(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void NOT(ARM64Reg Rd, ARM64Reg Rn);
	void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void MOV(ARM64Reg Rd, ARM64Reg Rn) {
		ORR(Rd, Rn, Rn);
	}

	void UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);

	void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);

	// Move
	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
	void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
	void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
	void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);

	// Vector immediates
	void FMOV(u8 size, ARM64Reg Rd, u8 imm8);
	// MSL means bits shifted in are 1s. For size=64, each bit of imm8 is expanded to 8 actual bits.
	void MOVI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0, bool MSL = false);
	void MVNI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0, bool MSL = false);
	void ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
	void BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);

	bool TryMOVI(u8 size, ARM64Reg Rd, uint64_t value);
	// Allow using a different size. Unclear if there's a penalty.
	bool TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t value);

	// One source
	void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);

	// Scalar convert float to int, in a lot of variants.
	// Note that the scalar version of this operation has two encodings, one that goes to an integer register
	// and one that outputs to a scalar fp register.
	void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
	void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
	void FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale);
	void FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale);

	// Scalar convert int to float. No rounding mode specifier necessary.
	void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
	void UCVTF(ARM64Reg Rd, ARM64Reg Rn);

	// Scalar fixed point to float. scale is the number of fractional bits.
	void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
	void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);

	// Float comparison
	void FCMP(ARM64Reg Rn, ARM64Reg Rm);
	void FCMP(ARM64Reg Rn);
	void FCMPE(ARM64Reg Rn, ARM64Reg Rm);
	void FCMPE(ARM64Reg Rn);
	void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);

	// Conditional select
	void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);

	// Conditional compare
	void FCCMP(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond);
	void FCCMPE(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond);

	// Permute
	void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	// Related to permute, extract vector from pair (always by byte arrangement).
	void EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, int index);

	// Shift by immediate
	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	// Shift == src_size for these.
	void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
	void SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
	void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
	void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
	void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
	void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);

	void SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void USHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void SSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);

	// vector x indexed element
	void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);
	void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);

	void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);
	void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);

	// ABI related
	void ABI_PushRegisters(uint32_t gpr_registers, uint32_t fp_registers);
	void ABI_PopRegisters(uint32_t gpr_registers, uint32_t fp_registers);

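	// Illustrative prologue/epilogue (a sketch, not part of the original header), saving the
	// callee-saved register sets defined near the top of this file:
	//   ABI_PushRegisters(ALL_CALLEE_SAVED, ALL_CALLEE_SAVED_FP);
	//   ...
	//   ABI_PopRegisters(ALL_CALLEE_SAVED, ALL_CALLEE_SAVED_FP);
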
private:
	ARM64XEmitter* m_emit;
	inline void Write32(u32 value) { m_emit->Write32(value); }

	// Emitting functions
	void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);
	void EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
	void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
	void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);
	void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
	void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
	void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
	void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);
	void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);
	void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EmitCondCompare(bool M, bool S, CCFlags cond, int op, u8 nzcv, ARM64Reg Rn, ARM64Reg Rm);
	void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);
	void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
	void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
	void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);
	void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
	void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);
	void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);
	void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);
	void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh);

	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
	void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
	void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);
	void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
	void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);
};

class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>
{
private:
	void PoisonMemory(int offset) override;
};

}  // namespace Arm64Gen