GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/Arm64Emitter.cpp
1
// Copyright 2013 Dolphin Emulator Project
2
// Licensed under GPLv2
3
// Refer to the license.txt file included.
4
5
#include "ppsspp_config.h"
6
7
#include <limits>
8
#include <vector>
9
#include <cmath>
10
#include <cinttypes>
11
12
#include <cstdlib>
13
#include <cstring>
14
15
#include "Common/Arm64Emitter.h"
16
#include "Common/Math/math_util.h"
17
#include "Common/CommonTypes.h"
18
#include "Common/CommonWindows.h"
19
#include "Common/CPUDetect.h"
20
#include "Common/Log.h"
21
22
#if PPSSPP_PLATFORM(IOS) || PPSSPP_PLATFORM(MAC)
23
#include <libkern/OSCacheControl.h>
24
#endif
25
26
namespace Arm64Gen
27
{
28
29
const int kWRegSizeInBits = 32;
30
const int kXRegSizeInBits = 64;
31
32
// The following few functions are taken from V8.
33
int CountLeadingZeros(uint64_t value, int width) {
34
// TODO(jbramley): Optimize this for ARM64 hosts.
35
int count = 0;
36
uint64_t bit_test = 1ULL << (width - 1);
37
while ((count < width) && ((bit_test & value) == 0)) {
38
count++;
39
bit_test >>= 1;
40
}
41
return count;
42
}
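// For example, CountLeadingZeros(1, 64) == 63 and CountLeadingZeros(0x80000000, 32) == 0;
// an input of zero returns 'width' itself.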
43
44
uint64_t LargestPowerOf2Divisor(uint64_t value) {
45
return value & -(int64_t)value;
46
}
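// e.g. LargestPowerOf2Divisor(24) == 8: isolating the lowest set bit yields the largest
// power of two that divides the value.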
47
48
bool IsPowerOfTwo(uint64_t x) {
49
return (x != 0) && ((x & (x - 1)) == 0);
50
}
51
52
#define V8_UINT64_C(x) ((uint64_t)(x))
53
54
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift) {
55
if (input < 4096) {
56
if (val) *val = (uint32_t)input;
57
if (shift) *shift = false;
58
return true;
59
} else if ((input & 0xFFF000) == input) {
60
if (val) *val = (uint32_t)(input >> 12);
61
if (shift) *shift = true;
62
return true;
63
}
64
return false;
65
}
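// Illustrative cases: an input of 0x123 encodes as val = 0x123 with shift = false, while
// 0x123000 encodes as val = 0x123 with shift = true (the instruction applies LSL #12).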
66
67
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r) {
68
//DCHECK((n != NULL) && (imm_s != NULL) && (imm_r != NULL));
69
// DCHECK((width == kWRegSizeInBits) || (width == kXRegSizeInBits));
70
71
bool negate = false;
72
73
// Logical immediates are encoded using parameters n, imm_s and imm_r using
74
// the following table:
75
//
76
// N imms immr size S R
77
// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)
78
// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)
79
// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)
80
// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)
81
// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)
82
// 0 11110s xxxxxr 2 UInt(s) UInt(r)
83
// (s bits must not be all set)
84
//
85
// A pattern is constructed of size bits, where the least significant S+1 bits
86
// are set. The pattern is rotated right by R, and repeated across a 32 or
87
// 64-bit value, depending on destination register width.
88
//
89
// Put another way: the basic format of a logical immediate is a single
90
// contiguous stretch of 1 bits, repeated across the whole word at intervals
91
// given by a power of 2. To identify them quickly, we first locate the
92
// lowest stretch of 1 bits, then the next 1 bit above that; that combination
93
// is different for every logical immediate, so it gives us all the
94
// information we need to identify the only logical immediate that our input
95
// could be, and then we simply check if that's the value we actually have.
96
//
97
// (The rotation parameter does give the possibility of the stretch of 1 bits
98
// going 'round the end' of the word. To deal with that, we observe that in
99
// any situation where that happens the bitwise NOT of the value is also a
100
// valid logical immediate. So we simply invert the input whenever its low bit
101
// is set, and then we know that the rotated case can't arise.)
102
103
if (value & 1) {
104
// If the low bit is 1, negate the value, and set a flag to remember that we
105
// did (so that we can adjust the return values appropriately).
106
negate = true;
107
value = ~value;
108
}
109
110
if (width == kWRegSizeInBits) {
111
// To handle 32-bit logical immediates, the very easiest thing is to repeat
112
// the input value twice to make a 64-bit word. The correct encoding of that
113
// as a logical immediate will also be the correct encoding of the 32-bit
114
// value.
115
116
// The most-significant 32 bits may not be zero (ie. negate is true) so
117
// shift the value left before duplicating it.
118
value <<= kWRegSizeInBits;
119
value |= value >> kWRegSizeInBits;
120
}
121
122
// The basic analysis idea: imagine our input word looks like this.
123
//
124
// 0011111000111110001111100011111000111110001111100011111000111110
125
// c b a
126
// |<--d-->|
127
//
128
// We find the lowest set bit (as an actual power-of-2 value, not its index)
129
// and call it a. Then we add a to our original number, which wipes out the
130
// bottommost stretch of set bits and replaces it with a 1 carried into the
131
// next zero bit. Then we look for the new lowest set bit, which is in
132
// position b, and subtract it, so now our number is just like the original
133
// but with the lowest stretch of set bits completely gone. Now we find the
134
// lowest set bit again, which is position c in the diagram above. Then we'll
135
// measure the distance d between bit positions a and c (using CLZ), and that
136
// tells us that the only valid logical immediate that could possibly be equal
137
// to this number is the one in which a stretch of bits running from a to just
138
// below b is replicated every d bits.
139
uint64_t a = LargestPowerOf2Divisor(value);
140
uint64_t value_plus_a = value + a;
141
uint64_t b = LargestPowerOf2Divisor(value_plus_a);
142
uint64_t value_plus_a_minus_b = value_plus_a - b;
143
uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);
144
145
int d, clz_a, out_n;
146
uint64_t mask;
147
148
if (c != 0) {
149
// The general case, in which there is more than one stretch of set bits.
150
// Compute the repeat distance d, and set up a bitmask covering the basic
151
// unit of repetition (i.e. a word with the bottom d bits set). Also, in all
152
// of these cases the N bit of the output will be zero.
153
clz_a = CountLeadingZeros(a, kXRegSizeInBits);
154
int clz_c = CountLeadingZeros(c, kXRegSizeInBits);
155
d = clz_a - clz_c;
156
mask = ((V8_UINT64_C(1) << d) - 1);
157
out_n = 0;
158
} else {
159
// Handle degenerate cases.
160
//
161
// If any of those 'find lowest set bit' operations didn't find a set bit at
162
// all, then the word will have been zero thereafter, so in particular the
163
// last lowest_set_bit operation will have returned zero. So we can test for
164
// all the special case conditions in one go by seeing if c is zero.
165
if (a == 0) {
166
// The input was zero (or all 1 bits, which will come to here too after we
167
// inverted it at the start of the function), for which we just return
168
// false.
169
return false;
170
} else {
171
// Otherwise, if c was zero but a was not, then there's just one stretch
172
// of set bits in our word, meaning that we have the trivial case of
173
// d == 64 and only one 'repetition'. Set up all the same variables as in
174
// the general case above, and set the N bit in the output.
175
clz_a = CountLeadingZeros(a, kXRegSizeInBits);
176
d = 64;
177
mask = ~V8_UINT64_C(0);
178
out_n = 1;
179
}
180
}
181
182
// If the repeat period d is not a power of two, it can't be encoded.
183
if (!IsPowerOfTwo(d)) {
184
return false;
185
}
186
187
if (((b - a) & ~mask) != 0) {
188
// If the bit stretch (b - a) does not fit within the mask derived from the
189
// repeat period, then fail.
190
return false;
191
}
192
193
// The only possible option is b - a repeated every d bits. Now we're going to
194
// actually construct the valid logical immediate derived from that
195
// specification, and see if it equals our original input.
196
//
197
// To repeat a value every d bits, we multiply it by a number of the form
198
// (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can
199
// be derived using a table lookup on CLZ(d).
200
static const uint64_t multipliers[] = {
201
0x0000000000000001UL,
202
0x0000000100000001UL,
203
0x0001000100010001UL,
204
0x0101010101010101UL,
205
0x1111111111111111UL,
206
0x5555555555555555UL,
207
};
208
int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;
209
// Ensure that the index to the multipliers array is within bounds.
210
_dbg_assert_((multiplier_idx >= 0) &&
211
(static_cast<size_t>(multiplier_idx) < ARRAY_SIZE(multipliers)));
212
uint64_t multiplier = multipliers[multiplier_idx];
213
uint64_t candidate = (b - a) * multiplier;
214
215
if (value != candidate) {
216
// The candidate pattern doesn't match our input value, so fail.
217
return false;
218
}
219
220
// We have a match! This is a valid logical immediate, so now we have to
221
// construct the bits and pieces of the instruction encoding that generates
222
// it.
223
224
// Count the set bits in our basic stretch. The special case of clz(0) == -1
225
// makes the answer come out right for stretches that reach the very top of
226
// the word (e.g. numbers like 0xffffc00000000000).
227
int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);
228
int s = clz_a - clz_b;
229
230
// Decide how many bits to rotate right by, to put the low bit of that basic
231
// stretch in position a.
232
int r;
233
if (negate) {
234
// If we inverted the input right at the start of this function, here's
235
// where we compensate: the number of set bits becomes the number of clear
236
// bits, and the rotation count is based on position b rather than position
237
// a (since b is the location of the 'lowest' 1 bit after inversion).
238
s = d - s;
239
r = (clz_b + 1) & (d - 1);
240
} else {
241
r = (clz_a + 1) & (d - 1);
242
}
243
244
// Now we're done, except for having to encode the S output in such a way that
245
// it gives both the number of set bits and the length of the repeated
246
// segment. The s field is encoded like this:
247
//
248
// imms size S
249
// ssssss 64 UInt(ssssss)
250
// 0sssss 32 UInt(sssss)
251
// 10ssss 16 UInt(ssss)
252
// 110sss 8 UInt(sss)
253
// 1110ss 4 UInt(ss)
254
// 11110s 2 UInt(s)
255
//
256
// So we 'or' (-d << 1) with our computed s to form imms.
257
*n = out_n;
258
*imm_s = ((-d << 1) | (s - 1)) & 0x3f;
259
*imm_r = r;
260
261
return true;
262
}
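// Worked example (illustrative): 0x0F0F0F0F0F0F0F0F is an 8-bit element with four
// contiguous set bits repeated across the word, so with width == 64 this returns true
// with n == 0, imm_s == 0x33 (0b110011) and imm_r == 0:
//
//   unsigned int n, imm_s, imm_r;
//   if (IsImmLogical(0x0F0F0F0F0F0F0F0FULL, 64, &n, &imm_s, &imm_r)) {
//     // Feed n/imm_s/imm_r into one of the logical-immediate emitters below.
//   }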
263
264
static int EncodeSize(int size) {
265
switch (size) {
266
case 8: return 0;
267
case 16: return 1;
268
case 32: return 2;
269
case 64: return 3;
270
default: return 0;
271
}
272
}
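// EncodeSize maps an operand size in bits (8/16/32/64) to the 2-bit size field used by
// the load/store encodings; anything else falls back to 0 (byte).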
273
274
ARM64XEmitter::ARM64XEmitter(const u8 *ptr, u8 *writePtr) {
275
SetCodePointer(ptr, writePtr);
276
}
277
278
void ARM64XEmitter::SetCodePointer(const u8 *ptr, u8 *writePtr)
279
{
280
m_code = ptr;
281
m_writable = writePtr;
282
m_lastCacheFlushEnd = ptr;
283
}
284
285
const u8* ARM64XEmitter::GetCodePointer() const
286
{
287
return m_code;
288
}
289
290
u8* ARM64XEmitter::GetWritableCodePtr()
291
{
292
return m_writable;
293
}
294
295
void ARM64XEmitter::ReserveCodeSpace(u32 bytes)
296
{
297
for (u32 i = 0; i < bytes/4; i++)
298
BRK(0);
299
}
300
301
const u8* ARM64XEmitter::AlignCode16()
302
{
303
int c = int((u64)m_code & 15);
304
if (c)
305
ReserveCodeSpace(16 - c);
306
return m_code;
307
}
308
309
const u8* ARM64XEmitter::AlignCodePage()
310
{
311
int page_size = GetMemoryProtectPageSize();
312
int c = int((u64)m_code & (page_size - 1));
313
if (c)
314
ReserveCodeSpace(page_size - c);
315
return m_code;
316
}
317
318
const u8 *ARM64XEmitter::NopAlignCode16() {
319
int bytes = ((-(intptr_t)m_code) & 15);
320
for (int i = 0; i < bytes / 4; i++) {
321
Write32(0xD503201F); // official nop instruction
322
}
323
return m_code;
324
}
325
326
void ARM64XEmitter::FlushIcache()
327
{
328
FlushIcacheSection(m_lastCacheFlushEnd, m_code);
329
m_lastCacheFlushEnd = m_code;
330
}
331
332
void ARM64XEmitter::FlushIcacheSection(const u8 *start, const u8 *end)
333
{
334
#if PPSSPP_PLATFORM(IOS) || PPSSPP_PLATFORM(MAC)
335
// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
336
sys_cache_control(kCacheFunctionPrepareForExecution, (void *)start, end - start);
337
#elif PPSSPP_PLATFORM(WINDOWS)
338
FlushInstructionCache(GetCurrentProcess(), start, end - start);
339
#elif PPSSPP_ARCH(ARM64)
340
// Code from Dolphin, contributed by the Mono project.
341
342
size_t isize, dsize;
343
if (cpu_info.sQuirks.bExynos8890DifferingCachelineSizes) {
344
// Don't rely on GCC's __clear_cache implementation, as it caches
345
// icache/dcache cache line sizes, which can vary between cores on
346
// very buggy big.LITTLE architectures like the Exynos 8890.
347
// Enforce the minimum cache line size to be completely safe on these CPUs.
348
isize = 64;
349
dsize = 64;
350
} else {
351
u64 ctr_el0;
352
static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
353
__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
354
isize = 4 << ((ctr_el0 >> 0) & 0xf);
355
dsize = 4 << ((ctr_el0 >> 16) & 0xf);
356
357
// use the global minimum cache line size
358
icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
359
dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
360
}
361
362
u64 addr = (u64)start & ~(u64)(dsize - 1);
363
for (; addr < (u64)end; addr += dsize)
364
// use "civac" instead of "cvau", as this is the suggested workaround for
365
// Cortex-A53 errata 819472, 826319, 827319 and 824069.
366
__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
367
__asm__ volatile("dsb ish" : : : "memory");
368
369
addr = (u64)start & ~(u64)(isize - 1);
370
for (; addr < (u64)end; addr += isize)
371
__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
372
373
__asm__ volatile("dsb ish" : : : "memory");
374
__asm__ volatile("isb" : : : "memory");
375
#endif
376
}
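// Typical usage sketch (illustrative; execPtr/writePtr are placeholder names): point the
// emitter at the target buffer, emit code with the Write32-based emitters below, then
// flush before executing it:
//
//   emitter.SetCodePointer(execPtr, writePtr);
//   // ... emit instructions ...
//   emitter.FlushIcache();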
377
378
// Exception generation
379
static const u32 ExcEnc[][3] = {
380
{0, 0, 1}, // SVC
381
{0, 0, 2}, // HVC
382
{0, 0, 3}, // SMC
383
{1, 0, 0}, // BRK
384
{2, 0, 0}, // HLT
385
{5, 0, 1}, // DCPS1
386
{5, 0, 2}, // DCPS2
387
{5, 0, 3}, // DCPS3
388
};
389
390
// Arithmetic generation
391
static const u32 ArithEnc[] = {
392
0x058, // ADD
393
0x258, // SUB
394
};
395
396
// Conditional Select
397
static const u32 CondSelectEnc[][2] = {
398
{0, 0}, // CSEL
399
{0, 1}, // CSINC
400
{1, 0}, // CSINV
401
{1, 1}, // CSNEG
402
};
403
404
// Data-Processing (1 source)
405
static const u32 Data1SrcEnc[][2] = {
406
{0, 0}, // RBIT
407
{0, 1}, // REV16
408
{0, 2}, // REV32
409
{0, 3}, // REV64
410
{0, 4}, // CLZ
411
{0, 5}, // CLS
412
};
413
414
// Data-Processing (2 source)
415
static const u32 Data2SrcEnc[] = {
416
0x02, // UDIV
417
0x03, // SDIV
418
0x08, // LSLV
419
0x09, // LSRV
420
0x0A, // ASRV
421
0x0B, // RORV
422
0x10, // CRC32B
423
0x11, // CRC32H
424
0x12, // CRC32W
425
0x14, // CRC32CB
426
0x15, // CRC32CH
427
0x16, // CRC32CW
428
0x13, // CRC32X (64bit Only)
429
0x17, // CRC32CX (64bit Only)
430
};
431
432
// Data-Processing (3 source)
433
static const u32 Data3SrcEnc[][2] = {
434
{0, 0}, // MADD
435
{0, 1}, // MSUB
436
{1, 0}, // SMADDL (64Bit Only)
437
{1, 1}, // SMSUBL (64Bit Only)
438
{2, 0}, // SMULH (64Bit Only)
439
{5, 0}, // UMADDL (64Bit Only)
440
{5, 1}, // UMSUBL (64Bit Only)
441
{6, 0}, // UMULH (64Bit Only)
442
};
443
444
// Logical (shifted register)
445
static const u32 LogicalEnc[][2] = {
446
{0, 0}, // AND
447
{0, 1}, // BIC
448
{1, 0}, // ORR
449
{1, 1}, // ORN
450
{2, 0}, // EOR
451
{2, 1}, // EON
452
{3, 0}, // ANDS
453
{3, 1}, // BICS
454
};
455
456
// Load/Store Exclusive
457
static const u32 LoadStoreExcEnc[][5] = {
458
{0, 0, 0, 0, 0}, // STXRB
459
{0, 0, 0, 0, 1}, // STLXRB
460
{0, 0, 1, 0, 0}, // LDXRB
461
{0, 0, 1, 0, 1}, // LDAXRB
462
{0, 1, 0, 0, 1}, // STLRB
463
{0, 1, 1, 0, 1}, // LDARB
464
{1, 0, 0, 0, 0}, // STXRH
465
{1, 0, 0, 0, 1}, // STLXRH
466
{1, 0, 1, 0, 0}, // LDXRH
467
{1, 0, 1, 0, 1}, // LDAXRH
468
{1, 1, 0, 0, 1}, // STLRH
469
{1, 1, 1, 0, 1}, // LDARH
470
{2, 0, 0, 0, 0}, // STXR
471
{3, 0, 0, 0, 0}, // (64bit) STXR
472
{2, 0, 0, 0, 1}, // STLXR
473
{3, 0, 0, 0, 1}, // (64bit) STLXR
474
{2, 0, 0, 1, 0}, // STXP
475
{3, 0, 0, 1, 0}, // (64bit) STXP
476
{2, 0, 0, 1, 1}, // STLXP
477
{3, 0, 0, 1, 1}, // (64bit) STLXP
478
{2, 0, 1, 0, 0}, // LDXR
479
{3, 0, 1, 0, 0}, // (64bit) LDXR
480
{2, 0, 1, 0, 1}, // LDAXR
481
{3, 0, 1, 0, 1}, // (64bit) LDAXR
482
{2, 0, 1, 1, 0}, // LDXP
483
{3, 0, 1, 1, 0}, // (64bit) LDXP
484
{2, 0, 1, 1, 1}, // LDAXP
485
{3, 0, 1, 1, 1}, // (64bit) LDAXP
486
{2, 1, 0, 0, 1}, // STLR
487
{3, 1, 0, 0, 1}, // (64bit) STLR
488
{2, 1, 1, 0, 1}, // LDAR
489
{3, 1, 1, 0, 1}, // (64bit) LDAR
490
};
491
492
void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr)
493
{
494
bool b64Bit = Is64Bit(Rt);
495
s64 distance = (s64)ptr - (s64)m_code;
496
497
_assert_msg_(!(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);
498
499
distance >>= 2;
500
501
_assert_msg_(distance >= -0x40000 && distance <= 0x3FFFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);
502
503
Rt = DecodeReg(Rt);
504
Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \
505
(((u32)distance << 5) & 0xFFFFE0) | Rt);
506
}
507
508
void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr)
509
{
510
bool b64Bit = Is64Bit(Rt);
511
s64 distance = (s64)ptr - (s64)m_code;
512
513
_assert_msg_(!(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);
514
515
distance >>= 2;
516
517
_assert_msg_(distance >= -0x2000 && distance <= 0x1FFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);
518
519
Rt = DecodeReg(Rt);
520
Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \
521
(bits << 19) | (((u32)distance << 5) & 0x7FFE0) | Rt);
522
}
523
524
void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr)
525
{
526
s64 distance = (s64)ptr - s64(m_code);
527
528
_assert_msg_(!(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);
529
530
distance >>= 2;
531
532
_assert_msg_(distance >= -0x2000000LL && distance <= 0x1FFFFFFLL, "%s: Received too large distance: %llx", __FUNCTION__, distance);
533
534
Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));
535
}
536
537
void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn)
538
{
539
Rn = DecodeReg(Rn);
540
Write32((0x6B << 25) | (opc << 21) | (op2 << 16) | (op3 << 10) | (Rn << 5) | op4);
541
}
542
543
void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm)
544
{
545
_assert_msg_(!(imm & ~0xFFFF), "%s: Exception instruction too large immediate: %d", __FUNCTION__, imm);
546
547
Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | ExcEnc[instenc][2]);
548
}
549
550
void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt)
551
{
552
Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt);
553
}
554
555
void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)
556
{
557
bool b64Bit = Is64Bit(Rd);
558
559
Rd = DecodeReg(Rd);
560
Rn = DecodeReg(Rn);
561
Rm = DecodeReg(Rm);
562
Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | \
563
(Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd);
564
}
565
566
void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
567
{
568
bool b64Bit = Is64Bit(Rd);
569
570
Rd = DecodeReg(Rd);
571
Rm = DecodeReg(Rm);
572
Rn = DecodeReg(Rn);
573
Write32((b64Bit << 31) | (op << 30) | (flags << 29) | \
574
(0xD0 << 21) | (Rm << 16) | (Rn << 5) | Rd);
575
}
576
577
void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
578
{
579
bool b64Bit = Is64Bit(Rn);
580
581
_assert_msg_(!(imm & ~0x1F), "%s: too large immediate: %d", __FUNCTION__, imm);
582
_assert_msg_(!(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv);
583
584
Rn = DecodeReg(Rn);
585
Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \
586
(imm << 16) | (cond << 12) | (1 << 11) | (Rn << 5) | nzcv);
587
}
588
589
void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
590
{
591
bool b64Bit = Is64Bit(Rm);
592
593
_assert_msg_(!(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv);
594
595
Rm = DecodeReg(Rm);
596
Rn = DecodeReg(Rn);
597
Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \
598
(Rm << 16) | (cond << 12) | (Rn << 5) | nzcv);
599
}
600
601
void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
602
{
603
bool b64Bit = Is64Bit(Rd);
604
605
Rd = DecodeReg(Rd);
606
Rm = DecodeReg(Rm);
607
Rn = DecodeReg(Rn);
608
Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | \
609
(0xD4 << 21) | (Rm << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) | \
610
(Rn << 5) | Rd);
611
}
612
613
void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn)
614
{
615
bool b64Bit = Is64Bit(Rd);
616
617
Rd = DecodeReg(Rd);
618
Rn = DecodeReg(Rn);
619
Write32((b64Bit << 31) | (0x2D6 << 21) | \
620
(Data1SrcEnc[instenc][0] << 16) | (Data1SrcEnc[instenc][1] << 10) | \
621
(Rn << 5) | Rd);
622
}
623
624
void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
625
{
626
bool b64Bit = Is64Bit(Rd);
627
628
Rd = DecodeReg(Rd);
629
Rm = DecodeReg(Rm);
630
Rn = DecodeReg(Rn);
631
Write32((b64Bit << 31) | (0x0D6 << 21) | \
632
(Rm << 16) | (Data2SrcEnc[instenc] << 10) | \
633
(Rn << 5) | Rd);
634
}
635
636
void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
637
{
638
bool b64Bit = Is64Bit(Rd);
639
640
Rd = DecodeReg(Rd);
641
Rm = DecodeReg(Rm);
642
Rn = DecodeReg(Rn);
643
Ra = DecodeReg(Ra);
644
Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | \
645
(Rm << 16) | (Data3SrcEnc[instenc][1] << 15) | \
646
(Ra << 10) | (Rn << 5) | Rd);
647
}
648
649
void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
650
{
651
bool b64Bit = Is64Bit(Rd);
652
653
Rd = DecodeReg(Rd);
654
Rm = DecodeReg(Rm);
655
Rn = DecodeReg(Rn);
656
Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) | (LogicalEnc[instenc][1] << 21) | \
657
Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd);
658
}
659
660
void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm)
661
{
662
bool b64Bit = Is64Bit(Rt);
663
bool bVec = IsVector(Rt);
664
665
_assert_msg_(!(imm & 0xFFFFF), "%s: offset too large %d", __FUNCTION__, imm);
666
667
Rt = DecodeReg(Rt);
668
if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set
669
bitop |= 0x1;
670
Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (imm << 5) | Rt);
671
}
672
673
void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc,
674
ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt)
675
{
676
Rs = DecodeReg(Rs);
677
Rt2 = DecodeReg(Rt2);
678
Rn = DecodeReg(Rn);
679
Rt = DecodeReg(Rt);
680
Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) | \
681
(LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | \
682
(LoadStoreExcEnc[instenc][4] << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
683
}
684
685
void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
686
{
687
bool b64Bit = Is64Bit(Rt);
688
bool b128Bit = IsQuad(Rt);
689
bool bVec = IsVector(Rt);
690
691
if (b128Bit)
692
imm >>= 4;
693
else if (b64Bit)
694
imm >>= 3;
695
else
696
imm >>= 2;
697
698
_assert_msg_(!(imm & ~0xF), "%s: offset too large %d", __FUNCTION__, imm);
699
700
u32 opc = 0;
701
if (b128Bit)
702
opc = 2;
703
else if (b64Bit && bVec)
704
opc = 1;
705
else if (b64Bit && !bVec)
706
opc = 2;
707
708
Rt = DecodeReg(Rt);
709
Rt2 = DecodeReg(Rt2);
710
Rn = DecodeReg(Rn);
711
Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
712
}
713
714
void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
715
{
716
bool b64Bit = Is64Bit(Rt);
717
bool bVec = IsVector(Rt);
718
719
u32 offset = imm & 0x1FF;
720
721
_assert_msg_(!(imm < -256 || imm > 255), "%s: offset too large %d", __FUNCTION__, imm);
722
723
Rt = DecodeReg(Rt);
724
Rn = DecodeReg(Rn);
725
Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Rt);
726
}
727
728
void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size)
729
{
730
bool b64Bit = Is64Bit(Rt);
731
bool bVec = IsVector(Rt);
732
733
u8 shift = 0;
734
if (size == 64)
735
shift = 3;
736
else if (size == 32)
737
shift = 2;
738
else if (size == 16)
739
shift = 1;
740
741
if (shift) {
742
_assert_msg_(((imm >> shift) << shift) == imm, "%s(INDEX_UNSIGNED): offset must be aligned %d", __FUNCTION__, imm);
743
imm >>= shift;
744
}
745
746
_assert_msg_(imm >= 0, "%s(INDEX_UNSIGNED): offset must be positive %d", __FUNCTION__, imm);
747
_assert_msg_(!(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm);
748
749
Rt = DecodeReg(Rt);
750
Rn = DecodeReg(Rn);
751
Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 10) | (Rn << 5) | Rt);
752
}
753
754
void ARM64XEmitter::EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos)
755
{
756
bool b64Bit = Is64Bit(Rd);
757
758
_assert_msg_(!(imm & ~0xFFFF), "%s: immediate out of range: %d", __FUNCTION__, imm);
759
760
Rd = DecodeReg(Rd);
761
Write32((b64Bit << 31) | (op << 29) | (0x25 << 23) | (pos << 21) | (imm << 5) | Rd);
762
}
763
764
void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
765
{
766
bool b64Bit = Is64Bit(Rd);
767
768
Rd = DecodeReg(Rd);
769
Rn = DecodeReg(Rn);
770
Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | \
771
(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
772
}
773
774
void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
775
{
776
Rt = DecodeReg(Rt);
777
Rn = DecodeReg(Rn);
778
ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
779
780
Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | \
781
Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
782
}
783
784
void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd)
785
{
786
bool b64Bit = Is64Bit(Rd);
787
788
_assert_msg_(!(imm & ~0xFFF), "%s: immediate too large: %x", __FUNCTION__, imm);
789
790
Rd = DecodeReg(Rd);
791
Rn = DecodeReg(Rn);
792
Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | \
793
(imm << 10) | (Rn << 5) | Rd);
794
}
795
796
void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n)
797
{
798
// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.
799
// Use Rn to determine bitness here.
800
bool b64Bit = Is64Bit(Rn);
801
802
Rd = DecodeReg(Rd);
803
Rn = DecodeReg(Rn);
804
805
Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | \
806
(immr << 16) | (imms << 10) | (Rn << 5) | Rd);
807
}
808
809
void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
810
{
811
bool b64Bit = Is64Bit(Rt);
812
u32 type_encode = 0;
813
814
switch (type) {
815
case INDEX_SIGNED:
816
type_encode = 2;
817
break;
818
case INDEX_POST:
819
type_encode = 1;
820
break;
821
case INDEX_PRE:
822
type_encode = 3;
823
break;
824
case INDEX_UNSIGNED:
825
_assert_msg_(false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);
826
break;
827
}
828
829
if (b64Bit) {
830
op |= 2;
831
imm >>= 3;
832
}
833
else
834
{
835
imm >>= 2;
836
}
837
838
_assert_msg_(imm >= -64 && imm <= 63, "%s received too large imm: %d", __FUNCTION__, imm);
839
840
Rt = DecodeReg(Rt);
841
Rt2 = DecodeReg(Rt2);
842
Rn = DecodeReg(Rn);
843
844
Write32((op << 30) | (5 << 27) | (type_encode << 23) | (load << 22) | \
845
(((uint32_t)imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
846
}
847
void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm)
848
{
849
Rd = DecodeReg(Rd);
850
851
Write32((op << 31) | ((imm & 0x3) << 29) | (0x10 << 24) | \
852
((imm & 0x1FFFFC) << 3) | Rd);
853
}
854
855
void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
856
{
857
_assert_msg_(!(imm < -256 || imm > 255), "%s received too large offset: %d", __FUNCTION__, imm);
858
Rt = DecodeReg(Rt);
859
Rn = DecodeReg(Rn);
860
861
Write32((size << 30) | (7 << 27) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
862
}
863
864
static inline bool IsInRangeImm19(s64 distance) {
865
return (distance >= -0x40000 && distance <= 0x3FFFF);
866
}
867
868
static inline bool IsInRangeImm14(s64 distance) {
869
return (distance >= -0x2000 && distance <= 0x1FFF);
870
}
871
872
static inline bool IsInRangeImm26(s64 distance) {
873
return (distance >= -0x2000000 && distance <= 0x1FFFFFF);
874
}
875
876
static inline u32 MaskImm19(s64 distance) {
877
return distance & 0x7FFFF;
878
}
879
880
static inline u32 MaskImm14(s64 distance) {
881
return distance & 0x3FFF;
882
}
883
884
static inline u32 MaskImm26(s64 distance) {
885
return distance & 0x3FFFFFF;
886
}
887
888
// FixupBranch branching
889
void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)
890
{
891
bool Not = false;
892
u32 inst = 0;
893
s64 distance = (s64)(m_code - branch.ptr);
894
distance >>= 2;
895
896
switch (branch.type)
897
{
898
case 1: // CBNZ
899
{
900
Not = true;
901
[[fallthrough]];
902
}
903
case 0: // CBZ
904
{
905
_assert_msg_(IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
906
bool b64Bit = Is64Bit(branch.reg);
907
ARM64Reg reg = DecodeReg(branch.reg);
908
inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (MaskImm19(distance) << 5) | reg;
909
}
910
break;
911
case 2: // B (conditional)
912
_assert_msg_(IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
913
inst = (0x2A << 25) | (MaskImm19(distance) << 5) | branch.cond;
914
break;
915
case 4: // TBNZ
916
{
917
Not = true;
918
[[fallthrough]];
919
}
920
case 3: // TBZ
921
{
922
_assert_msg_(IsInRangeImm14(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
923
ARM64Reg reg = DecodeReg(branch.reg);
924
inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;
925
}
926
break;
927
case 5: // B (unconditional)
928
_assert_msg_(IsInRangeImm26(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
929
inst = (0x5 << 26) | MaskImm26(distance);
930
break;
931
case 6: // BL (unconditional)
932
_assert_msg_(IsInRangeImm26(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);
933
inst = (0x25 << 26) | MaskImm26(distance);
934
break;
935
}
936
937
ptrdiff_t writable = m_writable - m_code;
938
*(u32 *)(branch.ptr + writable) = inst;
939
}
940
941
FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt)
942
{
943
FixupBranch branch{};
944
branch.ptr = m_code;
945
branch.type = 0;
946
branch.reg = Rt;
947
HINT(HINT_NOP);
948
return branch;
949
}
950
FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt)
951
{
952
FixupBranch branch{};
953
branch.ptr = m_code;
954
branch.type = 1;
955
branch.reg = Rt;
956
HINT(HINT_NOP);
957
return branch;
958
}
959
FixupBranch ARM64XEmitter::B(CCFlags cond)
960
{
961
FixupBranch branch{};
962
branch.ptr = m_code;
963
branch.type = 2;
964
branch.cond = cond;
965
HINT(HINT_NOP);
966
return branch;
967
}
968
FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit)
969
{
970
FixupBranch branch{};
971
branch.ptr = m_code;
972
branch.type = 3;
973
branch.reg = Rt;
974
branch.bit = bit;
975
HINT(HINT_NOP);
976
return branch;
977
}
978
FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit)
979
{
980
FixupBranch branch{};
981
branch.ptr = m_code;
982
branch.type = 4;
983
branch.reg = Rt;
984
branch.bit = bit;
985
HINT(HINT_NOP);
986
return branch;
987
}
988
FixupBranch ARM64XEmitter::B()
989
{
990
FixupBranch branch{};
991
branch.ptr = m_code;
992
branch.type = 5;
993
HINT(HINT_NOP);
994
return branch;
995
}
996
FixupBranch ARM64XEmitter::BL()
997
{
998
FixupBranch branch{};
999
branch.ptr = m_code;
1000
branch.type = 6;
1001
HINT(HINT_NOP);
1002
return branch;
1003
}
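// Typical forward-branch pattern with these fixups (illustrative sketch):
//
//   FixupBranch skip = emitter.CBZ(W0);  // emits a NOP placeholder, remembers the spot
//   // ... code that runs when W0 != 0 ...
//   emitter.SetJumpTarget(skip);         // patches the placeholder to branch here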
1004
1005
// Compare and Branch
1006
void ARM64XEmitter::CBZ(ARM64Reg Rt, const void* ptr)
1007
{
1008
EncodeCompareBranchInst(0, Rt, ptr);
1009
}
1010
void ARM64XEmitter::CBNZ(ARM64Reg Rt, const void* ptr)
1011
{
1012
EncodeCompareBranchInst(1, Rt, ptr);
1013
}
1014
1015
// Conditional Branch
1016
void ARM64XEmitter::B(CCFlags cond, const void* ptr)
1017
{
1018
s64 distance = (s64)ptr - (s64)m_code;
1019
1020
distance >>= 2;
1021
1022
_assert_msg_(IsInRangeImm19(distance), "%s: Received too large distance: %p->%p %lld %llx", __FUNCTION__, m_code, ptr, distance, distance);
1023
Write32((0x54 << 24) | (MaskImm19(distance) << 5) | cond);
1024
}
1025
1026
// Test and Branch
1027
void ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bits, const void* ptr)
1028
{
1029
EncodeTestBranchInst(0, Rt, bits, ptr);
1030
}
1031
void ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bits, const void* ptr)
1032
{
1033
EncodeTestBranchInst(1, Rt, bits, ptr);
1034
}
1035
1036
// Unconditional Branch
1037
void ARM64XEmitter::B(const void* ptr)
1038
{
1039
EncodeUnconditionalBranchInst(0, ptr);
1040
}
1041
void ARM64XEmitter::BL(const void* ptr)
1042
{
1043
EncodeUnconditionalBranchInst(1, ptr);
1044
}
1045
1046
void ARM64XEmitter::QuickCallFunction(ARM64Reg scratchreg, const void *func) {
1047
s64 distance = (s64)func - (s64)m_code;
1048
distance >>= 2; // Can only branch to opcode-aligned (4) addresses
1049
if (!IsInRangeImm26(distance)) {
1050
// WARN_LOG(Log::JIT, "Distance too far in function call (%p to %p)! Using scratch.", m_code, func);
1051
MOVI2R(scratchreg, (uintptr_t)func);
1052
BLR(scratchreg);
1053
} else {
1054
BL(func);
1055
}
1056
}
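// Usage sketch (illustrative; the scratch register and callee are placeholders):
//
//   emitter.QuickCallFunction(X16, (const void *)&SomeHelperFunction);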
1057
1058
// Unconditional Branch (register)
1059
void ARM64XEmitter::BR(ARM64Reg Rn)
1060
{
1061
EncodeUnconditionalBranchInst(0, 0x1F, 0, 0, Rn);
1062
}
1063
void ARM64XEmitter::BLR(ARM64Reg Rn)
1064
{
1065
EncodeUnconditionalBranchInst(1, 0x1F, 0, 0, Rn);
1066
}
1067
void ARM64XEmitter::RET(ARM64Reg Rn)
1068
{
1069
EncodeUnconditionalBranchInst(2, 0x1F, 0, 0, Rn);
1070
}
1071
void ARM64XEmitter::ERET()
1072
{
1073
EncodeUnconditionalBranchInst(4, 0x1F, 0, 0, SP);
1074
}
1075
void ARM64XEmitter::DRPS()
1076
{
1077
EncodeUnconditionalBranchInst(5, 0x1F, 0, 0, SP);
1078
}
1079
1080
// Exception generation
1081
void ARM64XEmitter::SVC(u32 imm)
1082
{
1083
EncodeExceptionInst(0, imm);
1084
}
1085
1086
void ARM64XEmitter::HVC(u32 imm)
1087
{
1088
EncodeExceptionInst(1, imm);
1089
}
1090
1091
void ARM64XEmitter::SMC(u32 imm)
1092
{
1093
EncodeExceptionInst(2, imm);
1094
}
1095
1096
void ARM64XEmitter::BRK(u32 imm)
1097
{
1098
EncodeExceptionInst(3, imm);
1099
}
1100
1101
void ARM64XEmitter::HLT(u32 imm)
1102
{
1103
EncodeExceptionInst(4, imm);
1104
}
1105
1106
void ARM64XEmitter::DCPS1(u32 imm)
1107
{
1108
EncodeExceptionInst(5, imm);
1109
}
1110
1111
void ARM64XEmitter::DCPS2(u32 imm)
1112
{
1113
EncodeExceptionInst(6, imm);
1114
}
1115
1116
void ARM64XEmitter::DCPS3(u32 imm)
1117
{
1118
EncodeExceptionInst(7, imm);
1119
}
1120
1121
// System
1122
void ARM64XEmitter::_MSR(PStateField field, u8 imm)
1123
{
1124
u32 op1 = 0, op2 = 0;
1125
switch (field)
1126
{
1127
case FIELD_SPSel: op1 = 0; op2 = 5; break;
1128
case FIELD_DAIFSet: op1 = 3; op2 = 6; break;
1129
case FIELD_DAIFClr: op1 = 3; op2 = 7; break;
1130
default:
1131
_assert_msg_(false, "Invalid PStateField to do a imm move to");
1132
break;
1133
}
1134
EncodeSystemInst(0, op1, 4, imm, op2, WSP);
1135
}
1136
1137
static void GetSystemReg(PStateField field, int &o0, int &op1, int &CRn, int &CRm, int &op2) {
1138
switch (field) {
1139
case FIELD_NZCV:
1140
o0 = 3; op1 = 3; CRn = 4; CRm = 2; op2 = 0;
1141
break;
1142
case FIELD_FPCR:
1143
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 0;
1144
break;
1145
case FIELD_FPSR:
1146
o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;
1147
break;
1148
default:
1149
_assert_msg_(false, "Invalid PStateField to do a register move from/to");
1150
break;
1151
}
1152
}
1153
1154
void ARM64XEmitter::_MSR(PStateField field, ARM64Reg Rt) {
1155
int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
1156
_assert_msg_(Is64Bit(Rt), "MSR: Rt must be 64-bit");
1157
GetSystemReg(field, o0, op1, CRn, CRm, op2);
1158
EncodeSystemInst(o0, op1, CRn, CRm, op2, DecodeReg(Rt));
1159
}
1160
1161
void ARM64XEmitter::MRS(ARM64Reg Rt, PStateField field) {
1162
int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;
1163
_assert_msg_(Is64Bit(Rt), "MRS: Rt must be 64-bit");
1164
GetSystemReg(field, o0, op1, CRn, CRm, op2);
1165
EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));
1166
}
1167
1168
void ARM64XEmitter::HINT(SystemHint op)
1169
{
1170
EncodeSystemInst(0, 3, 2, 0, op, WSP);
1171
}
1172
void ARM64XEmitter::CLREX()
1173
{
1174
EncodeSystemInst(0, 3, 3, 0, 2, WSP);
1175
}
1176
void ARM64XEmitter::DSB(BarrierType type)
1177
{
1178
EncodeSystemInst(0, 3, 3, type, 4, WSP);
1179
}
1180
void ARM64XEmitter::DMB(BarrierType type)
1181
{
1182
EncodeSystemInst(0, 3, 3, type, 5, WSP);
1183
}
1184
void ARM64XEmitter::ISB(BarrierType type)
1185
{
1186
EncodeSystemInst(0, 3, 3, type, 6, WSP);
1187
}
1188
1189
// Add/Subtract (extended register)
1190
void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1191
{
1192
ADD(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1193
}
1194
1195
void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)
1196
{
1197
EncodeArithmeticInst(0, false, Rd, Rn, Rm, Option);
1198
}
1199
1200
void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1201
{
1202
EncodeArithmeticInst(0, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1203
}
1204
1205
void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)
1206
{
1207
EncodeArithmeticInst(0, true, Rd, Rn, Rm, Option);
1208
}
1209
1210
void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1211
{
1212
SUB(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1213
}
1214
1215
void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)
1216
{
1217
EncodeArithmeticInst(1, false, Rd, Rn, Rm, Option);
1218
}
1219
1220
void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1221
{
1222
EncodeArithmeticInst(1, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));
1223
}
1224
1225
void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)
1226
{
1227
EncodeArithmeticInst(1, true, Rd, Rn, Rm, Option);
1228
}
1229
1230
void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm)
1231
{
1232
CMN(Rn, Rm, ArithOption(Rn, ST_LSL, 0));
1233
}
1234
1235
void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)
1236
{
1237
EncodeArithmeticInst(0, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);
1238
}
1239
1240
void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm)
1241
{
1242
CMP(Rn, Rm, ArithOption(Rn, ST_LSL, 0));
1243
}
1244
1245
void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)
1246
{
1247
EncodeArithmeticInst(1, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);
1248
}
1249
1250
// Add/Subtract (with carry)
1251
void ARM64XEmitter::ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1252
{
1253
EncodeArithmeticCarryInst(0, false, Rd, Rn, Rm);
1254
}
1255
void ARM64XEmitter::ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1256
{
1257
EncodeArithmeticCarryInst(0, true, Rd, Rn, Rm);
1258
}
1259
void ARM64XEmitter::SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1260
{
1261
EncodeArithmeticCarryInst(1, false, Rd, Rn, Rm);
1262
}
1263
void ARM64XEmitter::SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1264
{
1265
EncodeArithmeticCarryInst(1, true, Rd, Rn, Rm);
1266
}
1267
1268
// Conditional Compare (immediate)
1269
void ARM64XEmitter::CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
1270
{
1271
EncodeCondCompareImmInst(0, Rn, imm, nzcv, cond);
1272
}
1273
void ARM64XEmitter::CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)
1274
{
1275
EncodeCondCompareImmInst(1, Rn, imm, nzcv, cond);
1276
}
1277
1278
// Conditional Compare (register)
1279
void ARM64XEmitter::CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
1280
{
1281
EncodeCondCompareRegInst(0, Rn, Rm, nzcv, cond);
1282
}
1283
void ARM64XEmitter::CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)
1284
{
1285
EncodeCondCompareRegInst(1, Rn, Rm, nzcv, cond);
1286
}
1287
1288
// Conditional Select
1289
void ARM64XEmitter::CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
1290
{
1291
EncodeCondSelectInst(0, Rd, Rn, Rm, cond);
1292
}
1293
void ARM64XEmitter::CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
1294
{
1295
EncodeCondSelectInst(1, Rd, Rn, Rm, cond);
1296
}
1297
void ARM64XEmitter::CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
1298
{
1299
EncodeCondSelectInst(2, Rd, Rn, Rm, cond);
1300
}
1301
void ARM64XEmitter::CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
1302
{
1303
EncodeCondSelectInst(3, Rd, Rn, Rm, cond);
1304
}
1305
1306
// Data-Processing 1 source
1307
void ARM64XEmitter::RBIT(ARM64Reg Rd, ARM64Reg Rn)
1308
{
1309
EncodeData1SrcInst(0, Rd, Rn);
1310
}
1311
void ARM64XEmitter::REV16(ARM64Reg Rd, ARM64Reg Rn)
1312
{
1313
EncodeData1SrcInst(1, Rd, Rn);
1314
}
1315
void ARM64XEmitter::REV32(ARM64Reg Rd, ARM64Reg Rn)
1316
{
1317
EncodeData1SrcInst(2, Rd, Rn);
1318
}
1319
void ARM64XEmitter::REV64(ARM64Reg Rd, ARM64Reg Rn)
1320
{
1321
EncodeData1SrcInst(3, Rd, Rn);
1322
}
1323
void ARM64XEmitter::CLZ(ARM64Reg Rd, ARM64Reg Rn)
1324
{
1325
EncodeData1SrcInst(4, Rd, Rn);
1326
}
1327
void ARM64XEmitter::CLS(ARM64Reg Rd, ARM64Reg Rn)
1328
{
1329
EncodeData1SrcInst(5, Rd, Rn);
1330
}
1331
1332
// Data-Processing 2 source
1333
void ARM64XEmitter::UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1334
{
1335
EncodeData2SrcInst(0, Rd, Rn, Rm);
1336
}
1337
void ARM64XEmitter::SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1338
{
1339
EncodeData2SrcInst(1, Rd, Rn, Rm);
1340
}
1341
void ARM64XEmitter::LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1342
{
1343
EncodeData2SrcInst(2, Rd, Rn, Rm);
1344
}
1345
void ARM64XEmitter::LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1346
{
1347
EncodeData2SrcInst(3, Rd, Rn, Rm);
1348
}
1349
void ARM64XEmitter::ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1350
{
1351
EncodeData2SrcInst(4, Rd, Rn, Rm);
1352
}
1353
void ARM64XEmitter::RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1354
{
1355
EncodeData2SrcInst(5, Rd, Rn, Rm);
1356
}
1357
void ARM64XEmitter::CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1358
{
1359
EncodeData2SrcInst(6, Rd, Rn, Rm);
1360
}
1361
void ARM64XEmitter::CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1362
{
1363
EncodeData2SrcInst(7, Rd, Rn, Rm);
1364
}
1365
void ARM64XEmitter::CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1366
{
1367
EncodeData2SrcInst(8, Rd, Rn, Rm);
1368
}
1369
void ARM64XEmitter::CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1370
{
1371
EncodeData2SrcInst(9, Rd, Rn, Rm);
1372
}
1373
void ARM64XEmitter::CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1374
{
1375
EncodeData2SrcInst(10, Rd, Rn, Rm);
1376
}
1377
void ARM64XEmitter::CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1378
{
1379
EncodeData2SrcInst(11, Rd, Rn, Rm);
1380
}
1381
void ARM64XEmitter::CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1382
{
1383
EncodeData2SrcInst(12, Rd, Rn, Rm);
1384
}
1385
void ARM64XEmitter::CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1386
{
1387
EncodeData2SrcInst(13, Rd, Rn, Rm);
1388
}
1389
1390
// Data-Processing 3 source
1391
void ARM64XEmitter::MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
1392
{
1393
EncodeData3SrcInst(0, Rd, Rn, Rm, Ra);
1394
}
1395
void ARM64XEmitter::MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
1396
{
1397
EncodeData3SrcInst(1, Rd, Rn, Rm, Ra);
1398
}
1399
void ARM64XEmitter::SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
1400
{
1401
EncodeData3SrcInst(2, Rd, Rn, Rm, Ra);
1402
}
1403
void ARM64XEmitter::SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1404
{
1405
SMADDL(Rd, Rn, Rm, SP);
1406
}
1407
void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
1408
{
1409
EncodeData3SrcInst(3, Rd, Rn, Rm, Ra);
1410
}
1411
void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1412
{
1413
EncodeData3SrcInst(4, Rd, Rn, Rm, SP);
1414
}
1415
void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
1416
{
1417
EncodeData3SrcInst(5, Rd, Rn, Rm, Ra);
1418
}
1419
void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1420
{
1421
UMADDL(Rd, Rn, Rm, SP);
1422
}
1423
void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)
1424
{
1425
EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);
1426
}
1427
void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1428
{
1429
EncodeData3SrcInst(7, Rd, Rn, Rm, SP);
1430
}
1431
void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1432
{
1433
EncodeData3SrcInst(0, Rd, Rn, Rm, SP);
1434
}
1435
void ARM64XEmitter::MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
1436
{
1437
EncodeData3SrcInst(1, Rd, Rn, Rm, SP);
1438
}
1439
1440
// Logical (shifted register)
1441
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1442
{
1443
EncodeLogicalInst(0, Rd, Rn, Rm, Shift);
1444
}
1445
void ARM64XEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1446
{
1447
EncodeLogicalInst(1, Rd, Rn, Rm, Shift);
1448
}
1449
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1450
{
1451
EncodeLogicalInst(2, Rd, Rn, Rm, Shift);
1452
}
1453
void ARM64XEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1454
{
1455
EncodeLogicalInst(3, Rd, Rn, Rm, Shift);
1456
}
1457
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1458
{
1459
EncodeLogicalInst(4, Rd, Rn, Rm, Shift);
1460
}
1461
void ARM64XEmitter::EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1462
{
1463
EncodeLogicalInst(5, Rd, Rn, Rm, Shift);
1464
}
1465
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1466
{
1467
EncodeLogicalInst(6, Rd, Rn, Rm, Shift);
1468
}
1469
void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1470
{
1471
EncodeLogicalInst(7, Rd, Rn, Rm, Shift);
1472
}
1473
void ARM64XEmitter::TST(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)
1474
{
1475
ANDS(Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Shift);
1476
}
1477
1478
void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, const ArithOption &Shift) {
1479
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, Shift);
1480
}
1481
1482
void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm)
1483
{
1484
if (IsGPR(Rd) && IsGPR(Rm)) {
1485
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, 0));
1486
} else {
1487
_assert_msg_(false, "Non-GPRs not supported in MOV");
1488
}
1489
}
1490
1491
void ARM64XEmitter::MOVfromSP(ARM64Reg Rd) {
1492
ADD(Rd, ARM64Reg::SP, 0, false);
1493
}
1494
1495
void ARM64XEmitter::MOVtoSP(ARM64Reg Rn) {
1496
ADD(ARM64Reg::SP, Rn, 0, false);
1497
}
1498
1499
void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm)
1500
{
1501
ORN(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, 0));
1502
}
1503
void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift)
1504
{
1505
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, shift));
1506
}
1507
void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift)
1508
{
1509
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSR, shift));
1510
}
1511
void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift)
1512
{
1513
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ASR, shift));
1514
}
1515
void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)
1516
{
1517
ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ROR, shift));
1518
}
1519
1520
// Logical (immediate)
1521
void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
1522
{
1523
EncodeLogicalImmInst(0, Rd, Rn, immr, imms, invert);
1524
}
1525
void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
1526
{
1527
EncodeLogicalImmInst(3, Rd, Rn, immr, imms, invert);
1528
}
1529
void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
1530
{
1531
EncodeLogicalImmInst(2, Rd, Rn, immr, imms, invert);
1532
}
1533
void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)
1534
{
1535
EncodeLogicalImmInst(1, Rd, Rn, immr, imms, invert);
1536
}
1537
void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert)
1538
{
1539
EncodeLogicalImmInst(3, Is64Bit(Rn) ? ZR : WZR, Rn, immr, imms, invert);
1540
}
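// Sketch of pairing these with IsImmLogical above (illustrative): to emit X0 = X1 & 0xFF,
//
//   unsigned int n, imm_s, imm_r;
//   if (IsImmLogical(0xFF, 64, &n, &imm_s, &imm_r))
//     emitter.AND(X0, X1, imm_r, imm_s, n != 0);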
1541
1542
// Add/subtract (immediate)
1543
void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
1544
{
1545
EncodeAddSubImmInst(0, false, shift, imm, Rn, Rd);
1546
}
1547
void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
1548
{
1549
EncodeAddSubImmInst(0, true, shift, imm, Rn, Rd);
1550
}
1551
void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
1552
{
1553
EncodeAddSubImmInst(1, false, shift, imm, Rn, Rd);
1554
}
1555
void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)
1556
{
1557
EncodeAddSubImmInst(1, true, shift, imm, Rn, Rd);
1558
}
1559
void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift)
1560
{
1561
EncodeAddSubImmInst(1, true, shift, imm, Rn, Is64Bit(Rn) ? SP : WSP);
1562
}
1563
void ARM64XEmitter::CMN(ARM64Reg Rn, u32 imm, bool shift)
1564
{
1565
EncodeAddSubImmInst(0, true, shift, imm, Rn, Is64Bit(Rn) ? SP : WSP);
1566
}
1567
1568
// Data Processing (Immediate)
1569
void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos)
1570
{
1571
EncodeMOVWideInst(2, Rd, imm, pos);
1572
}
1573
void ARM64XEmitter::MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos)
1574
{
1575
EncodeMOVWideInst(0, Rd, imm, pos);
1576
}
1577
void ARM64XEmitter::MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos)
1578
{
1579
EncodeMOVWideInst(3, Rd, imm, pos);
1580
}
1581
1582
// Bitfield move
1583
void ARM64XEmitter::BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
1584
{
1585
EncodeBitfieldMOVInst(1, Rd, Rn, immr, imms);
1586
}
1587
void ARM64XEmitter::SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
1588
{
1589
EncodeBitfieldMOVInst(0, Rd, Rn, immr, imms);
1590
}
1591
void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)
1592
{
1593
EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms);
1594
}
1595
1596
void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
1597
{
1598
u32 size = Is64Bit(Rn) ? 64 : 32;
1599
_assert_msg_((lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",
1600
__FUNCTION__, lsb, width);
1601
EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);
1602
}
1603
void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)
1604
{
1605
u32 size = Is64Bit(Rn) ? 64 : 32;
1606
_assert_msg_((lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",
1607
__FUNCTION__, lsb, width);
1608
EncodeBitfieldMOVInst(2, Rd, Rn, (size - lsb) % size, width - 1);
1609
}
1610
void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift) {
1611
bool sf = Is64Bit(Rd);
1612
bool N = sf;
1613
Rd = DecodeReg(Rd);
1614
Rn = DecodeReg(Rn);
1615
Rm = DecodeReg(Rm);
1616
Write32((sf << 31) | (0x27 << 23) | (N << 22) | (Rm << 16) | (shift << 10) | (Rn << 5) | Rd);
1617
}
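// Note: EXTR with Rn == Rm is the ROR (immediate) alias, i.e. a rotate right of a single
// register by 'shift' bits.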
1618
void ARM64XEmitter::SXTB(ARM64Reg Rd, ARM64Reg Rn)
1619
{
1620
SBFM(Rd, Rn, 0, 7);
1621
}
1622
void ARM64XEmitter::SXTH(ARM64Reg Rd, ARM64Reg Rn)
1623
{
1624
SBFM(Rd, Rn, 0, 15);
1625
}
1626
void ARM64XEmitter::SXTW(ARM64Reg Rd, ARM64Reg Rn)
1627
{
1628
_assert_msg_(Is64Bit(Rd), "%s requires 64bit register as destination", __FUNCTION__);
1629
SBFM(Rd, Rn, 0, 31);
1630
}
1631
void ARM64XEmitter::UXTB(ARM64Reg Rd, ARM64Reg Rn)
1632
{
1633
UBFM(Rd, Rn, 0, 7);
1634
}
1635
void ARM64XEmitter::UXTH(ARM64Reg Rd, ARM64Reg Rn)
1636
{
1637
UBFM(Rd, Rn, 0, 15);
1638
}
1639
1640
// Load Register (Literal)
1641
void ARM64XEmitter::LDR(ARM64Reg Rt, u32 imm)
1642
{
1643
EncodeLoadRegisterInst(0, Rt, imm);
1644
}
1645
void ARM64XEmitter::LDRSW(ARM64Reg Rt, u32 imm)
1646
{
1647
EncodeLoadRegisterInst(2, Rt, imm);
1648
}
1649
void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm)
1650
{
1651
EncodeLoadRegisterInst(3, Rt, imm);
1652
}
1653
1654
// Load/Store pair
1655
void ARM64XEmitter::LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
1656
{
1657
EncodeLoadStorePair(0, 1, type, Rt, Rt2, Rn, imm);
1658
}
1659
void ARM64XEmitter::LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
1660
{
1661
EncodeLoadStorePair(1, 1, type, Rt, Rt2, Rn, imm);
1662
}
1663
void ARM64XEmitter::STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
1664
{
1665
EncodeLoadStorePair(0, 0, type, Rt, Rt2, Rn, imm);
1666
}
1667
1668
// Load/Store Exclusive
1669
void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
1670
{
1671
EncodeLoadStoreExcInst(0, Rs, SP, Rt, Rn);
1672
}
1673
void ARM64XEmitter::STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
1674
{
1675
EncodeLoadStoreExcInst(1, Rs, SP, Rt, Rn);
1676
}
1677
void ARM64XEmitter::LDXRB(ARM64Reg Rt, ARM64Reg Rn)
1678
{
1679
EncodeLoadStoreExcInst(2, SP, SP, Rt, Rn);
1680
}
1681
void ARM64XEmitter::LDAXRB(ARM64Reg Rt, ARM64Reg Rn)
1682
{
1683
EncodeLoadStoreExcInst(3, SP, SP, Rt, Rn);
1684
}
1685
void ARM64XEmitter::STLRB(ARM64Reg Rt, ARM64Reg Rn)
1686
{
1687
EncodeLoadStoreExcInst(4, SP, SP, Rt, Rn);
1688
}
1689
void ARM64XEmitter::LDARB(ARM64Reg Rt, ARM64Reg Rn)
1690
{
1691
EncodeLoadStoreExcInst(5, SP, SP, Rt, Rn);
1692
}
1693
void ARM64XEmitter::STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
1694
{
1695
EncodeLoadStoreExcInst(6, Rs, SP, Rt, Rn);
1696
}
1697
void ARM64XEmitter::STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
1698
{
1699
EncodeLoadStoreExcInst(7, Rs, SP, Rt, Rn);
1700
}
1701
void ARM64XEmitter::LDXRH(ARM64Reg Rt, ARM64Reg Rn)
1702
{
1703
EncodeLoadStoreExcInst(8, SP, SP, Rt, Rn);
1704
}
1705
void ARM64XEmitter::LDAXRH(ARM64Reg Rt, ARM64Reg Rn)
1706
{
1707
EncodeLoadStoreExcInst(9, SP, SP, Rt, Rn);
1708
}
1709
void ARM64XEmitter::STLRH(ARM64Reg Rt, ARM64Reg Rn)
1710
{
1711
EncodeLoadStoreExcInst(10, SP, SP, Rt, Rn);
1712
}
1713
void ARM64XEmitter::LDARH(ARM64Reg Rt, ARM64Reg Rn)
1714
{
1715
EncodeLoadStoreExcInst(11, SP, SP, Rt, Rn);
1716
}
1717
void ARM64XEmitter::STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
1718
{
1719
EncodeLoadStoreExcInst(12 + Is64Bit(Rt), Rs, SP, Rt, Rn);
1720
}
1721
void ARM64XEmitter::STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)
1722
{
1723
EncodeLoadStoreExcInst(14 + Is64Bit(Rt), Rs, SP, Rt, Rn);
1724
}
1725
void ARM64XEmitter::STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
1726
{
1727
EncodeLoadStoreExcInst(16 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
1728
}
1729
void ARM64XEmitter::STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
1730
{
1731
EncodeLoadStoreExcInst(18 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);
1732
}
1733
void ARM64XEmitter::LDXR(ARM64Reg Rt, ARM64Reg Rn)
1734
{
1735
EncodeLoadStoreExcInst(20 + Is64Bit(Rt), SP, SP, Rt, Rn);
1736
}
1737
void ARM64XEmitter::LDAXR(ARM64Reg Rt, ARM64Reg Rn)
1738
{
1739
EncodeLoadStoreExcInst(22 + Is64Bit(Rt), SP, SP, Rt, Rn);
1740
}
1741
void ARM64XEmitter::LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
1742
{
1743
EncodeLoadStoreExcInst(24 + Is64Bit(Rt), SP, Rt2, Rt, Rn);
1744
}
1745
void ARM64XEmitter::LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)
1746
{
1747
EncodeLoadStoreExcInst(26 + Is64Bit(Rt), SP, Rt2, Rt, Rn);
1748
}
1749
void ARM64XEmitter::STLR(ARM64Reg Rt, ARM64Reg Rn)
1750
{
1751
EncodeLoadStoreExcInst(28 + Is64Bit(Rt), SP, SP, Rt, Rn);
1752
}
1753
void ARM64XEmitter::LDAR(ARM64Reg Rt, ARM64Reg Rn)
1754
{
1755
EncodeLoadStoreExcInst(30 + Is64Bit(Rt), SP, SP, Rt, Rn);
1756
}
1757
1758
// Load/Store no-allocate pair (offset)
1759
void ARM64XEmitter::STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
1760
{
1761
EncodeLoadStorePairedInst(0xA0, Rt, Rt2, Rn, imm);
1762
}
1763
void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)
1764
{
1765
EncodeLoadStorePairedInst(0xA1, Rt, Rt2, Rn, imm);
1766
}
1767
1768
// Load/Store register (immediate post-indexed)
1769
// XXX: Most of these support vectors
1770
void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1771
{
1772
if (type == INDEX_UNSIGNED)
1773
EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm, 8);
1774
else
1775
EncodeLoadStoreIndexedInst(0x0E0,
1776
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1777
}
1778
void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1779
{
1780
if (type == INDEX_UNSIGNED)
1781
EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm, 8);
1782
else
1783
EncodeLoadStoreIndexedInst(0x0E1,
1784
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1785
}
1786
void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1787
{
1788
if (type == INDEX_UNSIGNED)
1789
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm, 8);
1790
else
1791
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E2 : 0x0E3,
1792
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1793
}
1794
void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1795
{
1796
if (type == INDEX_UNSIGNED)
1797
EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm, 16);
1798
else
1799
EncodeLoadStoreIndexedInst(0x1E0,
1800
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1801
}
1802
void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1803
{
1804
if (type == INDEX_UNSIGNED)
1805
EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm, 16);
1806
else
1807
EncodeLoadStoreIndexedInst(0x1E1,
1808
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1809
}
1810
void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1811
{
1812
if (type == INDEX_UNSIGNED)
1813
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm, 16);
1814
else
1815
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E2 : 0x1E3,
1816
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1817
}
1818
void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1819
{
1820
if (type == INDEX_UNSIGNED)
1821
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
1822
else
1823
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E0 : 0x2E0,
1824
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1825
}
1826
void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1827
{
1828
if (type == INDEX_UNSIGNED)
1829
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);
1830
else
1831
EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E1 : 0x2E1,
1832
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1833
}
1834
void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1835
{
1836
if (type == INDEX_UNSIGNED)
1837
EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm, 32);
1838
else
1839
EncodeLoadStoreIndexedInst(0x2E2,
1840
type == INDEX_POST ? 1 : 3, Rt, Rn, imm);
1841
}
1842
1843
// Load/Store register (register offset)
1844
void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1845
{
1846
EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm);
1847
}
1848
void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1849
{
1850
EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm);
1851
}
1852
void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1853
{
1854
bool b64Bit = Is64Bit(Rt);
1855
EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm);
1856
}
1857
void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1858
{
1859
EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm);
1860
}
1861
void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1862
{
1863
EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm);
1864
}
1865
void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1866
{
1867
bool b64Bit = Is64Bit(Rt);
1868
EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm);
1869
}
1870
void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1871
{
1872
bool b64Bit = Is64Bit(Rt);
1873
EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm);
1874
}
1875
void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1876
{
1877
bool b64Bit = Is64Bit(Rt);
1878
EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm);
1879
}
1880
void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1881
{
1882
EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm);
1883
}
1884
void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
1885
{
1886
EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm);
1887
}
1888
1889
// Load/Store register (unscaled offset)
1890
void ARM64XEmitter::STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1891
{
1892
EncodeLoadStoreUnscaled(0, 0, Rt, Rn, imm);
1893
}
1894
void ARM64XEmitter::LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1895
{
1896
EncodeLoadStoreUnscaled(0, 1, Rt, Rn, imm);
1897
}
1898
void ARM64XEmitter::LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1899
{
1900
EncodeLoadStoreUnscaled(0, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);
1901
}
1902
void ARM64XEmitter::STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1903
{
1904
EncodeLoadStoreUnscaled(1, 0, Rt, Rn, imm);
1905
}
1906
void ARM64XEmitter::LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1907
{
1908
EncodeLoadStoreUnscaled(1, 1, Rt, Rn, imm);
1909
}
1910
void ARM64XEmitter::LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1911
{
1912
EncodeLoadStoreUnscaled(1, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);
1913
}
1914
void ARM64XEmitter::STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1915
{
1916
EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 0, Rt, Rn, imm);
1917
}
1918
void ARM64XEmitter::LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1919
{
1920
EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 1, Rt, Rn, imm);
1921
}
1922
void ARM64XEmitter::LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm)
1923
{
1924
_assert_msg_(!Is64Bit(Rt), "%s must have a 32bit (W) destination register!", __FUNCTION__);
1925
EncodeLoadStoreUnscaled(2, 2, Rt, Rn, imm);
1926
}
1927
1928
// Address of label/page PC-relative
1929
void ARM64XEmitter::ADR(ARM64Reg Rd, s32 imm)
1930
{
1931
EncodeAddressInst(0, Rd, imm);
1932
}
1933
void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)
1934
{
1935
EncodeAddressInst(1, Rd, imm >> 12);
1936
}
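
// ADR reaches +/-1MB of the current instruction with byte granularity, while ADRP
// produces a 4KB-page-aligned address within +/-4GB; callers are expected to add the
// low 12 bits themselves (see the ADRP+ADD sequence in MOVI2R below).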
1937
1938
// LLVM is unhappy about the regular abs function, so here we go.
1939
inline int64_t abs64(int64_t x) {
1940
return x >= 0 ? x : -x;
1941
}
1942
1943
static int Count(const bool part[4]) {
1944
int cnt = 0;
1945
for (int i = 0; i < 4; i++) {
1946
if (part[i])
1947
cnt++;
1948
}
1949
return cnt;
1950
}
1951
1952
// Wrapper around MOVZ+MOVK (and later MOVN)
void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)
{
	unsigned int parts = Is64Bit(Rd) ? 4 : 2;
	// Zero-initialize so halfwords that are never marked (and the !optimize path) read as false.
	bool upload_part[4]{};

	// Always start with a movz! Kills the dependency on the register.
	bool use_movz = true;

	if (!imm) {
		// Zero immediate, just clear the register. EOR is pointless when we have MOVZ, which looks clearer in disasm too.
		MOVZ(Rd, 0, SHIFT_0);
		return;
	}

	if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) ||
	    (!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max()))
	{
		// Max unsigned value (or if signed, -1)
		// Set to ~ZR
		ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;
		ORN(Rd, ZR, ZR, ArithOption(ZR, ST_LSL, 0));
		return;
	}

	// TODO: Make some more systematic use of MOVN, but this will take care of most cases.
	// Small negative integer. Use MOVN
	if (!Is64Bit(Rd) && (imm | 0xFFFF0000) == imm) {
		MOVN(Rd, (u32)(~imm), SHIFT_0);
		return;
	}

	// XXX: Use MOVN when possible.
	// XXX: Optimize more
	// XXX: Support rotating immediates to save instructions
	if (optimize)
	{
		for (unsigned int i = 0; i < parts; ++i)
		{
			if ((imm >> (i * 16)) & 0xFFFF)
				upload_part[i] = true;
		}
	}

	u64 aligned_pc = (u64)GetCodePointer() & ~0xFFF;
	s64 aligned_offset = (s64)imm - (s64)aligned_pc;
	if (Count(upload_part) > 1 && abs64(aligned_offset) < 0x7FFFFFFFLL)
	{
		// The immediate we are loading is within 4GB of our aligned range,
		// most likely an address that we can load in one or two instructions.
		if (!(abs64(aligned_offset) & 0xFFF))
		{
			// Offset is page-aligned: a single ADRP does it.
			ADRP(Rd, (s32)aligned_offset);
			return;
		}
		else
		{
			// If the address is within 1MB of PC we can still load it in a single instruction.
			s64 offset = (s64)imm - (s64)GetCodePointer();
			if (offset >= -0xFFFFF && offset <= 0xFFFFF)
			{
				ADR(Rd, (s32)offset);
				return;
			}
			else
			{
				ADRP(Rd, (s32)(aligned_offset & ~0xFFF));
				ADD(Rd, Rd, imm & 0xFFF);
				return;
			}
		}
	}

	for (unsigned i = 0; i < parts; ++i)
	{
		if (use_movz && upload_part[i])
		{
			MOVZ(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
			use_movz = false;
		}
		else
		{
			if (upload_part[i] || !optimize)
				MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);
		}
	}
}
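
// Illustrative decompositions, assuming optimize is enabled and the PC-relative
// ADRP/ADR shortcuts above don't apply:
//   MOVI2R(W0, 0x00120000)        -> MOVZ w0, #0x12, LSL #16
//   MOVI2R(X0, 0x123456789ABCULL) -> MOVZ x0, #0x9ABC
//                                    MOVK x0, #0x5678, LSL #16
//                                    MOVK x0, #0x1234, LSL #32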
2041
2042
void ARM64XEmitter::PUSH(ARM64Reg Rd) {
2043
STR(INDEX_PRE, Rd, SP, -16);
2044
}
2045
2046
void ARM64XEmitter::POP(ARM64Reg Rd) {
2047
LDR(INDEX_POST, Rd, SP, 16);
2048
}
2049
2050
void ARM64XEmitter::PUSH2(ARM64Reg Rd, ARM64Reg Rn) {
2051
STP(INDEX_PRE, Rd, Rn, SP, -16);
2052
}
2053
2054
void ARM64XEmitter::POP2(ARM64Reg Rd, ARM64Reg Rn) {
2055
LDP(INDEX_POST, Rd, Rn, SP, 16);
2056
}
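
// PUSH/POP move SP by 16 bytes for a single register, and PUSH2/POP2 pack a pair of
// registers into the same 16 bytes, so the stack pointer stays 16-byte aligned as
// AArch64 generally requires when SP is used as a base register.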
2057
2058
// Float Emitter
2059
void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2060
{
2061
Rt = DecodeReg(Rt);
2062
Rn = DecodeReg(Rn);
2063
u32 encoded_size = 0;
2064
u32 encoded_imm = 0;
2065
2066
if (size == 8)
2067
encoded_size = 0;
2068
else if (size == 16)
2069
encoded_size = 1;
2070
else if (size == 32)
2071
encoded_size = 2;
2072
else if (size == 64)
2073
encoded_size = 3;
2074
else if (size == 128)
2075
encoded_size = 0;
2076
2077
if (type == INDEX_UNSIGNED)
2078
{
2079
_assert_msg_(!(imm & ((size - 1) >> 3)), "%s(INDEX_UNSIGNED) immediate offset must be aligned to size! (%d) (%p)", __FUNCTION__, imm, m_emit->GetCodePointer());
2080
_assert_msg_(imm >= 0, "%s(INDEX_UNSIGNED) immediate offset must be positive!", __FUNCTION__);
2081
if (size == 16)
2082
imm >>= 1;
2083
else if (size == 32)
2084
imm >>= 2;
2085
else if (size == 64)
2086
imm >>= 3;
2087
else if (size == 128)
2088
imm >>= 4;
2089
encoded_imm = (imm & 0xFFF);
2090
}
2091
else
2092
{
2093
_assert_msg_(!(imm < -256 || imm > 255), "%s immediate offset must be within range of -256 to 255!", __FUNCTION__);
2094
encoded_imm = (imm & 0x1FF) << 2;
2095
if (type == INDEX_POST)
2096
encoded_imm |= 1;
2097
else
2098
encoded_imm |= 3;
2099
}
2100
2101
Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | \
2102
(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);
2103
}
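
// Note on the two offset forms handled above: INDEX_UNSIGNED takes a non-negative,
// size-scaled offset packed into imm12 (e.g. a 32-bit access with imm = 8 encodes
// imm12 = 2), while the pre/post-indexed forms take a raw signed 9-bit byte offset
// in the range -256..255.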
2104
2105
void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2106
{
2107
_assert_msg_(!IsQuad(Rd), "%s only supports double and single registers!", __FUNCTION__);
2108
Rd = DecodeReg(Rd);
2109
Rn = DecodeReg(Rn);
2110
Rm = DecodeReg(Rm);
2111
2112
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (Rm << 16) | \
2113
(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
2114
}
2115
2116
void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2117
{
2118
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
2119
bool quad = IsQuad(Rd);
2120
Rd = DecodeReg(Rd);
2121
Rn = DecodeReg(Rn);
2122
Rm = DecodeReg(Rm);
2123
2124
Write32((quad << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \
2125
(Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
2126
}
2127
2128
void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn)
2129
{
2130
Rd = DecodeReg(Rd);
2131
Rn = DecodeReg(Rn);
2132
2133
Write32((Q << 30) | (op << 29) | (0x7 << 25) | (imm5 << 16) | (imm4 << 11) | \
2134
(1 << 10) | (Rn << 5) | Rd);
2135
}
2136
2137
void ARM64FloatEmitter::EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
2138
Rd = DecodeReg(Rd);
2139
Rn = DecodeReg(Rn);
2140
2141
Write32((1 << 30) | (U << 29) | (0b111100011 << 20) | (size << 22) | (opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
2142
}
2143
2144
void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2145
{
2146
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
2147
Rd = DecodeReg(Rd);
2148
Rn = DecodeReg(Rn);
2149
2150
Write32((Q << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \
2151
(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);
2152
}
2153
2154
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn)
2155
{
2156
_assert_msg_(!IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);
2157
bool quad = IsQuad(Rt);
2158
Rt = DecodeReg(Rt);
2159
Rn = DecodeReg(Rn);
2160
2161
Write32((quad << 30) | (0xD << 24) | (L << 22) | (R << 21) | (opcode << 13) | \
2162
(S << 12) | (size << 10) | (Rn << 5) | Rt);
2163
}
2164
2165
void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2166
{
2167
_assert_msg_(!IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);
2168
bool quad = IsQuad(Rt);
2169
Rt = DecodeReg(Rt);
2170
Rn = DecodeReg(Rn);
2171
Rm = DecodeReg(Rm);
2172
2173
Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | \
2174
(opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt);
2175
}
2176
2177
void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2178
{
2179
_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2180
Rd = DecodeReg(Rd);
2181
Rn = DecodeReg(Rn);
2182
2183
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | \
2184
(1 << 14) | (Rn << 5) | Rd);
2185
}
2186
2187
void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2188
{
2189
_assert_msg_(Rn <= SP, "%s only supports GPR as source!", __FUNCTION__);
2190
Rd = DecodeReg(Rd);
2191
Rn = DecodeReg(Rn);
2192
2193
Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | \
2194
(opcode << 16) | (Rn << 5) | Rd);
2195
}
2196
2197
void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign)
2198
{
2199
_dbg_assert_msg_(IsScalar(Rn), "fcvts: Rn must be floating point");
2200
if (IsGPR(Rd)) {
2201
// Use the encoding that transfers the result to a GPR.
2202
bool sf = Is64Bit(Rd);
2203
int type = IsDouble(Rn) ? 1 : 0;
2204
Rd = DecodeReg(Rd);
2205
Rn = DecodeReg(Rn);
2206
int opcode = (sign ? 1 : 0);
2207
int rmode = 0;
2208
switch (round) {
2209
case ROUND_A: rmode = 0; opcode |= 4; break;
2210
case ROUND_P: rmode = 1; break;
2211
case ROUND_M: rmode = 2; break;
2212
case ROUND_Z: rmode = 3; break;
2213
case ROUND_N: rmode = 0; break;
2214
}
2215
EmitConversion2(sf, 0, true, type, rmode, opcode, 0, Rd, Rn);
2216
}
2217
else
2218
{
2219
// Use the encoding (vector, single) that keeps the result in the fp register.
2220
int sz = IsDouble(Rn);
2221
Rd = DecodeReg(Rd);
2222
Rn = DecodeReg(Rn);
2223
int opcode = 0;
2224
switch (round) {
2225
case ROUND_A: opcode = 0x1C; break;
2226
case ROUND_N: opcode = 0x1A; break;
2227
case ROUND_M: opcode = 0x1B; break;
2228
case ROUND_P: opcode = 0x1A; sz |= 2; break;
2229
case ROUND_Z: opcode = 0x1B; sz |= 2; break;
2230
}
2231
Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) | (Rn << 5) | Rd);
2232
}
2233
}
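
// The rmode/opcode pairs chosen above correspond to the FCVT families:
// ROUND_A -> FCVTA*, ROUND_P -> FCVTP*, ROUND_M -> FCVTM*, ROUND_Z -> FCVTZ*,
// ROUND_N -> FCVTN*, with separate encodings for GPR and FP destinations.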
2234
2235
void ARM64FloatEmitter::FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
2236
EmitConvertScalarToInt(Rd, Rn, round, false);
2237
}
2238
2239
void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {
2240
EmitConvertScalarToInt(Rd, Rn, round, true);
2241
}
2242
2243
void ARM64FloatEmitter::FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale) {
2244
if (IsScalar(Rd)) {
2245
int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
2246
Rd = DecodeReg(Rd);
2247
Rn = DecodeReg(Rn);
2248
2249
Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
2250
} else {
2251
bool sf = Is64Bit(Rd);
2252
u32 type = 0;
2253
if (IsDouble(Rd))
2254
type = 1;
2255
int rmode = 3;
2256
int opcode = 0;
2257
2258
Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
2259
2260
}
2261
}
2262
2263
void ARM64FloatEmitter::FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale) {
2264
if (IsScalar(Rd)) {
2265
int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
2266
Rd = DecodeReg(Rd);
2267
Rn = DecodeReg(Rn);
2268
2269
Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);
2270
} else {
2271
bool sf = Is64Bit(Rd);
2272
u32 type = 0;
2273
if (IsDouble(Rd))
2274
type = 1;
2275
int rmode = 3;
2276
int opcode = 1;
2277
2278
Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
2279
}
2280
}
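
// For the fixed-point conversions above, 'scale' is the number of fraction bits;
// the shift-immediate field is encoded as 2*width - scale, so e.g. a single-precision
// conversion with scale = 16 encodes 2 * 32 - 16 = 48.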
2281
2282
void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn)
2283
{
2284
Rd = DecodeReg(Rd);
2285
Rn = DecodeReg(Rn);
2286
2287
Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | (rmode << 19) | \
2288
(opcode << 16) | (scale << 10) | (Rn << 5) | Rd);
2289
}
2290
2291
void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm)
2292
{
2293
_assert_msg_(!IsQuad(Rn), "%s doesn't support vector!", __FUNCTION__);
2294
bool is_double = IsDouble(Rn);
2295
2296
Rn = DecodeReg(Rn);
2297
Rm = DecodeReg(Rm);
2298
2299
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \
2300
(op << 14) | (1 << 13) | (Rn << 5) | opcode2);
2301
}
2302
2303
void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2304
{
2305
_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2306
bool is_double = IsDouble(Rd);
2307
2308
Rd = DecodeReg(Rd);
2309
Rn = DecodeReg(Rn);
2310
Rm = DecodeReg(Rm);
2311
2312
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \
2313
(cond << 12) | (3 << 10) | (Rn << 5) | Rd);
2314
}
2315
2316
void ARM64FloatEmitter::EmitCondCompare(bool M, bool S, CCFlags cond, int op, u8 nzcv, ARM64Reg Rn, ARM64Reg Rm) {
2317
_assert_msg_(!IsQuad(Rn), "%s doesn't support vector!", __FUNCTION__);
2318
bool is_double = IsDouble(Rn);
2319
2320
Rn = DecodeReg(Rn);
2321
Rm = DecodeReg(Rm);
2322
2323
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \
2324
(cond << 12) | (1 << 10) | (Rn << 5) | (op << 4) | nzcv);
2325
}
2326
2327
void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2328
{
2329
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);
2330
2331
bool quad = IsQuad(Rd);
2332
2333
u32 encoded_size = 0;
2334
if (size == 16)
2335
encoded_size = 1;
2336
else if (size == 32)
2337
encoded_size = 2;
2338
else if (size == 64)
2339
encoded_size = 3;
2340
2341
Rd = DecodeReg(Rd);
2342
Rn = DecodeReg(Rn);
2343
Rm = DecodeReg(Rm);
2344
2345
Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | \
2346
(1 << 11) | (Rn << 5) | Rd);
2347
}
2348
2349
void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8)
2350
{
2351
_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2352
2353
bool is_double = !IsSingle(Rd);
2354
2355
Rd = DecodeReg(Rd);
2356
2357
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | \
2358
(imm8 << 13) | (1 << 12) | (imm5 << 5) | Rd);
2359
}
2360
2361
void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2362
{
2363
_assert_msg_(immh, "%s bad encoding! Can't have zero immh", __FUNCTION__);
2364
2365
Rd = DecodeReg(Rd);
2366
Rn = DecodeReg(Rn);
2367
2368
Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | \
2369
(opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
2370
}
2371
2372
void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {
2373
Rd = DecodeReg(Rd);
2374
Rn = DecodeReg(Rn);
2375
2376
Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);
2377
}
2378
2379
void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn)
2380
{
2381
bool quad = IsQuad(Rt);
2382
u32 encoded_size = 0;
2383
2384
if (size == 16)
2385
encoded_size = 1;
2386
else if (size == 32)
2387
encoded_size = 2;
2388
else if (size == 64)
2389
encoded_size = 3;
2390
2391
Rt = DecodeReg(Rt);
2392
Rn = DecodeReg(Rn);
2393
2394
Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | \
2395
(encoded_size << 10) | (Rn << 5) | Rt);
2396
}
2397
2398
void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2399
{
2400
bool quad = IsQuad(Rt);
2401
u32 encoded_size = 0;
2402
2403
if (size == 16)
2404
encoded_size = 1;
2405
else if (size == 32)
2406
encoded_size = 2;
2407
else if (size == 64)
2408
encoded_size = 3;
2409
2410
Rt = DecodeReg(Rt);
2411
Rn = DecodeReg(Rn);
2412
Rm = DecodeReg(Rm);
2413
2414
Write32((quad << 30) | (0x19 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | \
2415
(encoded_size << 10) | (Rn << 5) | Rt);
2416
2417
}
2418
2419
void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)
2420
{
2421
_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);
2422
2423
Rd = DecodeReg(Rd);
2424
Rn = DecodeReg(Rn);
2425
2426
Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | \
2427
(opcode << 15) | (1 << 14) | (Rn << 5) | Rd);
2428
}
2429
2430
void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2431
{
2432
bool quad = IsQuad(Rd);
2433
2434
Rd = DecodeReg(Rd);
2435
Rn = DecodeReg(Rn);
2436
Rm = DecodeReg(Rm);
2437
2438
Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | \
2439
(Rm << 16) | (opcode << 12) | (H << 11) | (Rn << 5) | Rd);
2440
}
2441
2442
void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2443
{
2444
_assert_msg_(!(imm < -256 || imm > 255), "%s received too large offset: %d", __FUNCTION__, imm);
2445
Rt = DecodeReg(Rt);
2446
Rn = DecodeReg(Rn);
2447
2448
Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);
2449
}
2450
2451
void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
2452
{
2453
u32 type_encode = 0;
2454
u32 opc = 0;
2455
2456
switch (type)
2457
{
2458
case INDEX_SIGNED:
2459
type_encode = 2;
2460
break;
2461
case INDEX_POST:
2462
type_encode = 1;
2463
break;
2464
case INDEX_PRE:
2465
type_encode = 3;
2466
break;
2467
case INDEX_UNSIGNED:
2468
_assert_msg_(false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);
2469
break;
2470
}
2471
2472
if (size == 128)
2473
{
2474
_assert_msg_(!(imm & 0xF), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
2475
opc = 2;
2476
imm >>= 4;
2477
}
2478
else if (size == 64)
2479
{
2480
_assert_msg_(!(imm & 0x7), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
2481
opc = 1;
2482
imm >>= 3;
2483
}
2484
else if (size == 32)
2485
{
2486
_assert_msg_(!(imm & 0x3), "%s received invalid offset 0x%x!", __FUNCTION__, imm);
2487
opc = 0;
2488
imm >>= 2;
2489
}
2490
2491
Rt = DecodeReg(Rt);
2492
Rt2 = DecodeReg(Rt2);
2493
Rn = DecodeReg(Rn);
2494
2495
Write32((opc << 30) | (0xB << 26) | (type_encode << 23) | (load << 22) | \
2496
((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);
2497
2498
}
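
// Pair offsets are scaled by the register size before being packed into the signed
// 7-bit imm7 field: /16 for 128-bit, /8 for 64-bit and /4 for 32-bit registers.
// E.g. STP(128, INDEX_SIGNED, Q0, Q1, X0, 32) stores imm7 = 2.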
2499
2500
void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
2501
{
2502
_assert_msg_(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "%s must contain an extended reg as Rm!", __FUNCTION__);
2503
2504
u32 encoded_size = 0;
2505
u32 encoded_op = 0;
2506
2507
if (size == 8)
2508
{
2509
encoded_size = 0;
2510
encoded_op = 0;
2511
}
2512
else if (size == 16)
2513
{
2514
encoded_size = 1;
2515
encoded_op = 0;
2516
}
2517
else if (size == 32)
2518
{
2519
encoded_size = 2;
2520
encoded_op = 0;
2521
}
2522
else if (size == 64)
2523
{
2524
encoded_size = 3;
2525
encoded_op = 0;
2526
}
2527
else if (size == 128)
2528
{
2529
encoded_size = 0;
2530
encoded_op = 2;
2531
}
2532
2533
if (load)
2534
encoded_op |= 1;
2535
2536
Rt = DecodeReg(Rt);
2537
Rn = DecodeReg(Rn);
2538
ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());
2539
2540
Write32((encoded_size << 30) | (encoded_op << 22) | (0x1E1 << 21) | (decoded_Rm << 16) | \
2541
Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);
2542
}
2543
2544
void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2545
{
2546
EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);
2547
}
2548
void ARM64FloatEmitter::STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2549
{
2550
EmitLoadStoreImmediate(size, 0, type, Rt, Rn, imm);
2551
}
2552
2553
// Loadstore unscaled
2554
void ARM64FloatEmitter::LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2555
{
2556
u32 encoded_size = 0;
2557
u32 encoded_op = 0;
2558
2559
if (size == 8)
2560
{
2561
encoded_size = 0;
2562
encoded_op = 1;
2563
}
2564
else if (size == 16)
2565
{
2566
encoded_size = 1;
2567
encoded_op = 1;
2568
}
2569
else if (size == 32)
2570
{
2571
encoded_size = 2;
2572
encoded_op = 1;
2573
}
2574
else if (size == 64)
2575
{
2576
encoded_size = 3;
2577
encoded_op = 1;
2578
}
2579
else if (size == 128)
2580
{
2581
encoded_size = 0;
2582
encoded_op = 3;
2583
}
2584
2585
EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
2586
}
2587
void ARM64FloatEmitter::STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)
2588
{
2589
u32 encoded_size = 0;
2590
u32 encoded_op = 0;
2591
2592
if (size == 8)
2593
{
2594
encoded_size = 0;
2595
encoded_op = 0;
2596
}
2597
else if (size == 16)
2598
{
2599
encoded_size = 1;
2600
encoded_op = 0;
2601
}
2602
else if (size == 32)
2603
{
2604
encoded_size = 2;
2605
encoded_op = 0;
2606
}
2607
else if (size == 64)
2608
{
2609
encoded_size = 3;
2610
encoded_op = 0;
2611
}
2612
else if (size == 128)
2613
{
2614
encoded_size = 0;
2615
encoded_op = 2;
2616
}
2617
2618
EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);
2619
2620
}
2621
2622
// Loadstore single structure
2623
void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
2624
{
2625
bool S = 0;
2626
u32 opcode = 0;
2627
u32 encoded_size = 0;
2628
ARM64Reg encoded_reg = INVALID_REG;
2629
2630
if (size == 8)
2631
{
2632
S = (index & 4) != 0;
2633
opcode = 0;
2634
encoded_size = index & 3;
2635
if (index & 8)
2636
encoded_reg = EncodeRegToQuad(Rt);
2637
else
2638
encoded_reg = EncodeRegToDouble(Rt);
2639
2640
}
2641
else if (size == 16)
2642
{
2643
S = (index & 2) != 0;
2644
opcode = 2;
2645
encoded_size = (index & 1) << 1;
2646
if (index & 4)
2647
encoded_reg = EncodeRegToQuad(Rt);
2648
else
2649
encoded_reg = EncodeRegToDouble(Rt);
2650
2651
}
2652
else if (size == 32)
2653
{
2654
S = (index & 1) != 0;
2655
opcode = 4;
2656
encoded_size = 0;
2657
if (index & 2)
2658
encoded_reg = EncodeRegToQuad(Rt);
2659
else
2660
encoded_reg = EncodeRegToDouble(Rt);
2661
}
2662
else if (size == 64)
2663
{
2664
S = 0;
2665
opcode = 4;
2666
encoded_size = 1;
2667
if (index == 1)
2668
encoded_reg = EncodeRegToQuad(Rt);
2669
else
2670
encoded_reg = EncodeRegToDouble(Rt);
2671
}
2672
2673
EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn);
2674
}
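
// Lane selection above follows the AArch64 single-structure encoding: the lane index
// is spread across Q:S:size (for 8-bit lanes, index bit 3 -> Q, bit 2 -> S, bits 1:0
// -> size), with one fewer bit per step up in lane width, so lanes in the upper half
// of the vector automatically pick the Q-register form.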
2675
2676
void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
2677
{
2678
bool S = 0;
2679
u32 opcode = 0;
2680
u32 encoded_size = 0;
2681
ARM64Reg encoded_reg = INVALID_REG;
2682
2683
if (size == 8)
2684
{
2685
S = (index & 4) != 0;
2686
opcode = 0;
2687
encoded_size = index & 3;
2688
if (index & 8)
2689
encoded_reg = EncodeRegToQuad(Rt);
2690
else
2691
encoded_reg = EncodeRegToDouble(Rt);
2692
2693
}
2694
else if (size == 16)
2695
{
2696
S = (index & 2) != 0;
2697
opcode = 2;
2698
encoded_size = (index & 1) << 1;
2699
if (index & 4)
2700
encoded_reg = EncodeRegToQuad(Rt);
2701
else
2702
encoded_reg = EncodeRegToDouble(Rt);
2703
2704
}
2705
else if (size == 32)
2706
{
2707
S = (index & 1) != 0;
2708
opcode = 4;
2709
encoded_size = 0;
2710
if (index & 2)
2711
encoded_reg = EncodeRegToQuad(Rt);
2712
else
2713
encoded_reg = EncodeRegToDouble(Rt);
2714
}
2715
else if (size == 64)
2716
{
2717
S = 0;
2718
opcode = 4;
2719
encoded_size = 1;
2720
if (index == 1)
2721
encoded_reg = EncodeRegToQuad(Rt);
2722
else
2723
encoded_reg = EncodeRegToDouble(Rt);
2724
}
2725
2726
EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
2727
}
2728
2729
void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
2730
{
2731
EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn);
2732
}
2733
void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn)
2734
{
2735
EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn);
2736
}
2737
void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2738
{
2739
EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn, Rm);
2740
}
2741
void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2742
{
2743
EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn, Rm);
2744
}
2745
2746
void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)
2747
{
2748
bool S = 0;
2749
u32 opcode = 0;
2750
u32 encoded_size = 0;
2751
ARM64Reg encoded_reg = INVALID_REG;
2752
2753
if (size == 8)
2754
{
2755
S = (index & 4) != 0;
2756
opcode = 0;
2757
encoded_size = index & 3;
2758
if (index & 8)
2759
encoded_reg = EncodeRegToQuad(Rt);
2760
else
2761
encoded_reg = EncodeRegToDouble(Rt);
2762
2763
}
2764
else if (size == 16)
2765
{
2766
S = (index & 2) != 0;
2767
opcode = 2;
2768
encoded_size = (index & 1) << 1;
2769
if (index & 4)
2770
encoded_reg = EncodeRegToQuad(Rt);
2771
else
2772
encoded_reg = EncodeRegToDouble(Rt);
2773
2774
}
2775
else if (size == 32)
2776
{
2777
S = (index & 1) != 0;
2778
opcode = 4;
2779
encoded_size = 0;
2780
if (index & 2)
2781
encoded_reg = EncodeRegToQuad(Rt);
2782
else
2783
encoded_reg = EncodeRegToDouble(Rt);
2784
}
2785
else if (size == 64)
2786
{
2787
S = 0;
2788
opcode = 4;
2789
encoded_size = 1;
2790
if (index == 1)
2791
encoded_reg = EncodeRegToQuad(Rt);
2792
else
2793
encoded_reg = EncodeRegToDouble(Rt);
2794
}
2795
2796
EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn);
2797
}
2798
2799
void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)
2800
{
2801
bool S = 0;
2802
u32 opcode = 0;
2803
u32 encoded_size = 0;
2804
ARM64Reg encoded_reg = INVALID_REG;
2805
2806
if (size == 8)
2807
{
2808
S = (index & 4) != 0;
2809
opcode = 0;
2810
encoded_size = index & 3;
2811
if (index & 8)
2812
encoded_reg = EncodeRegToQuad(Rt);
2813
else
2814
encoded_reg = EncodeRegToDouble(Rt);
2815
2816
}
2817
else if (size == 16)
2818
{
2819
S = (index & 2) != 0;
2820
opcode = 2;
2821
encoded_size = (index & 1) << 1;
2822
if (index & 4)
2823
encoded_reg = EncodeRegToQuad(Rt);
2824
else
2825
encoded_reg = EncodeRegToDouble(Rt);
2826
2827
}
2828
else if (size == 32)
2829
{
2830
S = (index & 1) != 0;
2831
opcode = 4;
2832
encoded_size = 0;
2833
if (index & 2)
2834
encoded_reg = EncodeRegToQuad(Rt);
2835
else
2836
encoded_reg = EncodeRegToDouble(Rt);
2837
}
2838
else if (size == 64)
2839
{
2840
S = 0;
2841
opcode = 4;
2842
encoded_size = 1;
2843
if (index == 1)
2844
encoded_reg = EncodeRegToQuad(Rt);
2845
else
2846
encoded_reg = EncodeRegToDouble(Rt);
2847
}
2848
2849
EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);
2850
}
2851
2852
// Loadstore multiple structure
2853
void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
2854
{
2855
_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2856
u32 opcode = 0;
2857
if (count == 1)
2858
opcode = 7;
2859
else if (count == 2)
2860
opcode = 0xA;
2861
else if (count == 3)
2862
opcode = 6;
2863
else if (count == 4)
2864
opcode = 2;
2865
EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);
2866
}
2867
void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2868
{
2869
_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2870
_assert_msg_(type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);
2871
2872
u32 opcode = 0;
2873
if (count == 1)
2874
opcode = 7;
2875
else if (count == 2)
2876
opcode = 0xA;
2877
else if (count == 3)
2878
opcode = 6;
2879
else if (count == 4)
2880
opcode = 2;
2881
EmitLoadStoreMultipleStructurePost(size, 1, opcode, Rt, Rn, Rm);
2882
}
2883
void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)
2884
{
2885
_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2886
u32 opcode = 0;
2887
if (count == 1)
2888
opcode = 7;
2889
else if (count == 2)
2890
opcode = 0xA;
2891
else if (count == 3)
2892
opcode = 6;
2893
else if (count == 4)
2894
opcode = 2;
2895
EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);
2896
}
2897
void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)
2898
{
2899
_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);
2900
_assert_msg_(type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);
2901
2902
u32 opcode = 0;
2903
if (count == 1)
2904
opcode = 7;
2905
else if (count == 2)
2906
opcode = 0xA;
2907
else if (count == 3)
2908
opcode = 6;
2909
else if (count == 4)
2910
opcode = 2;
2911
EmitLoadStoreMultipleStructurePost(size, 0, opcode, Rt, Rn, Rm);
2912
}
2913
2914
// Scalar - 1 Source
2915
void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)
2916
{
2917
if (IsScalar(Rd) && IsScalar(Rn)) {
2918
EmitScalar1Source(0, 0, IsDouble(Rd), 0, Rd, Rn);
2919
} else {
2920
_assert_msg_(!IsQuad(Rd) && !IsQuad(Rn), "FMOV can't move to/from quads");
2921
int rmode = 0;
2922
int opcode = 6;
2923
int sf = 0;
2924
if (IsSingle(Rd) && !Is64Bit(Rn) && !top) {
2925
// GPR to scalar single
2926
opcode |= 1;
2927
} else if (!Is64Bit(Rd) && IsSingle(Rn) && !top) {
2928
// Scalar single to GPR - defaults are correct
2929
} else {
2930
// TODO
2931
_assert_msg_(false, "FMOV: Unhandled case");
2932
}
2933
Rd = DecodeReg(Rd);
2934
Rn = DecodeReg(Rn);
2935
Write32((sf << 31) | (0x1e2 << 20) | (rmode << 19) | (opcode << 16) | (Rn << 5) | Rd);
2936
}
2937
}
2938
2939
// Loadstore paired
2940
void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
2941
{
2942
EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm);
2943
}
2944
void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)
2945
{
2946
EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);
2947
}
2948
2949
// Loadstore register offset
2950
void ARM64FloatEmitter::STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
2951
{
2952
EncodeLoadStoreRegisterOffset(size, false, Rt, Rn, Rm);
2953
}
2954
void ARM64FloatEmitter::LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)
2955
{
2956
EncodeLoadStoreRegisterOffset(size, true, Rt, Rn, Rm);
2957
}
2958
2959
void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn)
2960
{
2961
EmitScalar1Source(0, 0, IsDouble(Rd), 1, Rd, Rn);
2962
}
2963
void ARM64FloatEmitter::FNEG(ARM64Reg Rd, ARM64Reg Rn)
2964
{
2965
EmitScalar1Source(0, 0, IsDouble(Rd), 2, Rd, Rn);
2966
}
2967
void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)
2968
{
2969
EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);
2970
}
2971
2972
// Scalar - pairwise
2973
void ARM64FloatEmitter::FADDP(ARM64Reg Rd, ARM64Reg Rn) {
2974
EmitScalarPairwise(1, IsDouble(Rd), 0b01101, Rd, Rn);
2975
}
2976
void ARM64FloatEmitter::FMAXP(ARM64Reg Rd, ARM64Reg Rn) {
2977
EmitScalarPairwise(1, IsDouble(Rd), 0b01111, Rd, Rn);
2978
}
2979
void ARM64FloatEmitter::FMINP(ARM64Reg Rd, ARM64Reg Rn) {
2980
EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01111, Rd, Rn);
2981
}
2982
void ARM64FloatEmitter::FMAXNMP(ARM64Reg Rd, ARM64Reg Rn) {
2983
EmitScalarPairwise(1, IsDouble(Rd), 0b01100, Rd, Rn);
2984
}
2985
void ARM64FloatEmitter::FMINNMP(ARM64Reg Rd, ARM64Reg Rn) {
2986
EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01100, Rd, Rn);
2987
}
2988
2989
// Scalar - 2 Source
2990
void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2991
{
2992
EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);
2993
}
2994
void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2995
{
2996
EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);
2997
}
2998
void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
2999
{
3000
EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);
3001
}
3002
void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3003
{
3004
EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);
3005
}
3006
void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3007
{
3008
EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);
3009
}
3010
void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3011
{
3012
EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);
3013
}
3014
void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3015
{
3016
EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);
3017
}
3018
void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3019
{
3020
EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);
3021
}
3022
void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3023
{
3024
EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);
3025
}
3026
3027
void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3028
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);
3029
}
3030
void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3031
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);
3032
}
3033
void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3034
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);
3035
}
3036
void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {
3037
EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);
3038
}
3039
3040
void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {
3041
int type = isDouble ? 1 : 0;
3042
Rd = DecodeReg(Rd);
3043
Rn = DecodeReg(Rn);
3044
Rm = DecodeReg(Rm);
3045
Ra = DecodeReg(Ra);
3046
int o1 = opcode >> 1;
3047
int o0 = opcode & 1;
3048
m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) | (Ra << 10) | (Rn << 5) | Rd);
3049
}
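
// The 2-bit 'opcode' packed above splits into o1:o0, giving 00 FMADD, 01 FMSUB,
// 10 FNMADD and 11 FNMSUB, matching the four wrappers above.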
3050
3051
// Scalar floating point immediate
3052
void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8)
3053
{
3054
EmitScalarImm(0, 0, 0, 0, Rd, imm8);
3055
}
3056
3057
// Vector
3058
void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3059
{
3060
EmitThreeSame(0, 0, 3, Rd, Rn, Rm);
3061
}
3062
void ARM64FloatEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3063
{
3064
EmitThreeSame(1, 0, 3, Rd, Rn, Rm);
3065
}
3066
void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3067
{
3068
EmitThreeSame(1, 1, 3, Rd, Rn, Rm);
3069
}
3070
void ARM64FloatEmitter::BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3071
EmitThreeSame(1, 2, 3, Rd, Rn, Rm);
3072
}
3073
void ARM64FloatEmitter::BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3074
EmitThreeSame(1, 3, 3, Rd, Rn, Rm);
3075
}
3076
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
3077
{
3078
u32 imm5 = 0;
3079
3080
if (size == 8)
3081
{
3082
imm5 = 1;
3083
imm5 |= index << 1;
3084
}
3085
else if (size == 16)
3086
{
3087
imm5 = 2;
3088
imm5 |= index << 2;
3089
}
3090
else if (size == 32)
3091
{
3092
imm5 = 4;
3093
imm5 |= index << 3;
3094
}
3095
else if (size == 64)
3096
{
3097
imm5 = 8;
3098
imm5 |= index << 4;
3099
}
3100
3101
EmitCopy(IsQuad(Rd), 0, imm5, 0, Rd, Rn);
3102
}
3103
void ARM64FloatEmitter::FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3104
{
3105
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xF, Rd, Rn);
3106
}
3107
void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3108
{
3109
EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);
3110
}
3111
void ARM64FloatEmitter::FADDP(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3112
EmitThreeSame(1, size >> 6, 0x1A, Rd, Rn, Rm);
3113
}
3114
void ARM64FloatEmitter::FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3115
{
3116
EmitThreeSame(0, size >> 6, 0x1E, Rd, Rn, Rm);
3117
}
3118
void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3119
{
3120
EmitThreeSame(0, size >> 6, 0x19, Rd, Rn, Rm);
3121
}
3122
void ARM64FloatEmitter::FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3123
{
3124
EmitThreeSame(0, 2 | size >> 6, 0x1E, Rd, Rn, Rm);
3125
}
3126
void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3127
{
3128
Emit2RegMisc(false, 0, size >> 6, 0x17, Rd, Rn);
3129
}
3130
void ARM64FloatEmitter::FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3131
{
3132
Emit2RegMisc(true, 0, size >> 6, 0x17, Rd, Rn);
3133
}
3134
void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3135
{
3136
Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 5, 0x16, Rd, Rn);
3137
}
3138
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3139
{
3140
Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1B, Rd, Rn);
3141
}
3142
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3143
{
3144
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);
3145
}
3146
void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
3147
int imm = size * 2 - scale;
3148
EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0x1F, Rd, Rn);
3149
}
3150
void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {
3151
int imm = size * 2 - scale;
3152
EmitShiftImm(IsQuad(Rd), true, imm >> 3, imm & 7, 0x1F, Rd, Rn);
3153
}
3154
void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3155
{
3156
EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);
3157
}
3158
void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3159
{
3160
EmitThreeSame(1, size >> 6, 0x1B, Rd, Rn, Rm);
3161
}
3162
void ARM64FloatEmitter::UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3163
{
3164
EmitThreeSame(1, EncodeSize(size), 0xD, Rd, Rn, Rm);
3165
}
3166
void ARM64FloatEmitter::UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3167
{
3168
EmitThreeSame(1, EncodeSize(size), 0xC, Rd, Rn, Rm);
3169
}
3170
void ARM64FloatEmitter::SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3171
{
3172
EmitThreeSame(0, EncodeSize(size), 0xD, Rd, Rn, Rm);
3173
}
3174
void ARM64FloatEmitter::SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3175
{
3176
EmitThreeSame(0, EncodeSize(size), 0xC, Rd, Rn, Rm);
3177
}
3178
void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3179
{
3180
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn);
3181
}
3182
void ARM64FloatEmitter::FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3183
{
3184
Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1D, Rd, Rn);
3185
}
3186
void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3187
{
3188
EmitThreeSame(0, 2 | (size >> 6), 0x1A, Rd, Rn, Rm);
3189
}
3190
void ARM64FloatEmitter::FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3191
{
3192
EmitThreeSame(0, 2 | (size >> 6), 0x19, Rd, Rn, Rm);
3193
}
3194
void ARM64FloatEmitter::NOT(ARM64Reg Rd, ARM64Reg Rn)
3195
{
3196
Emit2RegMisc(IsQuad(Rd), 1, 0, 5, Rd, Rn);
3197
}
3198
void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
3199
{
3200
EmitThreeSame(0, 2, 3, Rd, Rn, Rm);
3201
}
3202
void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3203
{
3204
Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 1, Rd, Rn);
3205
}
3206
void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3207
{
3208
Emit2RegMisc(IsQuad(Rd), 1, size >> 4, 0, Rd, Rn);
3209
}
3210
void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3211
{
3212
Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);
3213
}
3214
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3215
{
3216
Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);
3217
}
3218
void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3219
{
3220
Emit2RegMisc(IsQuad(Rd), 1, size >> 6, 0x1D, Rd, Rn);
3221
}
3222
void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
3223
{
3224
int imm = size * 2 - scale;
3225
EmitShiftImm(IsQuad(Rd), 0, imm >> 3, imm & 7, 0x1C, Rd, Rn);
3226
}
3227
void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)
3228
{
3229
int imm = size * 2 - scale;
3230
EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);
3231
}
3232
void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3233
{
3234
Emit2RegMisc(false, 0, dest_size >> 4, 0x14, Rd, Rn);
3235
}
3236
void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3237
{
3238
Emit2RegMisc(true, 0, dest_size >> 4, 0x14, Rd, Rn);
3239
}
3240
void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3241
{
3242
Emit2RegMisc(false, 1, dest_size >> 4, 0x14, Rd, Rn);
3243
}
3244
void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3245
{
3246
Emit2RegMisc(true, 1, dest_size >> 4, 0x14, Rd, Rn);
3247
}
3248
void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3249
{
3250
Emit2RegMisc(false, 0, dest_size >> 4, 0x12, Rd, Rn);
3251
}
3252
void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)
3253
{
3254
Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);
3255
}
3256
3257
void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3258
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3259
EmitThreeSame(true, size >> 4, 0b10001, Rd, Rn, Rm);
3260
}
3261
3262
void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3263
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3264
EmitThreeSame(false, size >> 4, 0b00111, Rd, Rn, Rm);
3265
}
3266
3267
void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3268
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3269
EmitThreeSame(false, size >> 4, 0b00110, Rd, Rn, Rm);
3270
}
3271
3272
void ARM64FloatEmitter::CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3273
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3274
EmitThreeSame(true, size >> 4, 0b00110, Rd, Rn, Rm);
3275
}
3276
3277
void ARM64FloatEmitter::CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3278
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3279
EmitThreeSame(true, size >> 4, 0b00111, Rd, Rn, Rm);
3280
}
3281
3282
void ARM64FloatEmitter::CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {
3283
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3284
EmitThreeSame(false, size >> 4, 0b10001, Rd, Rn, Rm);
3285
}
3286
3287
void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
3288
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3289
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01001, Rd, Rn);
3290
}
3291
3292
void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
3293
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3294
Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01000, Rd, Rn);
3295
}
3296
3297
void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
3298
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3299
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01000, Rd, Rn);
3300
}
3301
3302
void ARM64FloatEmitter::CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
3303
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3304
Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01001, Rd, Rn);
3305
}
3306
3307
void ARM64FloatEmitter::CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {
3308
_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);
3309
Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01010, Rd, Rn);
3310
}
3311
3312
// Move
3313
void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)
3314
{
3315
u32 imm5 = 0;
3316
3317
if (size == 8)
3318
imm5 = 1;
3319
else if (size == 16)
3320
imm5 = 2;
3321
else if (size == 32)
3322
imm5 = 4;
3323
else if (size == 64)
3324
imm5 = 8;
3325
3326
EmitCopy(IsQuad(Rd), 0, imm5, 1, Rd, Rn);
3327
3328
}
3329
void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn)
3330
{
3331
u32 imm5 = 0;
3332
3333
if (size == 8)
3334
{
3335
imm5 = 1;
3336
imm5 |= index << 1;
3337
}
3338
else if (size == 16)
3339
{
3340
imm5 = 2;
3341
imm5 |= index << 2;
3342
}
3343
else if (size == 32)
3344
{
3345
imm5 = 4;
3346
imm5 |= index << 3;
3347
}
3348
else if (size == 64)
3349
{
3350
imm5 = 8;
3351
imm5 |= index << 4;
3352
}
3353
3354
EmitCopy(1, 0, imm5, 3, Rd, Rn);
3355
}
3356
void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2)
3357
{
3358
u32 imm5 = 0, imm4 = 0;
3359
3360
if (size == 8)
3361
{
3362
imm5 = 1;
3363
imm5 |= index1 << 1;
3364
imm4 = index2;
3365
}
3366
else if (size == 16)
3367
{
3368
imm5 = 2;
3369
imm5 |= index1 << 2;
3370
imm4 = index2 << 1;
3371
}
3372
else if (size == 32)
3373
{
3374
imm5 = 4;
3375
imm5 |= index1 << 3;
3376
imm4 = index2 << 2;
3377
}
3378
else if (size == 64)
3379
{
3380
imm5 = 8;
3381
imm5 |= index1 << 4;
3382
imm4 = index2 << 3;
3383
}
3384
3385
EmitCopy(1, 1, imm5, imm4, Rd, Rn);
3386
}
3387
3388
void ARM64FloatEmitter::UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
3389
{
3390
bool b64Bit = Is64Bit(Rd);
3391
_assert_msg_(Rd < SP, "%s destination must be a GPR!", __FUNCTION__);
3392
_assert_msg_(!(b64Bit && size != 64), "%s must have a size of 64 when destination is 64bit!", __FUNCTION__);
3393
u32 imm5 = 0;
3394
3395
if (size == 8)
3396
{
3397
imm5 = 1;
3398
imm5 |= index << 1;
3399
}
3400
else if (size == 16)
3401
{
3402
imm5 = 2;
3403
imm5 |= index << 2;
3404
}
3405
else if (size == 32)
3406
{
3407
imm5 = 4;
3408
imm5 |= index << 3;
3409
}
3410
else if (size == 64)
3411
{
3412
imm5 = 8;
3413
imm5 |= index << 4;
3414
}
3415
3416
EmitCopy(b64Bit, 0, imm5, 7, Rd, Rn);
3417
}
3418
void ARM64FloatEmitter::SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)
3419
{
3420
bool b64Bit = Is64Bit(Rd);
3421
_assert_msg_(Rd < SP, "%s destination must be a GPR!", __FUNCTION__);
3422
_assert_msg_(size != 64, "%s doesn't support 64bit destination. Use UMOV!", __FUNCTION__);
3423
u32 imm5 = 0;
3424
3425
if (size == 8)
3426
{
3427
imm5 = 1;
3428
imm5 |= index << 1;
3429
}
3430
else if (size == 16)
3431
{
3432
imm5 = 2;
3433
imm5 |= index << 2;
3434
}
3435
else if (size == 32)
3436
{
3437
imm5 = 4;
3438
imm5 |= index << 3;
3439
}
3440
3441
EmitCopy(b64Bit, 0, imm5, 5, Rd, Rn);
3442
}
3443
3444
void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh) {
3445
Rd = DecodeReg(Rd);
3446
u8 abc = abcdefgh >> 5;
3447
u8 defgh = abcdefgh & 0x1F;
3448
Write32((Q << 30) | (op << 29) | (0xF << 24) | (abc << 16) | (cmode << 12) | (o2 << 11) | (1 << 10) | (defgh << 5) | Rd);
3449
}
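
// The 8-bit modified immediate is split exactly as in the AdvSIMD encoding: the top
// three bits (abc) go to bits 18:16 and the low five (defgh) to bits 9:5, while
// op/cmode (chosen by FMOV/MOVI/MVNI/ORR/BIC below) select how imm8 gets expanded.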
3450
3451
void ARM64FloatEmitter::FMOV(u8 size, ARM64Reg Rd, u8 imm8) {
3452
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
3453
_assert_msg_(size == 32 || size == 64, "%s: unsupported size", __FUNCTION__);
3454
_assert_msg_(IsQuad(Rd) || size == 32, "Use non-SIMD FMOV to load one double imm8");
3455
EncodeModImm(IsQuad(Rd), size >> 6, 0b1111, 0, Rd, imm8);
3456
}
3457
3458
void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift, bool MSL) {
3459
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
3460
_assert_msg_(size == 8 || size == 16 || size == 32 || size == 64, "%s: unsupported size %d", __FUNCTION__, size);
3461
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
3462
_assert_msg_(!MSL || (size == 32 && shift > 0 && shift <= 16), "MOVI MSL shift requires size 32, shift must be 8 or 16");
3463
_assert_msg_(size != 64 || shift == 0, "MOVI 64-bit imm cannot be shifted");
3464
3465
u8 cmode = 0;
3466
if (size == 8)
3467
cmode = 0b1110;
3468
else if (size == 16)
3469
cmode = 0b1000 | (shift >> 2);
3470
else if (MSL)
3471
cmode = 0b1100 | (shift >> 3);
3472
else if (size == 32)
3473
cmode = (shift >> 2);
3474
else if (size == 64)
3475
cmode = 0b1110;
3476
else
3477
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
3478
3479
EncodeModImm(IsQuad(Rd), size >> 6, cmode, 0, Rd, imm8);
3480
}
3481
3482
void ARM64FloatEmitter::MVNI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift, bool MSL) {
3483
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
3484
_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);
3485
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
3486
_assert_msg_(!MSL || (size == 32 && shift > 0 && shift <= 16), "MVNI MSL shift requires size 32, shift must be 8 or 16");
3487
3488
u8 cmode = 0;
3489
if (size == 16)
3490
cmode = 0b1000 | (shift >> 2);
3491
else if (MSL)
3492
cmode = 0b1100 | (shift >> 3);
3493
else if (size == 32)
3494
cmode = (shift >> 2);
3495
else
3496
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
3497
3498
EncodeModImm(IsQuad(Rd), 1, cmode, 0, Rd, imm8);
3499
}
3500
3501
void ARM64FloatEmitter::ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift) {
3502
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
3503
_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);
3504
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
3505
3506
u8 cmode = 0;
3507
if (size == 16)
3508
cmode = 0b1001 | (shift >> 2);
3509
else if (size == 32)
3510
cmode = 0b0001 | (shift >> 2);
3511
else
3512
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
3513
3514
EncodeModImm(IsQuad(Rd), 0, cmode, 0, Rd, imm8);
3515
}
3516
3517
void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift) {
3518
_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
3519
_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);
3520
_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);
3521
3522
u8 cmode = 0;
3523
if (size == 16)
3524
cmode = 0b1001 | (shift >> 2);
3525
else if (size == 32)
3526
cmode = 0b0001 | (shift >> 2);
3527
else
3528
_assert_msg_(false, "%s: unhandled case", __FUNCTION__);
3529
3530
EncodeModImm(IsQuad(Rd), 1, cmode, 0, Rd, imm8);
3531
}
3532
3533
// One source
3534
void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn)
3535
{
3536
u32 dst_encoding = 0;
3537
u32 src_encoding = 0;
3538
3539
if (size_to == 16)
3540
dst_encoding = 3;
3541
else if (size_to == 32)
3542
dst_encoding = 0;
3543
else if (size_to == 64)
3544
dst_encoding = 1;
3545
3546
if (size_from == 16)
3547
src_encoding = 3;
3548
else if (size_from == 32)
3549
src_encoding = 0;
3550
else if (size_from == 64)
3551
src_encoding = 1;
3552
3553
Emit1Source(0, 0, src_encoding, 4 | dst_encoding, Rd, Rn);
3554
}
3555
3556
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn)
{
	if (IsScalar(Rn)) {
		// Source is in FP register (like destination!). We must use a vector encoding.
		bool sign = false;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);
		int sz = IsDouble(Rn);
		Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);
	} else {
		bool sf = Is64Bit(Rn);
		u32 type = 0;
		if (IsDouble(Rd))
			type = 1;
		EmitConversion(sf, 0, type, 0, 2, Rd, Rn);
	}
}

void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)
{
	if (IsScalar(Rn)) {
		// Source is in FP register (like destination!). We must use a vector encoding.
		bool sign = true;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);
		int sz = IsDouble(Rn);
		Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);
	} else {
		bool sf = Is64Bit(Rn);
		u32 type = 0;
		if (IsDouble(Rd))
			type = 1;

		EmitConversion(sf, 0, type, 0, 3, Rd, Rn);
	}
}

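// Fixed-point variants: the source is treated as a fixed-point value with 'scale' fractional bits.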
void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
	if (IsScalar(Rn)) {
		int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		bool sf = Is64Bit(Rn);
		u32 type = 0;
		if (IsDouble(Rd))
			type = 1;

		EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);
	}
}

void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)
{
	if (IsScalar(Rn)) {
		int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;
		Rd = DecodeReg(Rd);
		Rn = DecodeReg(Rn);

		Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);
	} else {
		bool sf = Is64Bit(Rn);
		u32 type = 0;
		if (IsDouble(Rd))
			type = 1;

		EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);
	}
}

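// Floating-point compares. The single-operand forms compare against +0.0; the FCMPE variants
// additionally signal an Invalid Operation exception on quiet NaN inputs.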
void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)
{
	EmitCompare(0, 0, 0, 0, Rn, Rm);
}
void ARM64FloatEmitter::FCMP(ARM64Reg Rn)
{
	EmitCompare(0, 0, 0, 8, Rn, (ARM64Reg)0);
}
void ARM64FloatEmitter::FCMPE(ARM64Reg Rn, ARM64Reg Rm)
{
	EmitCompare(0, 0, 0, 0x10, Rn, Rm);
}
void ARM64FloatEmitter::FCMPE(ARM64Reg Rn)
{
	EmitCompare(0, 0, 0, 0x18, Rn, (ARM64Reg)0);
}
void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitThreeSame(0, size >> 6, 0x1C, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xD, Rd, Rn);
}
void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitThreeSame(1, size >> 6, 0x1C, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xC, Rd, Rn);
}
void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitThreeSame(1, 2 | (size >> 6), 0x1C, Rd, Rn, Rm);
}
void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x0C, Rd, Rn);
}
void ARM64FloatEmitter::FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xD, Rd, Rn);
}
void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn)
{
	Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xE, Rd, Rn);
}

void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)
{
	EmitCondSelect(0, 0, cond, Rd, Rn, Rm);
}

void ARM64FloatEmitter::FCCMP(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond) {
	EmitCondCompare(0, 0, cond, 0, nzcv, Rn, Rm);
}

void ARM64FloatEmitter::FCCMPE(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond) {
	EmitCondCompare(0, 0, cond, 1, nzcv, Rn, Rm);
}

// Permute
void ARM64FloatEmitter::UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitPermute(size, 1, Rd, Rn, Rm);
}
void ARM64FloatEmitter::TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitPermute(size, 2, Rd, Rn, Rm);
}
void ARM64FloatEmitter::ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitPermute(size, 3, Rd, Rn, Rm);
}
void ARM64FloatEmitter::UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitPermute(size, 5, Rd, Rn, Rm);
}
void ARM64FloatEmitter::TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitPermute(size, 6, Rd, Rn, Rm);
}
void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)
{
	EmitPermute(size, 7, Rd, Rn, Rm);
}

void ARM64FloatEmitter::EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, int index) {
	_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);

	bool quad = IsQuad(Rd);
	_assert_msg_(index >= 0 && index < 16 && (quad || index < 8), "%s start index out of bounds", __FUNCTION__);
	_assert_msg_(IsQuad(Rd) == IsQuad(Rn) && IsQuad(Rd) == IsQuad(Rm), "%s operands not same size", __FUNCTION__);

	Rd = DecodeReg(Rd);
	Rn = DecodeReg(Rn);
	Rm = DecodeReg(Rm);

	Write32((quad << 30) | (0x17 << 25) | (Rm << 16) | (index << 11) | (Rn << 5) | Rd);
}

// Shift by immediate
void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	SSHLL(src_size, Rd, Rn, shift, false);
}
void ARM64FloatEmitter::SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	SSHLL(src_size, Rd, Rn, shift, true);
}
void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	SHRN(dest_size, Rd, Rn, shift, false);
}
void ARM64FloatEmitter::SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	SHRN(dest_size, Rd, Rn, shift, true);
}
void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	USHLL(src_size, Rd, Rn, shift, false);
}
void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)
{
	USHLL(src_size, Rd, Rn, shift, true);
}
void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	SHLL(src_size, Rd, Rn, false);
}
void ARM64FloatEmitter::SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {
	SHLL(src_size, Rd, Rn, true);
}
void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
	SXTL(src_size, Rd, Rn, false);
}
void ARM64FloatEmitter::SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
	SXTL(src_size, Rd, Rn, true);
}
void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
	UXTL(src_size, Rd, Rn, false);
}
void ARM64FloatEmitter::UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)
{
	UXTL(src_size, Rd, Rn, true);
}

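// The AdvSIMD shift-by-immediate encoding packs the element size and shift amount into a single
// immh:immb field: left shifts encode (element size + shift), right shifts encode (element size * 2 - shift).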
static u32 EncodeImmShiftLeft(u8 src_size, u32 shift) {
	return src_size + shift;
}

static u32 EncodeImmShiftRight(u8 src_size, u32 shift) {
	return src_size * 2 - shift;
}

void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{
	_assert_msg_(shift < src_size, "%s shift amount must be less than the element size!", __FUNCTION__);
	u32 imm = EncodeImmShiftLeft(src_size, shift);
	EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x14, Rd, Rn);
}

void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{
	_assert_msg_(shift < src_size, "%s shift amount must be less than the element size!", __FUNCTION__);
	u32 imm = EncodeImmShiftLeft(src_size, shift);
	EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);
}

void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {
	_assert_msg_(src_size <= 32, "%s source size cannot be 64", __FUNCTION__);
	Emit2RegMisc(upper, 1, src_size >> 4, 0b10011, Rd, Rn);
}

void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)
{
	_assert_msg_(shift > 0, "%s shift amount must be greater than zero!", __FUNCTION__);
	_assert_msg_(shift <= dest_size, "%s shift amount must be less than or equal to the element size!", __FUNCTION__);
	u32 imm = EncodeImmShiftRight(dest_size, shift);
	EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x10, Rd, Rn);
}

void ARM64FloatEmitter::SHL(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
	_assert_msg_(shift < dest_size, "%s shift amount must be less than the element size!", __FUNCTION__);
	u32 imm = EncodeImmShiftLeft(dest_size, shift);
	EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0xA, Rd, Rn);
}

void ARM64FloatEmitter::USHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
	_assert_msg_(shift < dest_size, "%s shift amount must be less than the element size!", __FUNCTION__);
	u32 imm = EncodeImmShiftRight(dest_size, shift);
	EmitShiftImm(IsQuad(Rd), true, imm >> 3, imm & 7, 0x0, Rd, Rn);
}

void ARM64FloatEmitter::SSHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {
	_assert_msg_(shift < dest_size, "%s shift amount must be less than the element size!", __FUNCTION__);
	u32 imm = EncodeImmShiftRight(dest_size, shift);
	EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0x0, Rd, Rn);
}

void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
{
	SSHLL(src_size, Rd, Rn, 0, upper);
}

void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)
{
	USHLL(src_size, Rd, Rn, 0, upper);
}

// vector x indexed element
void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
{
	_assert_msg_(size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);

	bool L = false;
	bool H = false;
	if (size == 32) {
		L = index & 1;
		H = (index >> 1) & 1;
	} else if (size == 64) {
		H = index == 1;
	}

	EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);
}

void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)
{
	_assert_msg_(size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);

	bool L = false;
	bool H = false;
	if (size == 32) {
		L = index & 1;
		H = (index >> 1) & 1;
	} else if (size == 64) {
		H = index == 1;
	}

	EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);
}

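// Stack frame layout (16-byte aligned): FP and LR at [SP], followed by the requested GPRs in
// pairs, then the requested FPRs in pairs, each group rounded up to a multiple of 16 bytes.
// X29 is pointed at the new frame so the stores below can also be frame-pointer relative.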
void ARM64FloatEmitter::ABI_PushRegisters(uint32_t registers, uint32_t fp_registers) {
	_assert_msg_((registers & 0x60000000) == 0, "ABI_PushRegisters: Do not include FP and LR, those are always saved");

	ARM64Reg gprs[32]{}, fprs[32]{};
	int num_gprs = 0, num_fprs = 0;
	for (int i = 0; i < 29; i++) {
		if (registers & (1U << i))
			gprs[num_gprs++] = (ARM64Reg)(X0 + i);
	}

	for (int i = 0; i < 32; i++) {
		if (fp_registers & (1U << i))
			fprs[num_fprs++] = (ARM64Reg)(D0 + i);
	}

	u32 stack_size = 16 + ROUND_UP(num_gprs * 8, 16) + ROUND_UP(num_fprs * 8, 16);

	// Stack is required to be quad-word aligned.
	if (stack_size < 256) {
		m_emit->STP(INDEX_PRE, FP, LR, SP, -(s32)stack_size);
	} else {
		m_emit->SUB(SP, SP, stack_size);
		m_emit->STP(INDEX_UNSIGNED, FP, LR, SP, 0);
	}
	m_emit->MOVfromSP(X29); // Set new frame pointer
	int offset = 16;
	for (int i = 0; i < num_gprs / 2; i++) {
		m_emit->STP(INDEX_SIGNED, gprs[i*2], gprs[i*2+1], X29, offset);
		offset += 16;
	}
	if (num_gprs & 1) {
		m_emit->STR(INDEX_UNSIGNED, gprs[num_gprs - 1], X29, offset);
		offset += 16;
	}

	for (int i = 0; i < num_fprs / 2; i++) {
		STP(64, INDEX_SIGNED, fprs[i * 2], fprs[i * 2 + 1], SP, offset);
		offset += 16;
	}
	if (num_fprs & 1) {
		STR(64, INDEX_UNSIGNED, fprs[num_fprs - 1], X29, offset);
		offset += 16;
	}
	// Now offset should be == stack_size.
}

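// Must mirror the frame layout created by ABI_PushRegisters above.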
void ARM64FloatEmitter::ABI_PopRegisters(uint32_t registers, uint32_t fp_registers) {
	ARM64Reg gprs[32]{}, fprs[32]{};
	int num_gprs = 0, num_fprs = 0;
	for (int i = 0; i < 29; i++) {
		if (registers & (1U << i))
			gprs[num_gprs++] = (ARM64Reg)(X0 + i);
	}

	for (int i = 0; i < 32; i++) {
		if (fp_registers & (1U << i))
			fprs[num_fprs++] = (ARM64Reg)(D0 + i);
	}

	u32 stack_size = 16 + ROUND_UP(num_gprs * 8, 16) + ROUND_UP(num_fprs * 8, 16);

	// SP points to the bottom. We're gonna walk it upwards.
	// Reload FP, LR.
	m_emit->LDP(INDEX_SIGNED, FP, LR, SP, 0);
	int offset = 16;
	for (int i = 0; i < num_gprs / 2; i++) {
		m_emit->LDP(INDEX_SIGNED, gprs[i*2], gprs[i*2+1], SP, offset);
		offset += 16;
	}
	// Do the straggler.
	if (num_gprs & 1) {
		m_emit->LDR(INDEX_UNSIGNED, gprs[num_gprs-1], SP, offset);
		offset += 16;
	}

	// Time for the FP regs.
	for (int i = 0; i < num_fprs / 2; i++) {
		LDP(64, INDEX_SIGNED, fprs[i * 2], fprs[i * 2 + 1], SP, offset);
		offset += 16;
	}
	// Do the straggler.
	if (num_fprs & 1) {
		LDR(64, INDEX_UNSIGNED, fprs[num_fprs-1], SP, offset);
		offset += 16;
	}
	// Now offset should be == stack_size.

	// Restore the stack pointer.
	m_emit->ADD(SP, SP, stack_size);
}

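// Immediate helpers: each first tries to encode imm directly as a logical or arithmetic
// immediate; when that fails, the value is materialized into the scratch register instead.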
void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	// It's probably okay to AND by extra bits.
	if (!Is64Bit(Rn))
		imm &= 0xFFFFFFFF;
	if (!TryANDI2R(Rd, Rn, imm)) {
		_assert_msg_(scratch != INVALID_REG, "ANDI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		AND(Rd, Rn, scratch);
	}
}

void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "ORRI2R - more bits in imm than Rn");
	if (!TryORRI2R(Rd, Rn, imm)) {
		_assert_msg_(scratch != INVALID_REG, "ORRI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		ORR(Rd, Rn, scratch);
	}
}

void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "EORI2R - more bits in imm than Rn");
	if (!TryEORI2R(Rd, Rn, imm)) {
		_assert_msg_(scratch != INVALID_REG, "EORI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		EOR(Rd, Rn, scratch);
	}
}

void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	if (!Is64Bit(Rn))
		imm &= 0xFFFFFFFF;
	unsigned int n, imm_s, imm_r;
	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
		ANDS(Rd, Rn, imm_r, imm_s, n != 0);
	} else if (imm == 0) {
		ANDS(Rd, Rn, Is64Bit(Rn) ? ZR : WZR);
	} else {
		_assert_msg_(scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		ANDS(Rd, Rn, scratch);
	}
}

void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	if (!TryADDI2R(Rd, Rn, imm)) {
		_assert_msg_(scratch != INVALID_REG, "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		ADD(Rd, Rn, scratch);
	}
}

void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	if (!TrySUBI2R(Rd, Rn, imm)) {
		_assert_msg_(scratch != INVALID_REG, "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		SUB(Rd, Rn, scratch);
	}
}

void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	if (!TryCMPI2R(Rn, imm)) {
		_assert_msg_(scratch != INVALID_REG, "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		CMP(Rn, scratch);
	}
}

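// The arithmetic-immediate helpers succeed for any value that fits the 12-bit immediate form
// (optionally shifted left by 12); if only the negated value fits, the opposite instruction
// (SUB for ADD, CMN for CMP, and so on) is emitted instead.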
bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
	s64 negated = Is64Bit(Rn) ? -(s64)imm : -(s32)(u32)imm;
	u32 val;
	bool shift;
	if (imm == 0) {
		// Prefer MOV (ORR) instead of ADD for moves.
		MOV(Rd, Rn);
		return true;
	} else if (IsImmArithmetic(imm, &val, &shift)) {
		ADD(Rd, Rn, val, shift);
		return true;
	} else if (IsImmArithmetic((u64)negated, &val, &shift)) {
		SUB(Rd, Rn, val, shift);
		return true;
	} else {
		return false;
	}
}

bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
	s64 negated = Is64Bit(Rn) ? -(s64)imm : -(s32)(u32)imm;
	u32 val;
	bool shift;
	if (imm == 0) {
		// Prefer MOV (ORR) instead of SUB for moves.
		MOV(Rd, Rn);
		return true;
	} else if (IsImmArithmetic(imm, &val, &shift)) {
		SUB(Rd, Rn, val, shift);
		return true;
	} else if (IsImmArithmetic((u64)negated, &val, &shift)) {
		ADD(Rd, Rn, val, shift);
		return true;
	} else {
		return false;
	}
}

bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u64 imm) {
	s64 negated = Is64Bit(Rn) ? -(s64)imm : -(s32)(u32)imm;
	u32 val;
	bool shift;
	if (IsImmArithmetic(imm, &val, &shift)) {
		CMP(Rn, val, shift);
		return true;
	} else if (IsImmArithmetic((u64)negated, &val, &shift)) {
		CMN(Rn, val, shift);
		return true;
	} else {
		return false;
	}
}

bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
	if (!Is64Bit(Rn))
		imm &= 0xFFFFFFFF;
	u32 n, imm_r, imm_s;
	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
		AND(Rd, Rn, imm_r, imm_s, n != 0);
		return true;
	} else if (imm == 0) {
		MOVI2R(Rd, 0);
		return true;
	} else {
		return false;
	}
}
bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
	_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "TryORRI2R - more bits in imm than Rn");
	u32 n, imm_r, imm_s;
	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
		ORR(Rd, Rn, imm_r, imm_s, n != 0);
		return true;
	} else if (imm == 0) {
		if (Rd != Rn) {
			MOV(Rd, Rn);
		}
		return true;
	} else {
		return false;
	}
}
bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {
	_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "TryEORI2R - more bits in imm than Rn");
	u32 n, imm_r, imm_s;
	if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {
		EOR(Rd, Rn, imm_r, imm_s, n != 0);
		return true;
	} else if (imm == 0) {
		if (Rd != Rn) {
			MOV(Rd, Rn);
		}
		return true;
	} else {
		return false;
	}
}

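// The 8-bit FMOV immediate packs sign, a 3-bit exponent (bit 6 is the inverted exponent MSB,
// replicated downward) and a 4-bit mantissa, covering +/-(1.0 .. 1.9375) * 2^n for n in [-3, 4].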
float FPImm8ToFloat(uint8_t bits) {
	int sign = bits >> 7;
	uint32_t f = 0;
	f |= (sign << 31);
	int bit6 = (bits >> 6) & 1;
	uint32_t exp = ((!bit6) << 7) | (0x7C * bit6) | ((bits >> 4) & 3);
	uint32_t mantissa = (bits & 0xF) << 19;
	f |= exp << 23;
	f |= mantissa;
	float fl;
	memcpy(&fl, &f, sizeof(float));
	return fl;
}

bool FPImm8FromFloat(float value, uint8_t *immOut) {
	uint32_t f;
	memcpy(&f, &value, sizeof(float));
	uint32_t mantissa4 = (f & 0x7FFFFF) >> 19;
	uint32_t exponent = (f >> 23) & 0xFF;
	uint32_t sign = f >> 31;
	if ((exponent >> 7) == ((exponent >> 6) & 1))
		return false;
	uint8_t imm8 = (sign << 7) | ((!(exponent >> 7)) << 6) | ((exponent & 3) << 4) | mantissa4;
	float newFloat = FPImm8ToFloat(imm8);
	if (newFloat == value) {
		*immOut = imm8;
		return true;
	} else {
		return false;
	}
}

void ARM64FloatEmitter::MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {
	_assert_msg_(!IsDouble(Rd), "MOVI2F does not yet support double precision");
	uint8_t imm8;
	if (value == 0.0) {
		if (std::signbit(value)) {
			negate = !negate;
		}
		FMOV(Rd, IsDouble(Rd) ? ZR : WZR);
		if (negate) {
			FNEG(Rd, Rd);
		}
		// TODO: There are some other values we could generate with the float-imm instruction, like 1.0...
	} else if (negate && FPImm8FromFloat(-value, &imm8)) {
		FMOV(Rd, imm8);
	} else if (FPImm8FromFloat(value, &imm8)) {
		FMOV(Rd, imm8);
		if (negate) {
			FNEG(Rd, Rd);
		}
	} else {
		_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);
		u32 ival;
		if (negate) {
			value = -value;
		}
		memcpy(&ival, &value, sizeof(ival));
		m_emit->MOVI2R(scratch, ival);
		FMOV(Rd, scratch);
	}
}

// TODO: Quite a few values could be generated easily using the MOVI instruction and friends.
void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {
	_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);
	int ival;
	memcpy(&ival, &value, 4);
	uint8_t imm8;
	if (ival == 0) { // Make sure to not catch negative zero here
		// Prefer MOVI 0, which may have no latency on some CPUs.
		MOVI(32, Rd, 0);
		if (negate)
			FNEG(32, Rd, Rd);
	} else if (negate && FPImm8FromFloat(-value, &imm8)) {
		FMOV(32, Rd, imm8);
	} else if (FPImm8FromFloat(value, &imm8)) {
		FMOV(32, Rd, imm8);
		if (negate) {
			FNEG(32, Rd, Rd);
		}
	} else if (TryAnyMOVI(32, Rd, ival)) {
		if (negate) {
			FNEG(32, Rd, Rd);
		}
	} else if (TryAnyMOVI(32, Rd, ival ^ 0x80000000)) {
		if (!negate) {
			FNEG(32, Rd, Rd);
		}
	} else {
		_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);
		if (negate) {
			ival ^= 0x80000000;
		}
		m_emit->MOVI2R(scratch, ival);
		DUP(32, Rd, scratch);
	}
}

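// Attempts to materialize a vector where every 'size'-bit element equals elementValue using a
// single MOVI/MVNI, trying the plain, shifted and (for 32-bit) MSL forms. The 64-bit form only
// works when every byte of the element is 0x00 or 0xFF. Returns false if no encoding fits.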
bool ARM64FloatEmitter::TryMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
	if (size == 8) {
		// Can always do 8.
		MOVI(size, Rd, elementValue & 0xFF);
		return true;
	} else if (size == 16) {
		if ((elementValue & 0xFF00) == 0) {
			MOVI(size, Rd, elementValue & 0xFF, 0);
			return true;
		} else if ((elementValue & 0x00FF) == 0) {
			MOVI(size, Rd, (elementValue >> 8) & 0xFF, 8);
			return true;
		} else if ((elementValue & 0xFF00) == 0xFF00) {
			MVNI(size, Rd, ~elementValue & 0xFF, 0);
			return true;
		} else if ((elementValue & 0x00FF) == 0x00FF) {
			MVNI(size, Rd, (~elementValue >> 8) & 0xFF, 8);
			return true;
		}

		return false;
	} else if (size == 32) {
		for (int shift = 0; shift < 32; shift += 8) {
			uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift);
			if ((elementValue & mask) == 0) {
				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift);
				return true;
			} else if ((elementValue & mask) == mask) {
				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift);
				return true;
			}
		}

		// Maybe an MSL shift will work?
		for (int shift = 8; shift <= 16; shift += 8) {
			uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift);
			uint32_t ones = (1 << shift) - 1;
			uint32_t notOnes = 0xFFFFFF00 << shift;
			if ((elementValue & mask) == ones) {
				MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);
				return true;
			} else if ((elementValue & mask) == notOnes) {
				// MVNI inverts the expanded immediate, so pass the inverted byte here as well.
				MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift, true);
				return true;
			}
		}

		return false;
	} else if (size == 64) {
		uint8_t imm8 = 0;
		for (int i = 0; i < 8; ++i) {
			uint8_t byte = (elementValue >> (i * 8)) & 0xFF;
			if (byte != 0 && byte != 0xFF)
				return false;

			if (byte == 0xFF)
				imm8 |= 1 << i;
		}

		// Didn't run into any partial bytes, so size 64 is doable.
		MOVI(size, Rd, imm8);
		return true;
	}
	return false;
}

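// Replicates the element across 64 bits and retries TryMOVI at the other element widths, since a
// pattern that isn't encodable at the requested width may still be encodable at another.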
bool ARM64FloatEmitter::TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {
	// Try the original size first in case that's more optimal.
	if (TryMOVI(size, Rd, elementValue))
		return true;

	uint64_t value = elementValue;
	if (size != 64) {
		uint64_t masked = elementValue & ((1ULL << size) - 1ULL);
		for (int i = size; i < 64; i += size) {
			value |= masked << i;
		}
	}

	for (int attempt = 8; attempt <= 64; attempt += attempt) {
		// Original size was already attempted above.
		if (attempt != size) {
			if (TryMOVI(attempt, Rd, value))
				return true;
		}
	}

	return false;
}

void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {
	u32 val;
	bool shift;
	if (IsImmArithmetic(imm, &val, &shift)) {
		SUBS(Rd, Rn, val, shift);
	} else {
		_assert_msg_(scratch != INVALID_REG, "SUBSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);
		MOVI2R(scratch, imm);
		SUBS(Rd, Rn, scratch);
	}
}

void ARM64CodeBlock::PoisonMemory(int offset) {
	// So we can adjust region to writable space. Might be zero.
	ptrdiff_t writable = m_writable - m_code;

	u32 *ptr = (u32 *)(region + offset + writable);
	u32 *maxptr = (u32 *)(region + region_size - offset + writable);
	// If the region size isn't a multiple of 4 bytes, the trailing bytes are left untouched.
	// Less than optimal, but there would be nothing we could do but throw a runtime warning anyway.
	// AArch64: 0xD4200000 = BRK 0
	while (ptr < maxptr)
		*ptr++ = 0xD4200000;
}

} // namespace Arm64Gen