Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/ArmEmitter.cpp
5656 views
1
// Copyright (C) 2003 Dolphin Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official SVN repository and contact information can be found at
16
// http://code.google.com/p/dolphin-emu/
17
18
#include "ppsspp_config.h"
19
20
#include <stdarg.h>
21
#include <stddef.h>
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25
26
#if PPSSPP_PLATFORM(IOS)
27
#include <libkern/OSCacheControl.h>
28
#include <sys/mman.h>
29
#endif
30
31
#include "Common/Log.h"
32
#include "Common/MemoryUtil.h"
33
#include "Common/ArmEmitter.h"
34
#include "Common/CPUDetect.h"
35
36
#ifdef _WIN32
37
#include "CommonWindows.h"
38
#endif
39
40
// Want it in release builds too
41
#ifdef __ANDROID__
42
#undef _dbg_assert_msg_
43
#define _dbg_assert_msg_ _assert_msg_
44
#endif
45
46
namespace ArmGen
47
{
48
49
// Rotate a right by `amount` bits (no-op for amount == 0, avoiding UB on a 32-bit shift).
inline u32 RotR(u32 a, int amount) {
	if (amount == 0)
		return a;
	return (a >> amount) | (a << (32 - amount));
}

// Rotate a left by `amount` bits (no-op for amount == 0, avoiding UB on a 32-bit shift).
inline u32 RotL(u32 a, int amount) {
	if (amount == 0)
		return a;
	return (a << amount) | (a >> (32 - amount));
}
58
59
// Tries to encode `imm` as an ARM data-processing immediate (an 8-bit value
// rotated right by an even amount). Returns true and fills op2 on success.
bool TryMakeOperand2(u32 imm, Operand2 &op2) {
	// Just brute force all 16 even rotations.
	for (int rot = 0; rot < 16; rot++) {
		u32 mask = RotR(0xFF, rot * 2);
		if ((imm & mask) != imm)
			continue;
		op2 = Operand2((u8)(RotL(imm, rot * 2)), (u8)rot);
		return true;
	}
	return false;
}
70
71
// Like TryMakeOperand2, but also tries the bitwise inverse of imm.
// On success *inverse tells the caller whether to use the inverted form
// (e.g. MVN instead of MOV, BIC instead of AND).
bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse)
{
	if (TryMakeOperand2(imm, op2)) {
		*inverse = false;
		return true;
	}
	*inverse = true;
	return TryMakeOperand2(~imm, op2);
}
81
82
// Like TryMakeOperand2, but also tries the arithmetic negation of imm.
// On success *negated tells the caller whether to use the negated form
// (e.g. SUB instead of ADD, CMN instead of CMP).
bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated)
{
	if (TryMakeOperand2(imm, op2)) {
		*negated = false;
		return true;
	}
	*negated = true;
	return TryMakeOperand2(-imm, op2);
}
92
93
// Encodes imm as an Operand2, asserting that the encoding must succeed.
// On failure (release builds) the returned Operand2 is whatever
// TryMakeOperand2 left behind, and an error is logged.
Operand2 AssumeMakeOperand2(u32 imm) {
	Operand2 op2;
	const bool ok = TryMakeOperand2(imm, op2);
	_dbg_assert_msg_(ok, "Could not make assumed Operand2.");
	if (!ok) {
		// Make double sure that we get it logged.
		ERROR_LOG(Log::JIT, "Could not make assumed Operand2.");
	}
	return op2;
}
103
104
// Tries to load `val` into `reg` using at most two instructions (MOV + ORR).
// Works when val's set bits fall into at most two 8-bit windows, each
// starting on an even bit position. Returns false without emitting anything
// if more ops would be needed.
bool ARMXEmitter::TrySetValue_TwoOp(ARMReg reg, u32 val)
{
	// First pass: count how many 8-bit chunks are needed. Each time we see a
	// set bit pair we consume a whole byte window (i advances 3 extra slots of 2 bits).
	int ops = 0;
	for (int i = 0; i < 16; i++)
	{
		if ((val >> (i*2)) & 0x3)
		{
			ops++;
			i+=3;
		}
	}
	if (ops > 2)
		return false;

	// Second pass: emit MOV for the first chunk, ORR for the second.
	// Note: val is shifted down as we go, so (val & 0xFF) is the current chunk;
	// the rotation (16-i)&0xF re-positions it. Order of i+=3 / val>>=6 together
	// with the loop's val>>=2 skips exactly the byte just emitted.
	bool first = true;
	for (int i = 0; i < 16; i++, val >>=2) {
		if (val & 0x3) {
			first ? MOV(reg, Operand2((u8)val, (u8)((16-i) & 0xF)))
				: ORR(reg, reg, Operand2((u8)val, (u8)((16-i) & 0xF)));
			first = false;
			i+=3;
			val >>= 6;
		}
	}
	return true;
}
130
131
// Tries to encode the float whose raw bits are `val` as a VFP 8-bit
// immediate. Returns true and fills op2 with the imm8 on success.
bool TryMakeFloatIMM8(u32 val, Operand2 &op2)
{
	// The low 19 mantissa bits must be zero — the imm8 cannot represent them.
	if ((val & 0x0007FFFF) == 0)
	{
		// VFP Encoding for Imms: <7> Not(<6>) Repeat(<6>,5) <5:0> Zeros(19)
		bool bit6 = (val & 0x40000000) == 0x40000000;
		bool canEncode = true;
		// Exponent bits 29..25 must all equal bit 30 (the Repeat(<6>,5) part).
		for (u32 mask = 0x20000000; mask >= 0x02000000; mask >>= 1)
		{
			if (((val & mask) == mask) == bit6)
				canEncode = false;
		}
		if (canEncode)
		{
			// Pack sign, inverted bit6, and the 6 payload bits into imm8.
			u32 imm8 = (val & 0x80000000) >> 24; // sign bit
			imm8 |= (!bit6 << 6);
			imm8 |= (val & 0x01F80000) >> 19;
			op2 = IMM(imm8);
			return true;
		}
	}

	return false;
}
155
156
// Loads the raw bit pattern of `val` (optionally negated) into the GPR `dest`.
// Uses union type-punning to reinterpret the float's bits as u32.
void ARMXEmitter::MOVI2FR(ARMReg dest, float val, bool negate)
{
	union {float f; u32 u;} conv;
	conv.f = negate ? -val : val;
	MOVI2R(dest, conv.u);
}
162
163
// Loads `val` (optionally negated) into the VFP register `dest`.
// Prefers a single VMOV-immediate; otherwise goes through `tempReg` (a GPR).
void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate)
{
	union {float f; u32 u;} conv;
	conv.f = negate ? -val : val;
	// Try moving directly first if mantissa is empty (VFP imm8 encodable).
	Operand2 op2;
	if (TryMakeFloatIMM8(conv.u, op2))
		VMOV(dest, op2);
	else
	{
		// Fall back: materialize the bits in a GPR, then transfer to VFP.
		MOVI2R(tempReg, conv.u);
		VMOV(dest, tempReg);
	}
	// Otherwise, possible to use a literal pool and VLDR directly (+- 1020)
}
178
179
// NEON variant of MOVI2F: loads `val` (optionally negated) into `dest`,
// duplicating across lanes via VDUP when the immediate isn't encodable.
void ARMXEmitter::MOVI2F_neon(ARMReg dest, float val, ARMReg tempReg, bool negate)
{
	union {float f; u32 u;} conv;
	conv.f = negate ? -val : val;
	// Try moving directly first if mantissa is empty (imm8 encodable).
	Operand2 op2;
	if (TryMakeFloatIMM8(conv.u, op2))
		// NOTE(review): passes the raw bits rather than op2 — presumably
		// VMOV_neon re-derives the immediate encoding itself; confirm.
		VMOV_neon(F_32, dest, conv.u);
	else
	{
		MOVI2R(tempReg, conv.u);
		VDUP(F_32, dest, tempReg);
	}
	// Otherwise, possible to use a literal pool and VLD1 directly (+- 1020)
}
194
195
// rd = rs + val. Encodes directly when possible; otherwise materializes
// val in `scratch` first (scratch is clobbered in that case).
void ARMXEmitter::ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryADDI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	ADD(rd, rs, scratch);
}
202
203
// Tries to emit rd = rs + val without a scratch register.
// Returns false (emitting nothing) if no short encoding exists.
bool ARMXEmitter::TryADDI2R(ARMReg rd, ARMReg rs, u32 val)
{
	// Adding zero: at most a register move.
	if (val == 0) {
		if (rd != rs)
			MOV(rd, rs);
		return true;
	}
	Operand2 op2;
	bool negated;
	// Single ADD (or SUB of the negation) if the immediate encodes.
	if (TryMakeOperand2_AllowNegation(val, op2, &negated)) {
		if (!negated)
			ADD(rd, rs, op2);
		else
			SUB(rd, rs, op2);
		return true;
	} else {
		// Try 16-bit additions and subtractions - easy to test for.
		// Should also try other rotations...
		if ((val & 0xFFFF0000) == 0) {
			// Decompose into two additions: high byte then low byte.
			ADD(rd, rs, Operand2((u8)(val >> 8), 12)); // rotation right by 12*2 == rotation left by 8
			ADD(rd, rd, Operand2((u8)(val), 0));
			return true;
		} else if ((((u32)-(s32)val) & 0xFFFF0000) == 0) {
			// Same trick with the negation: two subtractions.
			val = (u32)-(s32)val;
			SUB(rd, rs, Operand2((u8)(val >> 8), 12));
			SUB(rd, rd, Operand2((u8)(val), 0));
			return true;
		} else {
			return false;
		}
	}
}
236
237
void ARMXEmitter::SUBI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
238
{
239
if (!TrySUBI2R(rd, rs, val)) {
240
MOVI2R(scratch, val);
241
SUB(rd, rs, scratch);
242
}
243
}
244
245
bool ARMXEmitter::TrySUBI2R(ARMReg rd, ARMReg rs, u32 val)
246
{
247
// Just add a negative.
248
return TryADDI2R(rd, rs, (u32)-(s32)val);
249
}
250
251
// rd = rs & val. Encodes directly when possible; otherwise materializes
// val in `scratch` first (scratch is clobbered in that case).
void ARMXEmitter::ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryANDI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	AND(rd, rs, scratch);
}
258
259
// Tries to emit rd = rs & val without a scratch register, using AND, BIC,
// UBFX (ARMv7), or a short sequence of BICs. Returns false if it would take
// too many instructions.
bool ARMXEmitter::TryANDI2R(ARMReg rd, ARMReg rs, u32 val)
{
	Operand2 op2;
	bool inverse;
	if (val == 0) {
		// Avoid the ALU, may improve pipeline.
		MOV(rd, 0);
		return true;
	} else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) {
		if (!inverse) {
			AND(rd, rs, op2);
		} else {
			// Mask encodes inverted: clear those bits instead.
			BIC(rd, rs, op2);
		}
		return true;
	} else {
#if PPSSPP_ARCH(ARMV7)
		// Check if we have a single pattern of sequential bits.
		// seq ends as: the width of a single low-bit run, or -2 if the set
		// bits are not one contiguous run starting at bit 0.
		int seq = -1;
		for (int i = 0; i < 32; ++i) {
			if (((val >> i) & 1) == 0) {
				if (seq == -1) {
					// The width is all bits previous to this, set to 1.
					seq = i;
				}
			} else if (seq != -1) {
				// Uh oh, more than one sequence.
				seq = -2;
			}
		}

		if (seq > 0) {
			// AND with a low mask == zero-extend the low `seq` bits.
			UBFX(rd, rs, 0, seq);
			return true;
		}
#endif

		// Count how many BICs a chunked clear would take.
		int ops = 0;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			// If either low bit is not set, we need to use a BIC for them.
			if ((bits & 3) != 3) {
				++ops;
				i += 8 - 2;
			}
		}

		// The worst case is 4 (e.g. 0x55555555.)
#if PPSSPP_ARCH(ARMV7)
		// On ARMv7 the caller can fall back to MOVW/MOVT, so give up early.
		if (ops > 3) {
			return false;
		}
#endif
		// Emit one BIC per 8-bit chunk of bits to clear (~bits at that rotation).
		bool first = true;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			if ((bits & 3) != 3) {
				u8 rotation = i == 0 ? 0 : 16 - i / 2;
				if (first) {
					BIC(rd, rs, Operand2(~bits, rotation));
					first = false;
				} else {
					BIC(rd, rd, Operand2(~bits, rotation));
				}
				// Well, we took care of these other bits while we were at it.
				i += 8 - 2;
			}
		}
		return true;
	}
}
330
331
void ARMXEmitter::CMPI2R(ARMReg rs, u32 val, ARMReg scratch)
332
{
333
if (!TryCMPI2R(rs, val)) {
334
MOVI2R(scratch, val);
335
CMP(rs, scratch);
336
}
337
}
338
339
bool ARMXEmitter::TryCMPI2R(ARMReg rs, u32 val)
340
{
341
Operand2 op2;
342
bool negated;
343
if (TryMakeOperand2_AllowNegation(val, op2, &negated)) {
344
if (!negated)
345
CMP(rs, op2);
346
else
347
CMN(rs, op2);
348
return true;
349
} else {
350
return false;
351
}
352
}
353
354
void ARMXEmitter::TSTI2R(ARMReg rs, u32 val, ARMReg scratch)
355
{
356
if (!TryTSTI2R(rs, val)) {
357
MOVI2R(scratch, val);
358
TST(rs, scratch);
359
}
360
}
361
362
bool ARMXEmitter::TryTSTI2R(ARMReg rs, u32 val)
363
{
364
Operand2 op2;
365
if (TryMakeOperand2(val, op2)) {
366
TST(rs, op2);
367
return true;
368
} else {
369
return false;
370
}
371
}
372
373
// rd = rs | val. Encodes directly when possible; otherwise materializes
// val in `scratch` first (scratch is clobbered in that case).
void ARMXEmitter::ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryORI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	ORR(rd, rs, scratch);
}
380
381
// Tries to emit rd = rs | val without a scratch register, using a single ORR
// or a short sequence of ORRs over 8-bit chunks. Returns false if a
// MOVI2R-based fallback would be cheaper.
bool ARMXEmitter::TryORI2R(ARMReg rd, ARMReg rs, u32 val)
{
	Operand2 op2;
	if (val == 0) {
		// Avoid the ALU, may improve pipeline.
		if (rd != rs) {
			MOV(rd, rs);
		}
		return true;
	} else if (TryMakeOperand2(val, op2)) {
		ORR(rd, rs, op2);
		return true;
	} else {
		// Count how many chunked ORRs would be needed.
		int ops = 0;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			// If either low bit is set, we need to use a ORR for them.
			if ((bits & 3) != 0) {
				++ops;
				i += 8 - 2;
			}
		}

		// The worst case is 4 (e.g. 0x55555555.) But MVN can make it 2. Not sure if better.
		bool inversed;
		if (TryMakeOperand2_AllowInverse(val, op2, &inversed) && ops >= 3) {
			// The inverse encodes, so MVN+ORR in the caller beats 3+ ORRs here.
			return false;
#if PPSSPP_ARCH(ARMV7)
		} else if (ops > 3) {
			// On ARMv7 MOVW/MOVT in the caller beats 4 ORRs.
			return false;
#endif
		}

		// Emit one ORR per 8-bit chunk of set bits.
		bool first = true;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			if ((bits & 3) != 0) {
				u8 rotation = i == 0 ? 0 : 16 - i / 2;
				if (first) {
					ORR(rd, rs, Operand2(bits, rotation));
					first = false;
				} else {
					ORR(rd, rd, Operand2(bits, rotation));
				}
				// Well, we took care of these other bits while we were at it.
				i += 8 - 2;
			}
		}
		return true;
	}
}
432
433
void ARMXEmitter::EORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
434
{
435
if (!TryEORI2R(rd, rs, val)) {
436
MOVI2R(scratch, val);
437
EOR(rd, rs, scratch);
438
}
439
}
440
441
bool ARMXEmitter::TryEORI2R(ARMReg rd, ARMReg rs, u32 val)
442
{
443
Operand2 op2;
444
if (val == 0) {
445
if (rd != rs) {
446
MOV(rd, rs);
447
}
448
return true;
449
} else if (TryMakeOperand2(val, op2)) {
450
EOR(rd, rs, op2);
451
return true;
452
} else {
453
return false;
454
}
455
}
456
457
// Emits all pending literal-pool constants at the current code position and
// backpatches every recorded LDR with the PC-relative offset to its constant.
void ARMXEmitter::FlushLitPool()
{
	for (LiteralPool& pool : currentLitPool) {
		// Search for duplicates
		// NOTE(review): this scan assigns from the LAST matching entry, which
		// includes later (still unwritten, loc == 0) duplicates and the entry
		// itself — so only the final duplicate actually reuses an earlier
		// slot; earlier ones get written again. Harmless but wasteful.
		for (LiteralPool& old_pool : currentLitPool) {
			if (old_pool.val == pool.val)
				pool.loc = old_pool.loc;
		}

		// Write the constant to Literal Pool
		if (!pool.loc)
		{
			pool.loc = (intptr_t)code;
			Write32(pool.val);
		}
		// PC reads as the LDR's address + 8, hence the -8 bias.
		s32 offset = (s32)(pool.loc - (intptr_t)pool.ldr_address - 8);

		// Backpatch the LDR
		// Sets the U (add) bit when the literal lies ahead of the LDR and
		// ORs in the magnitude of the 12-bit offset.
		*(u32*)pool.ldr_address |= (offset >= 0) << 23 | abs(offset);
	}
	// TODO: Save a copy of previous pools in case they are still in range.
	currentLitPool.clear();
}
480
481
// Queues a constant for the literal pool. `loc` stays 0 until FlushLitPool
// writes the value out; `ldr_address` remembers the LDR that must be patched.
void ARMXEmitter::AddNewLit(u32 val)
{
	LiteralPool item;
	item.loc = 0;
	item.val = val;
	item.ldr_address = code;
	currentLitPool.push_back(item);
}
489
490
// Loads an arbitrary 32-bit constant into `reg` using the shortest sequence
// available: MOV/MVN with an encodable immediate, MOVW/MOVT on ARMv7, or a
// MOV+ORR chunk sequence otherwise. With optimize == false (ARMv7 only), a
// fixed-length MOVW+MOVT pair is always emitted so it can be backpatched.
void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize)
{
	Operand2 op2;
	bool inverse;

#if PPSSPP_ARCH(ARMV7)
	// Unused
	if (!optimize)
	{
		// For backpatching on ARMv7
		MOVW(reg, val & 0xFFFF);
		MOVT(reg, val, true);
		return;
	}
#endif

	if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) {
		// Single instruction: MOV, or MVN of the inverse.
		inverse ? MVN(reg, op2) : MOV(reg, op2);
	} else {
#if PPSSPP_ARCH(ARMV7)
		// Use MOVW+MOVT for ARMv7+
		MOVW(reg, val & 0xFFFF);
		if(val & 0xFFFF0000)
			MOVT(reg, val, true);
#else
		if (!TrySetValue_TwoOp(reg,val)) {
			// Worst case: build the value 8 bits at a time with MOV + ORRs.
			bool first = true;
			for (int i = 0; i < 32; i += 2) {
				u8 bits = RotR(val, i) & 0xFF;
				if ((bits & 3) != 0) {
					u8 rotation = i == 0 ? 0 : 16 - i / 2;
					if (first) {
						MOV(reg, Operand2(bits, rotation));
						first = false;
					} else {
						ORR(reg, reg, Operand2(bits, rotation));
					}
					// Well, we took care of these other bits while we were at it.
					i += 8 - 2;
				}
			}
			// Use literal pool for ARMv6.
			// Disabled for now as it is crashing since Vertex Decoder JIT
			// AddNewLit(val);
			// LDR(reg, R_PC); // To be backpatched later
		}
#endif
	}
}
539
540
// Printable names for every ARMReg value, indexed by the enum's numeric
// value: 16 GPRs, 32 single-precision, 32 double-precision, 16 quad registers.
static const char *const armRegStrings[] = {
	"r0","r1","r2","r3",
	"r4","r5","r6","r7",
	"r8","r9","r10","r11",
	"r12","r13","r14","PC",

	"s0", "s1", "s2", "s3",
	"s4", "s5", "s6", "s7",
	"s8", "s9", "s10", "s11",
	"s12", "s13", "s14", "s15",

	"s16", "s17", "s18", "s19",
	"s20", "s21", "s22", "s23",
	"s24", "s25", "s26", "s27",
	"s28", "s29", "s30", "s31",

	"d0", "d1", "d2", "d3",
	"d4", "d5", "d6", "d7",
	"d8", "d9", "d10", "d11",
	"d12", "d13", "d14", "d15",

	"d16", "d17", "d18", "d19",
	"d20", "d21", "d22", "d23",
	"d24", "d25", "d26", "d27",
	"d28", "d29", "d30", "d31",

	"q0", "q1", "q2", "q3",
	"q4", "q5", "q6", "q7",
	"q8", "q9", "q10", "q11",
	"q12", "q13", "q14", "q15",
};
571
572
// Returns a printable name for `reg`, or "(bad)" for out-of-range values.
const char *ARMRegAsString(ARMReg reg) {
	const size_t count = sizeof(armRegStrings) / sizeof(armRegStrings[0]);
	if ((unsigned int)reg >= count)
		return "(bad)";
	return armRegStrings[(int)reg];
}
577
578
// Calls `func`, preferring a direct BL. When the target is outside BL's
// branch range, loads the address into `reg` (clobbered) and branches
// through the register instead.
void ARMXEmitter::QuickCallFunction(ARMReg reg, const void *func) {
	if (!BLInRange(func)) {
		MOVP2R(reg, func);
		BL(reg);
	} else {
		BL(func);
	}
}
586
587
// Points the emitter at a new output buffer and resets cache-flush tracking.
// NOTE(review): writePtr is ignored — this emitter writes and executes at the
// same address; the parameter presumably exists for interface parity with
// emitters that support split write/exec mappings. Confirm against callers.
void ARMXEmitter::SetCodePointer(u8 *ptr, u8 *writePtr)
{
	code = ptr;
	startcode = code;
	lastCacheFlushEnd = ptr;
}

// Returns the current (read-only) emit position.
const u8 *ARMXEmitter::GetCodePointer() const
{
	return code;
}

// Returns the current emit position for writing.
u8 *ARMXEmitter::GetWritableCodePtr()
{
	return code;
}

// Fills `bytes` bytes (rounded down to whole words) with BKPT instructions,
// so accidentally executed padding traps immediately.
void ARMXEmitter::ReserveCodeSpace(u32 bytes)
{
	for (u32 i = 0; i < bytes/4; i++)
		Write32(0xE1200070); //bkpt 0
}
609
610
// Advances the code pointer to the next 16-byte boundary, padding with BKPTs.
const u8 *ARMXEmitter::AlignCode16()
{
	const u32 padding = (u32)((-(intptr_t)code) & 15);
	ReserveCodeSpace(padding);
	return code;
}

// Advances the code pointer to the next 16-byte boundary, padding with NOPs
// (safe to execute through, unlike the BKPT padding above).
const u8 *ARMXEmitter::NopAlignCode16() {
	const int words = (int)((-(intptr_t)code) & 15) / 4;
	for (int w = 0; w < words; w++) {
		Write32(0xE320F000); // one of many possible nops
	}
	return code;
}

// Advances the code pointer to the next 4096-byte (page) boundary, padding
// with BKPTs.
const u8 *ARMXEmitter::AlignCodePage()
{
	const u32 padding = (u32)((-(intptr_t)code) & 4095);
	ReserveCodeSpace(padding);
	return code;
}
629
630
// Flushes the instruction cache for everything emitted since the last flush.
void ARMXEmitter::FlushIcache()
{
	FlushIcacheSection(lastCacheFlushEnd, code);
	lastCacheFlushEnd = code;
}

// Flushes the instruction cache for [start, end), using whatever primitive
// the platform provides. No-op on non-ARM desktop builds (other than Windows).
void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end)
{
#if PPSSPP_PLATFORM(IOS)
	// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
	sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
#elif PPSSPP_PLATFORM(WINDOWS)
	FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif PPSSPP_ARCH(ARM)

#if defined(__clang__) || defined(__ANDROID__)
	__clear_cache(start, end);
#else
	__builtin___clear_cache(start, end);
#endif

#endif
}
653
654
// Sets the condition field (bits 31:28) applied to subsequently emitted
// instructions.
void ARMXEmitter::SetCC(CCFlags cond)
{
	condition = cond << 28;
}

// Emits `count` NOPs (encoded as MOV r0, r0 under the current condition).
void ARMXEmitter::NOP(int count)
{
	for (int i = 0; i < count; i++) {
		Write32(condition | 0x01A00000);
	}
}

// Switches data endianness: BE selects big-endian (bit 9 is the E bit).
void ARMXEmitter::SETEND(bool BE)
{
	//SETEND is non-conditional
	Write32(0xF1010000 | (BE << 9));
}
// Emits a breakpoint carrying a 16-bit immediate, split into the encoding's
// upper-12 (bits 19:8) and lower-4 (bits 3:0) fields.
void ARMXEmitter::BKPT(u16 arg)
{
	Write32(condition | 0x01200070 | (arg << 4 & 0x000FFF00) | (arg & 0x0000000F));
}
// Emits a YIELD hint (a spin-wait hint to SMT/virtualized cores).
void ARMXEmitter::YIELD()
{
	Write32(condition | 0x0320F001);
}
679
680
// Emits a placeholder for a forward branch (B) under the current condition.
// A NOP is written now; SetJumpTarget() later rewrites it into the real branch.
FixupBranch ARMXEmitter::B()
{
	FixupBranch branch;
	branch.type = 0; // Zero for B
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}
// Emits a placeholder for a forward branch-with-link (BL).
FixupBranch ARMXEmitter::BL()
{
	FixupBranch branch;
	branch.type = 1; // One for BL (the original comment wrongly said "Zero for B")
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}

// Emits a placeholder for a forward conditional branch using `Cond`
// (independent of the emitter's current condition).
FixupBranch ARMXEmitter::B_CC(CCFlags Cond)
{
	FixupBranch branch;
	branch.type = 0; // Zero for B
	branch.ptr = code;
	branch.condition = Cond << 28;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}
711
// Emits a conditional branch directly to `fnptr`. The target must be within
// the signed 24-bit word offset range (±32MB) of PC (this instruction + 8).
void ARMXEmitter::B_CC(CCFlags Cond, const void *fnptr)
{
	ptrdiff_t distance = (intptr_t)fnptr - ((intptr_t)(code) + 8);
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "B_CC out of range (%p calls %p)", code, fnptr);

	Write32((Cond << 28) | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));
}
// Emits a placeholder for a forward conditional branch-with-link (BL<cc>).
FixupBranch ARMXEmitter::BL_CC(CCFlags Cond)
{
	FixupBranch branch;
	branch.type = 1; // One for BL (the original comment wrongly said "Zero for B")
	branch.ptr = code;
	branch.condition = Cond << 28;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}
// Rewrites the placeholder recorded in `branch` into a real B/BL targeting
// the current code position. Range-checked to the ±32MB branch window.
void ARMXEmitter::SetJumpTarget(FixupBranch const &branch)
{
	ptrdiff_t distance = ((intptr_t)(code) - 8) - (intptr_t)branch.ptr;
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "SetJumpTarget out of range (%p calls %p)", code, branch.ptr);
	u32 instr = (u32)(branch.condition | ((distance >> 2) & 0x00FFFFFF));
	instr |= branch.type == 0 ? /* B */ 0x0A000000 : /* BL */ 0x0B000000;
	*(u32*)branch.ptr = instr;
}
738
// Emits an unconditional-within-current-condition branch to `fnptr`.
// Target must be within ±32MB of PC (this instruction + 8).
void ARMXEmitter::B(const void *fnptr)
{
	ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "B out of range (%p calls %p)", code, fnptr);

	Write32(condition | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));
}

// Emits a branch through a register (BX src).
void ARMXEmitter::B(ARMReg src)
{
	Write32(condition | 0x012FFF10 | src);
}
751
752
bool ARMXEmitter::BLInRange(const void *fnptr) const {
753
ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);
754
if (distance <= -0x2000000 || distance >= 0x2000000)
755
return false;
756
else
757
return true;
758
}
759
760
// Emits a branch-with-link to `fnptr`. Target must be within ±32MB of PC
// (this instruction + 8); use BLInRange() to check first.
void ARMXEmitter::BL(const void *fnptr)
{
	ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "BL out of range (%p calls %p)", code, fnptr);
	Write32(condition | 0x0B000000 | ((distance >> 2) & 0x00FFFFFF));
}
// Emits a branch-with-link through a register (BLX src).
void ARMXEmitter::BL(ARMReg src)
{
	Write32(condition | 0x012FFF30 | src);
}
771
772
// Pushes `num` registers (passed as varargs) onto the stack with a single
// STMDB SP!, {...} — 2349 == 0x92D, the STMDB-writeback-SP opcode field.
void ARMXEmitter::PUSH(const int num, ...)
{
	u16 RegList = 0;
	u8 Reg;
	int i;
	va_list vl;
	va_start(vl, num);
	// Build the 16-bit register bitmask from the vararg register numbers.
	for (i = 0; i < num; i++) {
		Reg = va_arg(vl, u32);
		RegList |= (1 << Reg);
	}
	va_end(vl);
	Write32(condition | (2349 << 16) | RegList);
}

// Pops `num` registers (passed as varargs) off the stack with a single
// LDMIA SP!, {...} — 2237 == 0x8BD, the LDMIA-writeback-SP opcode field.
void ARMXEmitter::POP(const int num, ...)
{
	u16 RegList = 0;
	u8 Reg;
	int i;
	va_list vl;
	va_start(vl, num);
	for (i=0;i<num;i++)
	{
		Reg = va_arg(vl, u32);
		RegList |= (1 << Reg);
	}
	va_end(vl);
	Write32(condition | (2237 << 16) | RegList);
}
802
803
// Emits a shift-by-immediate data op (LSL/LSR/ASR family): op2 carries the
// 5-bit shift amount, `op` selects the shift kind in bits 6:4.
void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2)
{
	Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | op2.Imm5() | (op << 4) | src);
}
// Emits a shift-by-register data op: the shift amount lives in register op2.
void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2)
{
	Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | (op2 << 8) | (op << 4) | src);
}
811
812
// IMM, REG, IMMSREG, RSR
813
// -1 for invalid if the instruction doesn't support that
814
// Opcode field per data-processing instruction, one row per instruction,
// one column per operand form:
// IMM, REG, IMMSREG, RSR
// -1 for invalid if the instruction doesn't support that
const s32 InstOps[][4] = {{16, 0, 0, 0}, // AND(s)
                          {17, 1, 1, 1}, // EOR(s)
                          {18, 2, 2, 2}, // SUB(s)
                          {19, 3, 3, 3}, // RSB(s)
                          {20, 4, 4, 4}, // ADD(s)
                          {21, 5, 5, 5}, // ADC(s)
                          {22, 6, 6, 6}, // SBC(s)
                          {23, 7, 7, 7}, // RSC(s)
                          {24, 8, 8, 8}, // TST
                          {25, 9, 9, 9}, // TEQ
                          {26, 10, 10, 10}, // CMP
                          {27, 11, 11, 11}, // CMN
                          {28, 12, 12, 12}, // ORR(s)
                          {29, 13, 13, 13}, // MOV(s)
                          {30, 14, 14, 14}, // BIC(s)
                          {31, 15, 15, 15}, // MVN(s)
                          {24, -1, -1, -1}, // MOVW
                          {26, -1, -1, -1}, // MOVT
                         };

// Mnemonics matching InstOps row order, used in assert messages.
const char *InstNames[] = { "AND",
                            "EOR",
                            "SUB",
                            "RSB",
                            "ADD",
                            "ADC",
                            "SBC",
                            "RSC",
                            "TST",
                            "TEQ",
                            "CMP",
                            "CMN",
                            "ORR",
                            "MOV",
                            "BIC",
                            "MVN",
                            "MOVW",
                            "MOVT",
                          };
853
854
// Thin wrappers over WriteInstruction; the first argument indexes InstOps /
// InstNames above. The ...S variants set SetFlags; the comparison ops
// (TST/TEQ/CMP/CMN) always set flags and write no destination (R0 dummy).
void ARMXEmitter::AND (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm); }
void ARMXEmitter::ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm, true); }
void ARMXEmitter::EOR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm); }
void ARMXEmitter::EORS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm, true); }
void ARMXEmitter::SUB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm); }
void ARMXEmitter::SUBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm, true); }
void ARMXEmitter::RSB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm); }
void ARMXEmitter::RSBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm, true); }
void ARMXEmitter::ADD (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm); }
void ARMXEmitter::ADDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm, true); }
void ARMXEmitter::ADC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm); }
void ARMXEmitter::ADCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm, true); }
void ARMXEmitter::SBC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm); }
void ARMXEmitter::SBCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm, true); }
void ARMXEmitter::RSC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm); }
void ARMXEmitter::RSCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm, true); }
void ARMXEmitter::TST ( ARMReg Rn, Operand2 Rm) { WriteInstruction(8, R0, Rn, Rm, true); }
void ARMXEmitter::TEQ ( ARMReg Rn, Operand2 Rm) { WriteInstruction(9, R0, Rn, Rm, true); }
void ARMXEmitter::CMP ( ARMReg Rn, Operand2 Rm) { WriteInstruction(10, R0, Rn, Rm, true); }
void ARMXEmitter::CMN ( ARMReg Rn, Operand2 Rm) { WriteInstruction(11, R0, Rn, Rm, true); }
void ARMXEmitter::ORR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm); }
void ARMXEmitter::ORRS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm, true); }
void ARMXEmitter::MOV (ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm); }
void ARMXEmitter::MOVS(ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm, true); }
void ARMXEmitter::BIC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm); }
void ARMXEmitter::BICS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm, true); }
void ARMXEmitter::MVN (ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm); }
void ARMXEmitter::MVNS(ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm, true); }
void ARMXEmitter::MOVW(ARMReg Rd, Operand2 Rm) { WriteInstruction(16, Rd, R0, Rm); }
// MOVT with TopBits shifts the value down so callers can pass the full u32.
void ARMXEmitter::MOVT(ARMReg Rd, Operand2 Rm, bool TopBits) { WriteInstruction(17, Rd, R0, TopBits ? Rm.Value >> 16 : Rm); }
884
885
// Common encoder for the data-processing instructions: looks up the opcode
// for Op with Rm's operand form, then packs condition, flags, and registers.
void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags) // This can get renamed later
{
	s32 op = InstOps[Op][Rm.GetType()]; // Type always decided by last operand
	u32 Data = Rm.GetData();
	if (Rm.GetType() == TYPE_IMM)
	{
		switch (Op)
		{
		// MOV cases that support IMM16
		case 16:
		case 17:
			Data = Rm.Imm16();
			break;
		default:
			break;
		}
	}
	// -1 in the table means this instruction has no encoding for Rm's form.
	if (op == -1)
		_assert_msg_(false, "%s not yet support %d", InstNames[Op], Rm.GetType());
	Write32(condition | (op << 21) | (SetFlags ? (1 << 20) : 0) | Rn << 16 | Rd << 12 | Data);
}
906
907
// Data Operations
908
// Data Operations
// Common encoder for the signed multiply / divide family (0x7 in bits 26:24).
void ARMXEmitter::WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2)
{
	Write32(condition | (0x7 << 24) | (Op << 20) | (dest << 16) | (Op2 << 12) | (r1 << 8) | (Op3 << 5) | (1 << 4) | r2);
}
// dest = dividend / divisor, unsigned. Requires hardware integer divide.
void ARMXEmitter::UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)
{
	_assert_msg_(cpu_info.bIDIVa, "Trying to use integer divide on hardware that doesn't support it.");
	WriteSignedMultiply(3, 0xF, 0, dest, divisor, dividend);
}
// dest = dividend / divisor, signed. Requires hardware integer divide.
void ARMXEmitter::SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)
{
	_assert_msg_(cpu_info.bIDIVa, "Trying to use integer divide on hardware that doesn't support it.");
	WriteSignedMultiply(1, 0xF, 0, dest, divisor, dividend);
}
922
923
// Shift instructions. Even op numbers are shift-by-immediate, odd are
// shift-by-register. Note: an immediate shift of 0 encodes LSL in the ISA,
// so LSR/ASR assert against it.
void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, false, dest, src, op2);}
void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);}
void ARMXEmitter::LSL (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, false, dest, src, op2);}
void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, true, dest, src, op2);}
void ARMXEmitter::LSR (ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "LSR must have a non-zero shift (use LSL.)");
	WriteShiftedDataOp(2, false, dest, src, op2);
}
void ARMXEmitter::LSRS(ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "LSRS must have a non-zero shift (use LSLS.)");
	WriteShiftedDataOp(2, true, dest, src, op2);
}
void ARMXEmitter::LSR (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(3, false, dest, src, op2);}
void ARMXEmitter::LSRS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(3, true, dest, src, op2);}
void ARMXEmitter::ASR (ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "ASR must have a non-zero shift (use LSL.)");
	WriteShiftedDataOp(4, false, dest, src, op2);
}
void ARMXEmitter::ASRS(ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "ASRS must have a non-zero shift (use LSLS.)");
	WriteShiftedDataOp(4, true, dest, src, op2);
}
void ARMXEmitter::ASR (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(5, false, dest, src, op2);}
void ARMXEmitter::ASRS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(5, true, dest, src, op2);}
947
948
// dest = src * op2 (32-bit result).
void ARMXEmitter::MUL (ARMReg dest, ARMReg src, ARMReg op2)
{
	Write32(condition | (dest << 16) | (src << 8) | (9 << 4) | op2);
}
// MUL that also updates the flags (bit 20 = S).
void ARMXEmitter::MULS(ARMReg dest, ARMReg src, ARMReg op2)
{
	Write32(condition | (1 << 20) | (dest << 16) | (src << 8) | (9 << 4) | op2);
}

// Common encoder for the 64-bit multiply(-accumulate) family.
void ARMXEmitter::Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) {
	Write32(condition | (op << 20) | (destHi << 16) | (destLo << 12) | (rm << 8) | (9 << 4) | rn);
}

// destHi:destLo = rm * rn, unsigned 64-bit result.
void ARMXEmitter::UMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0x8, destLo, destHi, rn, rm);
}

// destHi:destLo = rm * rn, signed 64-bit result.
void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0xC, destLo, destHi, rn, rm);
}

// destHi:destLo += rm * rn, unsigned.
void ARMXEmitter::UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0xA, destLo, destHi, rn, rm);
}

// destHi:destLo += rm * rn, signed.
void ARMXEmitter::SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0xE, destLo, destHi, rn, rm);
}
980
981
// Unsigned bitfield extract: dest = (rn >> lsb) & ((1 << width) - 1).
void ARMXEmitter::UBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)
{
	Write32(condition | (0x7E0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn);
}

// Signed bitfield extract: like UBFX but sign-extends the extracted field.
void ARMXEmitter::SBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)
{
	Write32(condition | (0x7A0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn);
}

// Count leading zeros of rm into rd.
void ARMXEmitter::CLZ(ARMReg rd, ARMReg rm)
{
	Write32(condition | (0x16F << 16) | (rd << 12) | (0xF1 << 4) | rm);
}
995
996
// Emits a preload hint for [rn + offset]; forWrite selects PLDW.
// NOTE(review): the assert only allows ±0x3FF (10 bits) while its message
// claims 12 bits — the PLD encoding's immediate field is 12 bits (±4095).
// Confirm which limit is intended before relying on larger offsets.
void ARMXEmitter::PLD(ARMReg rn, int offset, bool forWrite) {
	_dbg_assert_msg_(offset < 0x3ff && offset > -0x3ff, "PLD: Max 12 bits of offset allowed");

	// U (bit 23) selects add vs. subtract; the encoded offset is the magnitude.
	bool U = offset >= 0;
	if (offset < 0) offset = -offset;
	bool R = !forWrite;
	// Conditions not allowed
	Write32((0xF5 << 24) | (U << 23) | (R << 22) | (1 << 20) | ((int)rn << 16) | (0xF << 12) | offset);
}
1005
1006
1007
// Bitfield insert: copies `width` low bits of rn into rd starting at `lsb`.
// The msb field is clamped to 31 so an oversized width cannot corrupt encoding.
void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width)
{
	u32 msb = (lsb + width - 1);
	if (msb > 31) msb = 31;
	Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | rn);
}

// Bitfield clear: zeroes `width` bits of rd starting at `lsb` (BFI with the
// register field set to 15).
void ARMXEmitter::BFC(ARMReg rd, u8 lsb, u8 width)
{
	u32 msb = (lsb + width - 1);
	if (msb > 31) msb = 31;
	Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | 15);
}

// Sign-extend the low byte of op2 into dest.
void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2)
{
	Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2);
}

// Sign-extend the low halfword of op2 (optionally rotated) into dest.
// Implemented as SXTAH with the accumulate register slot set to 15.
void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation)
{
	SXTAH(dest, (ARMReg)15, op2, rotation);
}
// Sign-extend-and-add-halfword: dest = src + SignExtend16(ror(op2, rotation*8)).
void ARMXEmitter::SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation)
{
	// bits ten and 11 are the rotation amount, see 8.8.232 for more
	// information
	Write32(condition | (0x6B << 20) | (src << 16) | (dest << 12) | (rotation << 10) | (7 << 4) | op2);
}
// Reverse the bit order of src into dest.
void ARMXEmitter::RBIT(ARMReg dest, ARMReg src)
{
	Write32(condition | (0x6F << 20) | (0xF << 16) | (dest << 12) | (0xF3 << 4) | src);
}
// Reverse the byte order of the whole word.
void ARMXEmitter::REV (ARMReg dest, ARMReg src)
{
	Write32(condition | (0x6BF << 16) | (dest << 12) | (0xF3 << 4) | src);
}
// Reverse the byte order within each halfword.
void ARMXEmitter::REV16(ARMReg dest, ARMReg src)
{
	Write32(condition | (0x6BF << 16) | (dest << 12) | (0xFB << 4) | src);
}
1048
1049
void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, Operand2 op2)
1050
{
1051
Write32(condition | (0x320F << 12) | (write_nzcvq << 19) | (write_g << 18) | op2.Imm12Mod());
1052
}
1053
void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, ARMReg src)
1054
{
1055
Write32(condition | (0x120F << 12) | (write_nzcvq << 19) | (write_g << 18) | src);
1056
}
1057
void ARMXEmitter::MRS (ARMReg dest)
1058
{
1059
Write32(condition | (16 << 20) | (15 << 16) | (dest << 12));
1060
}
1061
void ARMXEmitter::LDREX(ARMReg dest, ARMReg base)
1062
{
1063
Write32(condition | (25 << 20) | (base << 16) | (dest << 12) | 0xF9F);
1064
}
1065
void ARMXEmitter::STREX(ARMReg result, ARMReg base, ARMReg op)
1066
{
1067
_assert_msg_((result != base && result != op), "STREX dest can't be other two registers");
1068
Write32(condition | (24 << 20) | (base << 16) | (result << 12) | (0xF9 << 4) | op);
1069
}
1070
void ARMXEmitter::DMB ()
1071
{
1072
Write32(0xF57FF05E);
1073
}
1074
// SVC (supervisor call) with a 24-bit immediate comment field.
void ARMXEmitter::SVC(Operand2 op)
{
	Write32(condition | (0x0F << 24) | op.Imm24());
}
// IMM, REG, IMMSREG, RSR
// -1 for invalid if the instruction doesn't support that
// Rows are indexed by WriteStoreOp's Op argument, columns by the
// Operand2 type of the offset. The "special encodings" use the misc
// load/store (halfword / signed-byte) instruction form instead of the
// normal word/byte form, so their opcode values differ in meaning.
const s32 LoadStoreOps[][4] = {
	{0x40, 0x60, 0x60, -1}, // STR
	{0x41, 0x61, 0x61, -1}, // LDR
	{0x44, 0x64, 0x64, -1}, // STRB
	{0x45, 0x65, 0x65, -1}, // LDRB
	// Special encodings
	{ 0x4, 0x0, -1, -1}, // STRH
	{ 0x5, 0x1, -1, -1}, // LDRH
	{ 0x5, 0x1, -1, -1}, // LDRSB
	{ 0x5, 0x1, -1, -1}, // LDRSH
};
// Indexed in lockstep with LoadStoreOps; used only for error messages.
const char *LoadStoreNames[] = {
	"STR",
	"LDR",
	"STRB",
	"LDRB",
	"STRH",
	"LDRH",
	"LDRSB",
	"LDRSH",
};
// Shared encoder for all single-register load/store instructions.
// Op indexes LoadStoreOps/LoadStoreNames; Rt is the data register,
// Rn the base, Rm the offset (immediate, register, or shifted
// register), and RegAdd picks add vs. subtract for register offsets.
// Ops 4-7 (STRH/LDRH/LDRSB/LDRSH) use the "misc" addressing form,
// which splits the immediate and carries sign/half flags in bits 5-6.
void ARMXEmitter::WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 Rm, bool RegAdd)
{
	s32 op = LoadStoreOps[Op][Rm.GetType()]; // Type always decided by last operand
	u32 Data;

	// Qualcomm chipsets get /really/ angry if you don't use index, even if the offset is zero.
	// Some of these encodings require Index at all times anyway. Doesn't really matter.
	// bool Index = op2 != 0 ? true : false;
	bool Index = true;
	bool Add = false;

	// Special Encoding (misc addressing mode)
	bool SpecialOp = false;
	bool Half = false;
	bool SignedLoad = false;

	if (op == -1)
		_assert_msg_(false, "%s does not support %d", LoadStoreNames[Op], Rm.GetType());

	switch (Op)
	{
	case 4: // STRH
		SpecialOp = true;
		Half = true;
		SignedLoad = false;
		break;
	case 5: // LDRH
		SpecialOp = true;
		Half = true;
		SignedLoad = false;
		break;
	case 6: // LDRSB
		SpecialOp = true;
		Half = false;
		SignedLoad = true;
		break;
	case 7: // LDRSH
		SpecialOp = true;
		Half = true;
		SignedLoad = true;
		break;
	}
	switch (Rm.GetType())
	{
	case TYPE_IMM:
	{
		s32 Temp = (s32)Rm.Value;
		Data = abs(Temp);
		// The offset is encoded differently on this one.
		if (SpecialOp)
			Data = ((Data & 0xF0) << 4) | (Data & 0xF);
		if (Temp >= 0) Add = true;
	}
	break;
	case TYPE_REG:
		Data = Rm.GetData();
		Add = RegAdd;
		break;
	case TYPE_IMMSREG:
		if (!SpecialOp)
		{
			Data = Rm.GetData();
			Add = RegAdd;
			break;
		}
		// Intentional fallthrough: TYPE_IMMSREG not supported for misc addressing.
		[[fallthrough]];
	default:
		// RSR not supported for any of these
		// We already have the warning above
		BKPT(0x2);
		return;
		break;
	}
	if (SpecialOp)
	{
		// Add SpecialOp things
		Data = (0x9 << 4) | (SignedLoad << 6) | (Half << 5) | Data;
	}
	Write32(condition | (op << 20) | (Index << 24) | (Add << 23) | (Rn << 16) | (Rt << 12) | Data);
}
// Thin public wrappers around WriteStoreOp; the first argument selects
// the row in LoadStoreOps/LoadStoreNames above.
void ARMXEmitter::LDR (ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(1, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(3, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(5, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRSB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(6, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRSH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(7, dest, base, op2, RegAdd);}
void ARMXEmitter::STR (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(0, result, base, op2, RegAdd);}
void ARMXEmitter::STRH (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(4, result, base, op2, RegAdd);}
void ARMXEmitter::STRB (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(2, result, base, op2, RegAdd);}
// Collects Regnum trailing varargs (register numbers) into the RegList
// bitmask used by the LDM/STM family below.
#define VA_TO_REGLIST(RegList, Regnum) \
{ \
	u8 Reg; \
	va_list vl; \
	va_start(vl, Regnum); \
	for (int i = 0; i < Regnum; i++) \
	{ \
		Reg = va_arg(vl, u32); \
		RegList |= (1 << Reg); \
	} \
	va_end(vl); \
}
// Shared encoder for LDM/STM: op carries the P/U/L bits, dest is the
// base register, RegList the register bitmask.
void ARMXEmitter::WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList)
{
	Write32(condition | (op << 20) | (WriteBack << 21) | (dest << 16) | RegList);
}
// Shared encoder for VLDM/VSTM. Double selects D- vs S-register form;
// the register count is doubled for D registers (two words each).
void ARMXEmitter::WriteVRegStoreOp(u32 op, ARMReg Rn, bool Double, bool WriteBack, ARMReg Vd, u8 numregs)
{
	_dbg_assert_msg_(!WriteBack || Rn != R_PC, "VLDM/VSTM cannot use WriteBack with PC (PC is deprecated anyway.)");
	Write32(condition | (op << 20) | (WriteBack << 21) | (Rn << 16) | EncodeVd(Vd) | ((0xA | (int)Double) << 8) | (numregs << (int)Double));
}
// STMFD (decrement-before): the classic "push" of Regnum registers.
void ARMXEmitter::STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x10 | 0, dest, WriteBack, RegList);
}
// LDMFD (increment-after): the classic "pop" of Regnum registers.
void ARMXEmitter::LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x08 | 1, dest, WriteBack, RegList);
}
// STMIA: store multiple, increment-after.
void ARMXEmitter::STMIA(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x08 | 0, dest, WriteBack, RegList);
}
// LDMIA: load multiple, increment-after.
void ARMXEmitter::LDMIA(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x08 | 1, dest, WriteBack, RegList);
}
// STM with explicit addressing mode: Add = increment, Before = pre-index.
void ARMXEmitter::STM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 0, dest, WriteBack, RegList);
}
// LDM with explicit addressing mode: Add = increment, Before = pre-index.
void ARMXEmitter::LDM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 1, dest, WriteBack, RegList);
}
// STM taking a precomputed register bitmask instead of varargs.
void ARMXEmitter::STMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList)
{
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 0, dest, WriteBack, RegList);
}
// LDM taking a precomputed register bitmask instead of varargs.
void ARMXEmitter::LDMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList)
{
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 1, dest, WriteBack, RegList);
}
#undef VA_TO_REGLIST
1263
1264
// NEON Specific
1265
// VABD: per-lane absolute difference, Vd = |Vn - Vm|.
// Fields: D bit 22, N bit 7, Q bit 6, M bit 5 (ARM ARM A8.8 NEON
// data-processing layout).
void ARMXEmitter::VABD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_assert_msg_(Vd >= D0, "Pass invalid register to VABD(float)");
	bool register_quad = Vd >= Q0;

	// Gets encoded as a double register
	Vd = SubBase(Vd);
	Vn = SubBase(Vn);
	Vm = SubBase(Vm);

	// BUGFIX: (Vm & 0x10) was shifted by 2, landing in bit 6 (the Q flag)
	// instead of bit 5 (the M bit). For Vm >= D16 that corrupted the Q
	// flag and dropped Vm's high bit. Shift by 1, matching VADD below.
	Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
		| ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \
		| ((Vm & 0x10) << 1) | (Vm & 0xF));
}
// VADD (integer): per-lane addition, Vd = Vn + Vm.
// Field layout: D bit 22, N bit 7, Q bit 6, M bit 5.
void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_assert_msg_(Vd >= D0, "Pass invalid register to VADD(integer)");

	bool register_quad = Vd >= Q0;

	// Gets encoded as a double register
	Vd = SubBase(Vd);
	Vn = SubBase(Vn);
	Vm = SubBase(Vm);

	Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
		| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \
		| ((Vm & 0x10) << 1) | (Vm & 0xF));

}
// VSUB (integer, quad-only): per-lane subtraction, Vd = Vn - Vm.
// Fields: D bit 22, N bit 7, Q bit 6 (hard-wired — quad form), M bit 5.
void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_assert_msg_(Vd >= Q0, "Pass invalid register to VSUB(integer)");

	// Gets encoded as a double register
	Vd = SubBase(Vd);
	Vn = SubBase(Vn);
	Vm = SubBase(Vm);

	// BUGFIX: (Vm & 0x10) was shifted by 2, ORing into the already-set Q
	// bit (bit 6) and silently discarding Vm's high bit — registers Q8+
	// encoded as Q0-Q7. The M bit is bit 5, so shift by 1 as in VADD.
	Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
		| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \
		| ((Vm & 0x10) << 1) | (Vm & 0xF));
}
// {VFP encoding, NEON encoding} pairs, indexed by WriteVFPDataOp's Op
// argument; {-1, -1} marks a form the instruction does not support.
// opc1 lands in bits 20-27, opc2 in bits 4-11 of the emitted word.
extern const VFPEnc VFPOps[16][2] = {
	{{0xE0, 0xA0}, { -1, -1}}, // 0: VMLA
	{{0xE1, 0xA4}, { -1, -1}}, // 1: VNMLA
	{{0xE0, 0xA4}, { -1, -1}}, // 2: VMLS
	{{0xE1, 0xA0}, { -1, -1}}, // 3: VNMLS
	{{0xE3, 0xA0}, { -1, -1}}, // 4: VADD
	{{0xE3, 0xA4}, { -1, -1}}, // 5: VSUB
	{{0xE2, 0xA0}, { -1, -1}}, // 6: VMUL
	{{0xE2, 0xA4}, { -1, -1}}, // 7: VNMUL
	{{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 8: VABS(Vn(0x0) used for encoding)
	{{0xE8, 0xA0}, { -1, -1}}, // 9: VDIV
	{{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 10: VNEG(Vn(0x1) used for encoding)
	{{0xEB, 0xAC}, { -1, -1}}, // 11: VSQRT (Vn(0x1) used for encoding)
	{{0xEB, 0xA4}, { -1, -1}}, // 12: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding)
	{{0xEB, 0xAC}, { -1, -1}}, // 13: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding)
	{{ -1, -1}, {0x3B, 0x30}}, // 14: VABSi
};
// Indexed in lockstep with VFPOps; used only for error messages.
const char *VFPOpNames[16] = {
	"VMLA",
	"VNMLA",
	"VMLS",
	"VNMLS",
	"VADD",
	"VSUB",
	"VMUL",
	"VNMUL",
	"VABS",
	"VDIV",
	"VNEG",
	"VSQRT",
	"VCMP",
	"VCMPE",
	"VABSi",
};
// Encodes the destination register field of a VFP/NEON instruction.
// Double and quad registers share one layout (D bit at 22, low nibble
// at 12); single-precision splits the index the other way around.
u32 EncodeVd(ARMReg Vd)
{
	const ARMReg Reg = SubBase(Vd);
	if (Vd >= D0) // Same field layout for double and quad registers.
		return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12);
	return ((Reg & 0x1) << 22) | ((Reg & 0x1E) << 11);
}
// Encodes the first-operand register field of a VFP/NEON instruction.
// Double and quad share one layout (low nibble at 16, N bit at 7);
// single-precision splits the index the other way around.
u32 EncodeVn(ARMReg Vn)
{
	const ARMReg Reg = SubBase(Vn);
	if (Vn >= D0) // Same field layout for double and quad registers.
		return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3);
	return ((Reg & 0x1E) << 15) | ((Reg & 0x1) << 7);
}
// Encodes the second-operand register field of a VFP/NEON instruction.
// Double and quad share one layout (M bit at 5, low nibble at 0);
// single-precision splits the index the other way around.
u32 EncodeVm(ARMReg Vm)
{
	const ARMReg Reg = SubBase(Vm);
	if (Vm >= D0) // Same field layout for double and quad registers.
		return ((Reg & 0x10) << 1) | (Reg & 0xF);
	return ((Reg & 0x1) << 5) | (Reg >> 1);
}
// Maps an element-size flag (I_8/I_16/I_32/F_32/I_64) to the 2-bit
// size field used by integer NEON encodings. Asserts (and returns 0)
// if no recognized size bit is set.
u32 encodedSize(u32 value)
{
	if (value & I_8)
		return 0;
	if (value & I_16)
		return 1;
	if (value & (I_32 | F_32))
		return 2;
	if (value & I_64)
		return 3;
	_dbg_assert_msg_(false, "Passed invalid size to integer NEON instruction");
	return 0;
}
// Strips the enum base off a floating-point register so only the raw
// hardware index remains. GPRs pass through unchanged; quads come back
// as their even double-register index (how the hardware encodes them).
ARMReg SubBase(ARMReg Reg)
{
	if (Reg < S0)
		return Reg;
	if (Reg < D0)
		return (ARMReg)(Reg - S0);
	if (Reg < Q0)
		return (ARMReg)(Reg - D0);
	return (ARMReg)((Reg - Q0) * 2); // Always gets encoded as a double register
}
// Builds the scalar-reference pseudo-register for lane subScalar of the
// given D register (lane index packed above the register bits).
ARMReg DScalar(ARMReg dreg, int subScalar) {
	const int base = (int)SubBase(dreg) & 0xF;
	return (ARMReg)(D0 + ((subScalar << 4) | base));
}
// Convert to a DScalar
1432
ARMReg QScalar(ARMReg qreg, int subScalar) {
1433
int dr = (int)(SubBase(qreg)) & 0xF;
1434
if (subScalar & 2) {
1435
dr++;
1436
}
1437
int scalar = (((subScalar & 1) << 4) | dr);
1438
ARMReg ret = (ARMReg)(D0 + scalar);
1439
return ret;
1440
}
1441
1442
// Shared encoder for the VFP/NEON arithmetic ops listed in VFPOps.
// Quad destinations select the NEON encoding column and force the
// unconditional (0xF) condition field; asserts if the table marks the
// requested form unsupported.
void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	bool quad_reg = Vd >= Q0;
	bool double_reg = Vd >= D0 && Vd < Q0;

	VFPEnc enc = VFPOps[Op][quad_reg];
	if (enc.opc1 == -1 && enc.opc2 == -1)
		_assert_msg_(false, "%s does not support %s", VFPOpNames[Op], quad_reg ? "NEON" : "VFP");
	u32 VdEnc = EncodeVd(Vd);
	u32 VnEnc = EncodeVn(Vn);
	u32 VmEnc = EncodeVm(Vm);
	u32 cond = quad_reg ? (0xF << 28) : condition;

	Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (quad_reg << 6) | (double_reg << 8) | VmEnc);
}
// One-line wrappers around WriteVFPDataOp; the index selects a row in
// VFPOps. For the single-operand forms the Vn slot carries fixed
// opcode bits (see the per-row comments in VFPOps), hence the D0/D1/
// D4/D5 constants below.
void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(0, Vd, Vn, Vm); }
void ARMXEmitter::VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); }
void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); }
void ARMXEmitter::VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); }
void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); }
void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(5, Vd, Vn, Vm); }
void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); }
void ARMXEmitter::VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(7, Vd, Vn, Vm); }
void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D0, Vm); }
void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(9, Vd, Vn, Vm); }
void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D1, Vm); }
void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(11, Vd, D1, Vm); }
void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(12, Vd, D4, Vm); }
void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(13, Vd, D4, Vm); }
void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(12, Vd, D5, D0); }
void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(13, Vd, D5, D0); }
// VLDMIA: load numvregs consecutive VFP/NEON registers starting at
// firstvreg from [ptr], increment-after.
void ARMXEmitter::VLDMIA(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	WriteVRegStoreOp(0x80 | 0x40 | 0x8 | 1, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}
// VSTMIA: store numvregs consecutive VFP/NEON registers starting at
// firstvreg to [ptr], increment-after.
void ARMXEmitter::VSTMIA(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	WriteVRegStoreOp(0x80 | 0x40 | 0x8, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}
// VLDMDB: load multiple, decrement-before. The DB form only exists
// with writeback, hence the assert.
void ARMXEmitter::VLDMDB(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	_dbg_assert_msg_(WriteBack, "Writeback is required for VLDMDB");
	WriteVRegStoreOp(0x80 | 0x040 | 0x10 | 1, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}
// VSTMDB: store multiple, decrement-before. The DB form only exists
// with writeback, hence the assert.
void ARMXEmitter::VSTMDB(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	_dbg_assert_msg_(WriteBack, "Writeback is required for VSTMDB");
	WriteVRegStoreOp(0x80 | 0x040 | 0x10, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}
// VLDR: load one S or D register from [Base +/- offset]. The offset
// must be word-aligned and fit in 10 bits (it is encoded as imm/4).
void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset)
{
	_assert_msg_(Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR");
	_assert_msg_(Base <= R15, "Passed invalid Base register to VLDR");

	bool Add = offset >= 0 ? true : false;
	u32 imm = abs(offset);

	_assert_msg_((imm & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough");

	// Redundant with the assert above, but logs in builds where the
	// assert doesn't stop execution.
	if (imm & 0xC03)
		ERROR_LOG(Log::JIT, "VLDR: Bad offset %08x", imm);

	bool single_reg = Dest < D0;

	Dest = SubBase(Dest);

	if (single_reg)
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \
			| ((Dest & 0x1E) << 11) | (10 << 8) | (imm >> 2));
	}
	else
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \
			| ((Dest & 0xF) << 12) | (11 << 8) | (imm >> 2));
	}
}
// VSTR: store one S or D register to [Base +/- offset]. The offset
// must be word-aligned and fit in 10 bits (it is encoded as imm/4).
void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset)
{
	_assert_msg_(Src >= S0 && Src <= D31, "Passed invalid src register to VSTR");
	_assert_msg_(Base <= R15, "Passed invalid base register to VSTR");

	bool Add = offset >= 0 ? true : false;
	u32 imm = abs(offset);

	_assert_msg_((imm & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough");

	// Redundant with the assert above, but logs in builds where the
	// assert doesn't stop execution.
	if (imm & 0xC03)
		ERROR_LOG(Log::JIT, "VSTR: Bad offset %08x", imm);

	bool single_reg = Src < D0;

	Src = SubBase(Src);

	if (single_reg)
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x1) << 22) | (Base << 16) \
			| ((Src & 0x1E) << 11) | (10 << 8) | (imm >> 2));
	}
	else
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x10) << 18) | (Base << 16) \
			| ((Src & 0xF) << 12) | (11 << 8) | (imm >> 2));
	}
}
// VMRS APSR_nzcv, FPSCR: copy the FPSCR flags into the APSR condition
// flags (Rt = 15 selects the APSR form).
void ARMXEmitter::VMRS_APSR() {
	Write32(condition | 0x0EF10A10 | (15 << 12));
}
// VMRS: read FPSCR into the given GPR.
void ARMXEmitter::VMRS(ARMReg Rt) {
	Write32(condition | (0xEF << 20) | (1 << 16) | (Rt << 12) | 0xA10);
}
// VMSR: write the given GPR into FPSCR.
void ARMXEmitter::VMSR(ARMReg Rt) {
	Write32(condition | (0xEE << 20) | (1 << 16) | (Rt << 12) | 0xA10);
}
// VMOV (immediate, VFP): load an 8-bit-encodable float constant into a
// single or double register; the sz bit selects the double form.
void ARMXEmitter::VMOV(ARMReg Dest, Operand2 op2)
{
	int sz = Dest >= D0 ? (1 << 8) : 0;
	Write32(condition | (0xEB << 20) | EncodeVd(Dest) | (5 << 9) | sz | op2.Imm8VFP());
}
// VMOV (immediate, NEON): replicate a constant into every lane of a D
// or Q register. After normalizing imm per the element Size, the chain
// below searches the ASIMD cmode encodings in order: plain shifted
// bytes, "ones-filled" shifted forms, replicated byte, VFP-style float
// immediate, and finally the per-byte 0x00/0xFF (op=1) form.
// Asserts if the constant fits none of them.
void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, u32 imm)
{
	_assert_msg_(Vd >= D0, "VMOV_neon #imm must target a double or quad");
	bool register_quad = Vd >= Q0;

	int cmode = 0;
	int op = 0;
	Operand2 op2 = IMM(0);

	// imm8 = the low byte replicated across all four byte lanes.
	u32 imm8 = imm & 0xFF;
	imm8 = imm8 | (imm8 << 8) | (imm8 << 16) | (imm8 << 24);

	if (Size == I_8) {
		imm = imm8;
	} else if (Size == I_16) {
		imm &= 0xFFFF;
		imm = imm | (imm << 16);
	}

	if ((imm & 0x000000FF) == imm) {
		op = 0;
		cmode = 0 << 1;
		op2 = IMM(imm);
	} else if ((imm & 0x0000FF00) == imm) {
		op = 0;
		cmode = 1 << 1;
		op2 = IMM(imm >> 8);
	} else if ((imm & 0x00FF0000) == imm) {
		op = 0;
		cmode = 2 << 1;
		op2 = IMM(imm >> 16);
	} else if ((imm & 0xFF000000) == imm) {
		op = 0;
		cmode = 3 << 1;
		op2 = IMM(imm >> 24);
	} else if ((imm & 0x00FF00FF) == imm && (imm >> 16) == (imm & 0x00FF)) {
		op = 0;
		cmode = 4 << 1;
		op2 = IMM(imm & 0xFF);
	} else if ((imm & 0xFF00FF00) == imm && (imm >> 16) == (imm & 0xFF00)) {
		op = 0;
		cmode = 5 << 1;
		op2 = IMM(imm & 0xFF);
	} else if ((imm & 0x0000FFFF) == (imm | 0x000000FF)) {
		// cmode 110x: byte shifted left with low bits filled with ones.
		op = 0;
		cmode = (6 << 1) | 0;
		op2 = IMM(imm >> 8);
	} else if ((imm & 0x00FFFFFF) == (imm | 0x0000FFFF)) {
		op = 0;
		cmode = (6 << 1) | 1;
		op2 = IMM(imm >> 16);
	} else if (imm == imm8) {
		op = 0;
		cmode = (7 << 1) | 0;
		op2 = IMM(imm & 0xFF);
	} else if (TryMakeFloatIMM8(imm, op2)) {
		op = 0;
		cmode = (7 << 1) | 1;
	} else {
		// 64-bit constant form - technically we could take a u64.
		// Each byte of the constant must be 0x00 or 0xFF; the mask of
		// 0xFF bytes becomes the encoded immediate. (This local imm8
		// intentionally shadows the replicated-byte value above.)
		bool canEncode = true;
		u8 imm8 = 0;
		for (int i = 0, i8 = 0; i < 32; i += 8, ++i8) {
			u8 b = (imm >> i) & 0xFF;
			if (b == 0xFF) {
				imm8 |= 1 << i8;
			} else if (b != 0x00) {
				canEncode = false;
			}
		}
		if (canEncode) {
			// We don't want zeros in the second lane.
			op = 1;
			cmode = 7 << 1;
			op2 = IMM(imm8 | (imm8 << 4));
		} else {
			_assert_msg_(false, "VMOV_neon #imm invalid constant value");
		}
	}

	// No condition allowed.
	Write32((15 << 28) | (0x28 << 20) | EncodeVd(Vd) | (cmode << 8) | (register_quad << 6) | (op << 5) | (1 << 4) | op2.Imm8ASIMD());
}
// VMOV (scalar <-> GPR): move one lane of a D register to/from a GPR.
// Direction is inferred from which argument is a GPR; when reading to
// a GPR the sign/zero-extension is chosen by I_SIGNED/I_UNSIGNED.
void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane)
{
	int opc1 = 0;
	int opc2 = 0;

	// opc1/opc2 jointly encode the element size and lane index.
	switch (Size & ~(I_SIGNED | I_UNSIGNED))
	{
	case I_8: opc1 = 2 | (lane >> 2); opc2 = lane & 3; break;
	case I_16: opc1 = lane >> 1; opc2 = 1 | ((lane & 1) << 1); break;
	case I_32:
	case F_32:
		_assert_msg_((Size & I_UNSIGNED) == 0, "Cannot use UNSIGNED for I_32 or F_32");
		opc1 = lane & 1;
		break;
	default:
		_assert_msg_(false, "VMOV_neon unsupported size");
	}

	if (Vd < S0 && Rt >= D0 && Rt < Q0)
	{
		// Oh, reading to reg, our params are backwards.
		ARMReg Src = Rt;
		ARMReg Dest = Vd;

		_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED | F_32 | I_32)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VMOV, unless F_32/I_32");
		int U = (Size & I_UNSIGNED) ? (1 << 23) : 0;

		Write32(condition | (0xE1 << 20) | U | (opc1 << 21) | EncodeVn(Src) | (Dest << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));
	}
	else if (Rt < S0 && Vd >= D0 && Vd < Q0)
	{
		ARMReg Src = Rt;
		ARMReg Dest = Vd;
		Write32(condition | (0xE0 << 20) | (opc1 << 21) | EncodeVn(Dest) | (Src << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));
	}
	else
		_assert_msg_(false, "VMOV_neon unsupported arguments (Dx -> Rx or Rx -> Dx)");
}
// VMOV (two GPRs <-> one D register): moves 64 bits in either
// direction; direction is inferred from which arguments are GPRs.
void ARMXEmitter::VMOV(ARMReg Vd, ARMReg Rt, ARMReg Rt2)
{
	if (Vd < S0 && Rt < S0 && Rt2 >= D0)
	{
		// Oh, reading to regs, our params are backwards.
		ARMReg Src = Rt2;
		ARMReg Dest1 = Vd;
		ARMReg Dest2 = Rt;
		Write32(condition | (0xC5 << 20) | (Dest2 << 16) | (Dest1 << 12) | (0xB << 8) | EncodeVm(Src) | (1 << 4));
	}
	else if (Vd >= D0 && Rt < S0 && Rt2 < S0)
	{
		ARMReg Dest = Vd;
		ARMReg Src1 = Rt;
		ARMReg Src2 = Rt2;
		Write32(condition | (0xC4 << 20) | (Src2 << 16) | (Src1 << 12) | (0xB << 8) | EncodeVm(Dest) | (1 << 4));
	}
	else
		_assert_msg_(false, "VMOV_neon requires either Dm, Rt, Rt2 or Rt, Rt2, Dm.");
}
// VMOV (GPR -> D-register half): writes Src into the low (high=false)
// or high (high=true) 32 bits of the destination D register.
void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high)
{
	_assert_msg_(Src < S0, "This VMOV doesn't support SRC other than ARM Reg");
	_assert_msg_(Dest >= D0, "This VMOV doesn't support DEST other than VFP");

	Dest = SubBase(Dest);

	Write32(condition | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \
		| (0xB << 8) | ((Dest & 0x10) << 3) | (1 << 4));
}
// General VMOV dispatcher: GPR <-> S register, or FP/NEON register to
// a same-sized FP/NEON register (S->S, D->D, Q->Q). GPR<->S paths
// return early; GPR<->GPR and 64-bit GPR moves assert. The NEON/VFP
// same-size move falls through to the code after the if/else.
void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
{
	if (Dest == Src) {
		WARN_LOG(Log::JIT, "VMOV %s, %s - same register", ARMRegAsString(Src), ARMRegAsString(Dest));
	}
	if (Dest > R15)
	{
		if (Src < S0)
		{
			if (Dest < D0)
			{
				// Moving to a Neon register FROM ARM Reg
				Dest = (ARMReg)(Dest - S0);
				Write32(condition | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \
					| (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4));
				return;
			}
			else
			{
				// Move 64bit from Arm reg
				_assert_msg_(false, "This VMOV doesn't support moving 64bit ARM to NEON");
				return;
			}
		}
	}
	else
	{
		if (Src > R15)
		{
			if (Src < D0)
			{
				// Moving to ARM Reg from Neon Register
				Src = (ARMReg)(Src - S0);
				Write32(condition | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \
					| (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4));
				return;
			}
			else
			{
				// Move 64bit To Arm reg
				_assert_msg_(false, "This VMOV doesn't support moving 64bit ARM From NEON");
				return;
			}
		}
		else
		{
			// Move Arm reg to Arm reg
			_assert_msg_(false, "VMOV doesn't support moving ARM registers");
		}
	}
	// Moving NEON registers
	int SrcSize = Src < D0 ? 1 : Src < Q0 ? 2 : 4;
	int DestSize = Dest < D0 ? 1 : Dest < Q0 ? 2 : 4;
	bool Single = DestSize == 1;
	bool Quad = DestSize == 4;

	_assert_msg_(SrcSize == DestSize, "VMOV doesn't support moving different register sizes");
	if (SrcSize != DestSize) {
		ERROR_LOG(Log::JIT, "SrcSize: %i (%s) DestDize: %i (%s)", SrcSize, ARMRegAsString(Src), DestSize, ARMRegAsString(Dest));
	}

	Dest = SubBase(Dest);
	Src = SubBase(Src);

	if (Single)
	{
		Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \
			| (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1));
	}
	else
	{
		// Double and quad
		if (Quad)
		{
			// Gets encoded as a Double register
			Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (2 << 20) | ((Src & 0xF) << 16) \
				| ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (1 << 6) \
				| ((Src & 0x10) << 1) | (1 << 4) | (Src & 0xF));

		}
		else
		{
			Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \
				| (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF));
		}
	}
}
// VCVT: VFP conversions between float/double and (signed/unsigned)
// integer, and between single and double precision. flags combine
// TO_INT, IS_SIGNED and ROUND_TO_ZERO; the direction and widths are
// inferred from which side of D0 each register falls on.
void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags)
{
	bool single_reg = (Dest < D0) && (Source < D0);
	bool single_double = !single_reg && (Source < D0 || Dest < D0);
	bool single_to_double = Source < D0;
	// op: RM bit — round-to-zero for to-int, signedness for from-int.
	int op = ((flags & TO_INT) ? (flags & ROUND_TO_ZERO) : (flags & IS_SIGNED)) ? 1 : 0;
	// op2: signed/unsigned select, only meaningful for to-int.
	int op2 = ((flags & TO_INT) ? (flags & IS_SIGNED) : 0) ? 1 : 0;
	Dest = SubBase(Dest);
	Source = SubBase(Source);

	if (single_double)
	{
		// S32<->F64
		if (flags & TO_INT)
		{
			if (single_to_double)
			{
				Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) \
					| ((Dest & 0xF) << 12) | (op << 7) | (0x2D << 6) | ((Source & 0x1) << 5) | (Source >> 1));
			} else {
				Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
					| ((Dest & 0x1E) << 11) | (op << 7) | (0x2D << 6) | ((Source & 0x10) << 1) | (Source & 0xF));
			}
		}
		// F32<->F64
		else {
			if (single_to_double)
			{
				Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | (0x7 << 16) \
					| ((Dest & 0xF) << 12) | (0x2F << 6) | ((Source & 0x1) << 5) | (Source >> 1));
			} else {
				Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | (0x7 << 16) \
					| ((Dest & 0x1E) << 11) | (0x2B << 6) | ((Source & 0x10) << 1) | (Source & 0xF));
			}
		}
	} else if (single_reg) {
		Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
			| ((Dest & 0x1E) << 11) | (op << 7) | (0x29 << 6) | ((Source & 0x1) << 5) | (Source >> 1));
	} else {
		Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
			| ((Dest & 0xF) << 12) | (1 << 8) | (op << 7) | (0x29 << 6) | ((Source & 0x10) << 1) | (Source & 0xF));
	}
}
// VABA: Vd += |Vn - Vm| per lane (integer only; signedness from the
// I_UNSIGNED flag in Size).
void ARMXEmitter::VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \
		| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x71 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
// VABAL: widening absolute-difference-accumulate; Q destination,
// D operands (integer only).
void ARMXEmitter::VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \
		| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x50 << 4) | EncodeVm(Vm));
}
// VABD: per-lane absolute difference; float form when F_32 is set,
// otherwise integer with signedness from I_UNSIGNED.
void ARMXEmitter::VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \
			| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
// VABDL: widening absolute difference; Q destination, D operands
// (integer only).
void ARMXEmitter::VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \
		| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | EncodeVm(Vm));
}
// VABS (NEON): per-lane absolute value; float form selected by F_32.
void ARMXEmitter::VABS(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB1 << 16) | (encodedSize(Size) << 18) | EncodeVd(Vd) \
		| ((Size & F_32 ? 1 : 0) << 10) | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
// VACGE: per-lane all-ones mask where |Vn| >= |Vm| (float only).
void ARMXEmitter::VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	// Only Float
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) \
		| (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
// VACGT: per-lane all-ones mask where |Vn| > |Vm| (float only).
void ARMXEmitter::VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	// Only Float
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) \
		| (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
// VACLE d, n, m == VACGE d, m, n: absolute compare <= is the >=
// pseudo-instruction with the operands swapped.
void ARMXEmitter::VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	VACGE(Vd, Vm, Vn);
}
// VACLT d, n, m computes |Vn| < |Vm|, which is the pseudo-instruction
// for VACGT with the operands swapped — exactly as VACLE above does
// for VACGE. BUGFIX: the operands were previously forwarded unswapped,
// which emitted VACGT (|Vn| > |Vm|) instead of VACLT.
void ARMXEmitter::VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	VACGT(Vd, Vm, Vn);
}
// VADD (NEON): per-lane addition; float form when F_32 is set.
void ARMXEmitter::VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
			| (0x8 << 8) | (register_quad << 6) | EncodeVm(Vm));
}
// VADDHN: add and narrow, keeping the high half of each result lane;
// D destination, Q operands (integer only).
void ARMXEmitter::VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \
		| EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm));
}
// VADDL: widening addition; Q destination, D operands (integer only).
void ARMXEmitter::VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \
		| EncodeVd(Vd) | EncodeVm(Vm));
}
// VADDW: wide addition; Q destination and first operand, D second
// operand (integer only).
void ARMXEmitter::VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \
		| EncodeVd(Vd) | (1 << 8) | EncodeVm(Vm));
}
// VAND Vd, Vn, Vm — bitwise AND; works on D or Q registers (Q form when Vd >= Q0).
void ARMXEmitter::VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Vd == Vn && Vn == Vm), "All operands the same for %s is a nop", __FUNCTION__);
	// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
1992
// VBIC Vd, Vn, Vm — bitwise AND-NOT (Vn & ~Vm); size field 01 at bits 21..20.
void ARMXEmitter::VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2000
// VEOR Vd, Vn, Vm — bitwise exclusive OR.
void ARMXEmitter::VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s: %i", __FUNCTION__, Vd);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2007
// VBIF Vd, Vn, Vm — bitwise insert if false (Vm is the mask; selects bits of Vn where mask is clear).
void ARMXEmitter::VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2015
// VBIT Vd, Vn, Vm — bitwise insert if true; differs from VBIF/VBSL only in bits 21..20.
void ARMXEmitter::VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2023
// VBSL Vd, Vn, Vm — bitwise select: Vd = (Vd & Vn) | (~Vd & Vm).
void ARMXEmitter::VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2031
// VCEQ Vd, Vn, Vm — per-lane compare-equal; float and integer use distinct encodings.
void ARMXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	if (Size & F_32)
		Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
			| (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm));

}
2043
// VCEQ Vd, Vm, #0 — per-lane compare-equal-to-zero (two-register-misc encoding).
void ARMXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	// Bit 10 selects the float variant of the compare-with-zero group.
	Write32((0xF2 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \
		| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2052
// VCGE Vd, Vn, Vm — per-lane compare greater-or-equal (signed/unsigned/float).
void ARMXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	if (Size & F_32)
		Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
			| (0x31 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2063
// VCGE Vd, Vm, #0 — per-lane compare greater-or-equal-to-zero.
void ARMXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \
		| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x8 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2071
// VCGT Vd, Vn, Vm — per-lane compare greater-than (signed/unsigned/float).
void ARMXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	if (Size & F_32)
		Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
			| (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2082
// VCGT Vd, Vm, #0 — per-lane compare greater-than-zero.
void ARMXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// BUGFIX: bits 23..20 of the two-register-misc encoding are 1D11, i.e. the
	// literal is 0xB with D supplied by EncodeVd. The previous 0xD forced the D
	// bit and cleared bit 21, producing a wrong encoding. Matches the sibling
	// VCGE/VCEQ #0 emitters above, which correctly use 0xB.
	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \
		| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (register_quad << 6) | EncodeVm(Vm));
}
2090
// VCLE Vd, Vn, Vm — implemented as VCGE with the source operands swapped.
void ARMXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	VCGE(Size, Vd, Vm, Vn);
}
2094
// VCLE Vd, Vm, #0 — per-lane compare less-or-equal-to-zero.
void ARMXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// BUGFIX: 0xB, not 0xD, at bits 23..20 — the two-register-misc layout is
	// 1D11 with D coming from EncodeVd; 0xD forced D and dropped bit 21.
	// Matches the correct VCGE/VCEQ #0 emitters above.
	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \
		| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (3 << 7) | (register_quad << 6) | EncodeVm(Vm));
}
2102
// VCLS.<size> Vd, Vm — count leading sign bits per lane. Integer only.
void ARMXEmitter::VCLS(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// BUGFIX: 0xB, not 0xD, at bits 23..20 (two-register-misc layout 1D11;
	// D comes from EncodeVd). 0xD forced D and dropped bit 21.
	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) \
		| EncodeVd(Vd) | (1 << 10) | (register_quad << 6) | EncodeVm(Vm));
}
2111
// VCLT Vd, Vn, Vm — implemented as VCGT with the source operands swapped.
void ARMXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	VCGT(Size, Vd, Vm, Vn);
}
2115
// VCLT Vd, Vm, #0 — per-lane compare less-than-zero.
void ARMXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// BUGFIX: 0xB, not 0xD, at bits 23..20 (two-register-misc layout 1D11;
	// D comes from EncodeVd). Matches the correct VCGE/VCEQ #0 emitters above.
	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \
		| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x20 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2123
// VCLZ.<size> Vd, Vm — count leading zeros per lane.
void ARMXEmitter::VCLZ(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// BUGFIX: 0xB, not 0xD, at bits 23..20 (two-register-misc layout 1D11;
	// D comes from EncodeVd). 0xD forced D and dropped bit 21.
	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) \
		| EncodeVd(Vd) | (0x48 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2131
// VCNT.8 Vd, Vm — population count per byte lane. Only I_8 is valid.
void ARMXEmitter::VCNT(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Size & I_8, "Can only use I_8 with %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// BUGFIX: two fixes against the ARM ARM two-register-misc table:
	// - bits 23..20 are 1D11 → literal 0xB (0xD forced D and dropped bit 21);
	// - VCNT's opcode bits 10..7 are 1010 → 0x50 << 4, not 0x90 << 4
	//   (cf. VMVN right after this group, which uses the adjacent 1011 pattern).
	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) \
		| EncodeVd(Vd) | (0x50 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2140
// VDUP.<size> Vd, Dm[index] — duplicate one scalar lane of Vm into all lanes of Vd.
void ARMXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// imm4 packs the lane index together with a one-hot size marker in the low bits.
	u32 imm4 = 0;
	if (Size & I_8)
		imm4 = (index << 1) | 1;
	else if (Size & I_16)
		imm4 = (index << 2) | 2;
	else if (Size & (I_32 | F_32))
		imm4 = (index << 3) | 4;
	Write32((0xF3 << 24) | (0xB << 20) | (imm4 << 16) \
		| EncodeVd(Vd) | (0xC << 8) | (register_quad << 6) | EncodeVm(Vm));
}
2156
// VDUP.<size> Vd, Rt — duplicate an ARM core register into all lanes of Vd.
void ARMXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Rt)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Rt < S0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	// This encoding places the raw D-register number directly, so strip the enum base.
	Vd = SubBase(Vd);
	// Size code for this form: 8-bit -> 2, 16-bit -> 1, 32-bit -> 0 (split across bits 22 and 5).
	u8 sizeEncoded = 0;
	if (Size & I_8)
		sizeEncoded = 2;
	else if (Size & I_16)
		sizeEncoded = 1;
	else if (Size & I_32)
		sizeEncoded = 0;

	Write32((0xEE << 24) | (0x8 << 20) | ((sizeEncoded & 2) << 21) | (register_quad << 21) \
		| ((Vd & 0xF) << 16) | (Rt << 12) | (0xB1 << 4) | ((Vd & 0x10) << 3) | ((sizeEncoded & 1) << 5));
}
2174
// VEXT Vd, Vn, Vm, #index — extract a byte-offset window from the Vn:Vm pair.
void ARMXEmitter::VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (0xB << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (index & 0xF) \
		| (register_quad << 6) | EncodeVm(Vm));
}
2182
// VFMA.F32 Vd, Vn, Vm — fused multiply-accumulate; requires VFPv4 hardware.
void ARMXEmitter::VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Size == F_32, "Passed invalid size to FP-only NEON instruction");
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2191
// VFMS.F32 Vd, Vn, Vm — fused multiply-subtract; bit 21 distinguishes it from VFMA.
void ARMXEmitter::VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Size == F_32, "Passed invalid size to FP-only NEON instruction");
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2200
// VHADD Vd, Vn, Vm — halving add, (Vn + Vm) >> 1 per lane. Integer only.
void ARMXEmitter::VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
		| EncodeVn(Vn) | EncodeVd(Vd) | (register_quad << 6) | EncodeVm(Vm));
}
2210
// VHSUB Vd, Vn, Vm — halving subtract, (Vn - Vm) >> 1 per lane. Integer only.
// NOTE(review): unlike most siblings here the U bit is placed at bit 23, not 24 —
// same pattern as VHADD above; verify against the ARM ARM if these are ever used.
void ARMXEmitter::VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
		| EncodeVn(Vn) | EncodeVd(Vd) | (1 << 9) | (register_quad << 6) | EncodeVm(Vm));
}
2220
// VMAX Vd, Vn, Vm — per-lane maximum (float or signed/unsigned integer).
void ARMXEmitter::VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
			| EncodeVn(Vn) | EncodeVd(Vd) | (0x60 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2232
// VMIN Vd, Vn, Vm — per-lane minimum (float or signed/unsigned integer).
void ARMXEmitter::VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
			| EncodeVn(Vn) | EncodeVd(Vd) | (0x61 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2244
// VMLA Vd, Vn, Vm — multiply-accumulate: Vd += Vn * Vm (float or integer).
void ARMXEmitter::VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2255
// VMLS Vd, Vn, Vm — multiply-subtract: Vd -= Vn * Vm. The integer form flips
// bit 24 (0xF2 -> 0xF3) relative to VMLA; the float form uses bit 21 instead.
void ARMXEmitter::VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (1 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2266
// VMLAL.<dt> Qd, Dn, Dm — widening multiply-accumulate. Integer only.
void ARMXEmitter::VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	// BUGFIX: widening ops take D-register sources (cf. VADDL above); the old
	// assert demanded a Q register for Vn.
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	// BUGFIX: long ops set bit 23 (cf. VADDL/VMULL in this file); without it this
	// emitted a three-registers-same-length encoding instead of VMLAL.
	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) \
		| EncodeVn(Vn) | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm));
}
2276
// VMLSL.<dt> Qd, Dn, Dm — widening multiply-subtract. Integer only.
void ARMXEmitter::VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	// BUGFIX: widening ops take D-register sources (cf. VADDL above); the old
	// assert demanded a Q register for Vn.
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	// BUGFIX: long ops set bit 23 (cf. VADDL/VMULL in this file); without it this
	// emitted a three-registers-same-length encoding instead of VMLSL.
	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) \
		| EncodeVn(Vn) | EncodeVd(Vd) | (0xA0 << 4) | EncodeVm(Vm));
}
2286
// VMUL Vd, Vn, Vm — per-lane multiply (float, integer, or polynomial via I_POLYNOMIAL).
void ARMXEmitter::VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (encodedSize(Size) << 20) | \
			EncodeVn(Vn) | EncodeVd(Vd) | (0x91 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2298
// VMULL.<dt> Qd, Dn, Dm — widening multiply (integer or polynomial).
void ARMXEmitter::VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	// BUGFIX: emit the U bit for I_UNSIGNED like the other long ops (VADDL/VSUBL);
	// previously unsigned VMULL silently encoded the signed variant. Polynomial
	// sizes don't set I_UNSIGNED, so that path is unaffected.
	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0xC0 << 4) | ((Size & I_POLYNOMIAL) ? 1 << 9 : 0) | EncodeVm(Vm));
}
2306
// VMLA (by scalar): Vd += Vn * Vm[lane]. Only the F_32 form is implemented.
void ARMXEmitter::VMLA_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	// No idea if the Non-Q case here works. Not really that interested.
	if (Size & F_32)
		Write32((0xF2 << 24) | (register_quad << 24) | (1 << 23) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x14 << 4) | EncodeVm(Vm));
	else
		_dbg_assert_msg_(false, "VMLA_scalar only supports float atm");
	//else
	//	Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (1 << 6) | EncodeVm(Vm));
	// Unsigned support missing
}
2321
// VMUL (by scalar): Vd = Vn * Vm[lane]. Only the F_32 form is implemented.
void ARMXEmitter::VMUL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	int VmEnc = EncodeVm(Vm);
	// No idea if the Non-Q case here works. Not really that interested.
	if (Size & F_32) // Q flag
		Write32((0xF2 << 24) | (register_quad << 24) | (1 << 23) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x94 << 4) | VmEnc);
	else
		_dbg_assert_msg_(false, "VMUL_scalar only supports float atm");

	// Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (1 << 23) | (encodedSize(Size) << 20) |
	// EncodeVn(Vn) | EncodeVd(Vd) | (0x84 << 4) | (register_quad << 6) | EncodeVm(Vm));
	// Unsigned support missing
}
2338
2339
// VMVN Vd, Vm — bitwise NOT of every lane.
void ARMXEmitter::VMVN(ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	// 0xF3B << 20 spells the full F3 prefix plus the two-register-misc 1011 nibble.
	Write32((0xF3B << 20) | \
		EncodeVd(Vd) | (0xB << 7) | (register_quad << 6) | EncodeVm(Vm));
}
2348
2349
// VNEG Vd, Vm — per-lane negation; bit 10 selects the float variant.
void ARMXEmitter::VNEG(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) | \
		EncodeVd(Vd) | ((Size & F_32) ? 1 << 10 : 0) | (0xE << 6) | (register_quad << 6) | EncodeVm(Vm));
}
2358
// VORN Vd, Vn, Vm — bitwise OR-NOT (Vn | ~Vm).
void ARMXEmitter::VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2366
// VORR Vd, Vn, Vm — bitwise OR (also the canonical register-move when Vn == Vm).
void ARMXEmitter::VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Vd == Vn && Vn == Vm), "All operands the same for %s is a nop", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2375
// VPADAL Vd, Vm — pairwise add and accumulate into wider lanes. Integer only.
void ARMXEmitter::VPADAL(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
		(0x60 << 4) | ((Size & I_UNSIGNED) ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm));
}
2385
// VPADD Dd, Dn, Dm — pairwise add (D registers only; no Q bit in this encoding).
void ARMXEmitter::VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	if (Size & F_32)
		Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
			(0xB1 << 4) | EncodeVm(Vm));
}
2395
// VPADDL Vd, Vm — pairwise add long: adjacent lanes summed into double-width lanes.
void ARMXEmitter::VPADDL(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
		(0x20 << 4) | (Size & I_UNSIGNED ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm));
}
2405
// VPMAX Dd, Dn, Dm — pairwise maximum (D registers only).
void ARMXEmitter::VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	if (Size & F_32)
		Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
			(0xA0 << 4) | EncodeVm(Vm));
}
2415
// VPMIN Dd, Dn, Dm — pairwise minimum (D registers only).
void ARMXEmitter::VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	if (Size & F_32)
		Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
			(0xA1 << 4) | EncodeVm(Vm));
}
2425
// VQABS Vd, Vm — saturating absolute value per lane. Integer only.
void ARMXEmitter::VQABS(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
		(0x70 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2435
// VQADD Vd, Vn, Vm — saturating add per lane. Integer only.
void ARMXEmitter::VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	// BUGFIX: emit the U bit for I_UNSIGNED, as VQSUB below already does;
	// previously unsigned saturating adds encoded the signed instruction.
	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2445
// VQDMLAL — saturating doubling multiply-accumulate long (signed only).
void ARMXEmitter::VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x90 << 4) | EncodeVm(Vm));
}
2453
// VQDMLSL — saturating doubling multiply-subtract long (signed only).
void ARMXEmitter::VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0xB0 << 4) | EncodeVm(Vm));
}
2461
// VQDMULH — saturating doubling multiply returning high half (signed only).
void ARMXEmitter::VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0xB0 << 4) | EncodeVm(Vm));
}
2469
// VQDMULL — saturating doubling multiply long (signed only).
void ARMXEmitter::VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0xD0 << 4) | EncodeVm(Vm));
}
2477
// VQNEG Vd, Vm — saturating negation per lane. Integer only.
void ARMXEmitter::VQNEG(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
		(0x78 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2487
// VQRDMULH — saturating rounding doubling multiply returning high half (signed only).
void ARMXEmitter::VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0xB0 << 4) | EncodeVm(Vm));
}
2495
// VQRSHL — saturating rounding shift left by per-lane signed shift amounts. Integer only.
void ARMXEmitter::VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x51 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2505
// VQSHL — saturating shift left by per-lane signed shift amounts. Integer only.
void ARMXEmitter::VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x41 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2515
// VQSUB Vd, Vn, Vm — saturating subtract per lane. Integer only.
void ARMXEmitter::VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x21 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2525
// VRADDHN — rounding add and narrow to high half. Integer only.
// Narrow ops encode the *source* element size, hence encodedSize(Size) - 1.
void ARMXEmitter::VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x40 << 4) | EncodeVm(Vm));
}
2533
// VRECPE Vd, Vm — reciprocal estimate (float when F_32, else unsigned integer form).
void ARMXEmitter::VRECPE(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (0xB << 16) | EncodeVd(Vd) | \
		(0x40 << 4) | (Size & F_32 ? 1 << 8 : 0) | (register_quad << 6) | EncodeVm(Vm));
}
2542
// VRECPS Vd, Vn, Vm — Newton-Raphson reciprocal step (float).
void ARMXEmitter::VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2550
// VRHADD Vd, Vn, Vm — rounding halving add per lane. Integer only.
void ARMXEmitter::VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2560
// VRSHL — rounding shift left by per-lane signed shift amounts. Integer only.
void ARMXEmitter::VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x50 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2570
// VRSQRTE Vd, Vm — reciprocal square-root estimate (float when F_32).
// Encodes the register fields by hand (SubBase) instead of via EncodeVd/EncodeVm.
void ARMXEmitter::VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;
	Vd = SubBase(Vd);
	Vm = SubBase(Vm);

	Write32((0xF3 << 24) | (0xB << 20) | ((Vd & 0x10) << 18) | (0xB << 16)
		| ((Vd & 0xF) << 12) | (9 << 7) | (Size & F_32 ? (1 << 8) : 0) | (register_quad << 6)
		| ((Vm & 0x10) << 1) | (Vm & 0xF));
}
2582
// VRSQRTS Vd, Vn, Vm — Newton-Raphson reciprocal square-root step (float).
void ARMXEmitter::VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2591
// VRSUBHN — rounding subtract and narrow to high half. Integer only.
// Narrow ops encode the *source* element size, hence encodedSize(Size) - 1.
void ARMXEmitter::VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x60 << 4) | EncodeVm(Vm));
}
2599
// VSHL (register): Vd = Vm shifted by the per-lane signed amounts in Vn.
// Note the parameter order (Vd, Vm, Vn) — Vn is the shift-amount register,
// matching its position in the encoding's Vn field.
void ARMXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vm, ARMReg Vn)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x40 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2609
2610
// Builds the combined size+shift immediate used by the NEON shift-by-immediate
// encodings. The element width is derived from the low size bits; 'inverse'
// encodes right shifts (amount counted down from the width) and 'halve'
// narrowing shifts (result elements are half-width).
static int EncodeSizeShift(u32 Size, int amount, bool inverse, bool halve) {
	const u32 sizeBits = Size & 0xF;
	int elemBits = 0;
	if (sizeBits == I_8)
		elemBits = 8;
	else if (sizeBits == I_16)
		elemBits = 16;
	else if (sizeBits == I_32)
		elemBits = 32;
	else if (sizeBits == I_64)
		elemBits = 64;
	if (inverse && halve) {
		// Narrowing right shift: marker is half the width, amount counts down from it.
		_dbg_assert_msg_(amount <= elemBits / 2, "Amount %d too large for narrowing shift (max %d)", amount, elemBits / 2);
		return elemBits - amount;
	}
	// Right shifts count down from the width; left shifts count up from it.
	return inverse ? 2 * elemBits - amount : elemBits + amount;
}
2627
2628
// Shared emitter for the NEON shift-by-immediate family (VSHL/VSHR/VSHRN/VSHLL).
// 'opcode' selects the member instruction; 'inverse'/'halve' adjust the
// immediate per EncodeSizeShift; bit 24 (U) comes from I_UNSIGNED.
void ARMXEmitter::EncodeShiftByImm(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount, u8 opcode, bool register_quad, bool inverse, bool halve) {
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
	int imm7 = EncodeSizeShift(Size, shiftAmount, inverse, halve);
	// The 7th immediate bit (the 64-bit element marker) lands in the L field at bit 7.
	int L = (imm7 >> 6) & 1;
	int U = (Size & I_UNSIGNED) ? 1 : 0;
	u32 value = (0xF2 << 24) | (U << 24) | (1 << 23) | ((imm7 & 0x3f) << 16) | EncodeVd(Vd) | (opcode << 8) | (L << 7) | (register_quad << 6) | (1 << 4) | EncodeVm(Vm);
	Write32(value);
}
2637
2638
// VSHL (immediate) — left shifts are sign-agnostic, so I_UNSIGNED is stripped
// (the encoding requires U = 0).
void ARMXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
	EncodeShiftByImm((Size & ~I_UNSIGNED), Vd, Vm, shiftAmount, 0x5, Vd >= Q0, false, false);
}
2641
2642
// VSHLL — widening shift left by immediate: Qd = Dm lanes extended and shifted.
void ARMXEmitter::VSHLL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
	if ((u32)shiftAmount == (8 * (Size & 0xF))) {
		// Entirely different encoding (A2) for size == shift! Bleh.
		// The A2 form zero-fills the low bits regardless of sign, so no U bit exists.
		int sz = 0;
		switch (Size & 0xF) {
		case I_8: sz = 0; break;
		case I_16: sz = 1; break;
		case I_32: sz = 2; break;
		case I_64:
			_dbg_assert_msg_(false, "Cannot VSHLL 64-bit elements");
		}
		int imm6 = 0x32 | (sz << 2);
		u32 value = (0xF3 << 24) | (1 << 23) | (imm6 << 16) | EncodeVd(Vd) | (0x3 << 8) | EncodeVm(Vm);
		Write32(value);
	} else {
		// BUGFIX: pass Size through unmodified so I_UNSIGNED reaches
		// EncodeShiftByImm — VSHLL's U bit selects zero vs sign extension of the
		// source lanes. The old code masked it off, forcing the signed variant.
		EncodeShiftByImm(Size, Vd, Vm, shiftAmount, 0xA, false, false, false);
	}
}
2660
2661
// VSHR — right shift by immediate; I_UNSIGNED selects logical vs arithmetic shift.
void ARMXEmitter::VSHR(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
	EncodeShiftByImm(Size, Vd, Vm, shiftAmount, 0x0, Vd >= Q0, true, false);
}
2664
2665
// VSHRN — narrowing right shift by immediate (Dd = Qm lanes shifted and truncated).
// The size reduction is handled by EncodeSizeShift via halve = true.
void ARMXEmitter::VSHRN(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
	// Reduce Size by 1 to encode correctly.
	EncodeShiftByImm(Size, Vd, Vm, shiftAmount, 0x8, false, true, true);
}
2669
2670
// VSUB Vd, Vn, Vm — per-lane subtract (float or integer).
// NOTE(review): the assert demands a Q register even though the encoding below
// computes register_quad and supports D operands — possibly intentional since
// PPSSPP only uses the Q form; confirm before relaxing.
void ARMXEmitter::VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \
			(0xD0 << 4) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
			(0x80 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2683
// VSUBHN — subtract and keep the narrowed high half of each result.
// Narrow ops encode the *source* element size, hence encodedSize(Size) - 1.
void ARMXEmitter::VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	// BUGFIX: a narrowing op writes a D register from Q sources (cf. VADDHN
	// above); the old assert demanded a Q destination, which is never valid here.
	_dbg_assert_msg_(Vd < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);

	Write32((0xF2 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x60 << 4) | EncodeVm(Vm));
}
2690
// VSUBL.<dt> Qd, Dn, Dm — widening subtract (U bit selects unsigned).
void ARMXEmitter::VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x20 << 4) | EncodeVm(Vm));
}
2697
// Emits NEON VSUBW (subtract wide): Qd = Qn - widened(Dm).
// Same U-bit handling as VSUBL; only the opcode nibble differs (0x3 vs 0x2).
void ARMXEmitter::VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);

	Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x30 << 4) | EncodeVm(Vm));
}
2704
// Emits NEON VSWP: exchanges the contents of Vd and Vm.
// NOTE(review): the assert restricts this to Q registers even though the
// encoding also supports D registers (register_quad would then be 0) —
// confirm whether the restriction is intentional.
void ARMXEmitter::VSWP(ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (1 << 17) | EncodeVd(Vd) | \
		(register_quad << 6) | EncodeVm(Vm));
}
2713
// Emits NEON VTRN (vector transpose): treats Vd/Vm as columns of 2x2 element
// matrices and transposes them. Element size goes into bits 19:18.
void ARMXEmitter::VTRN(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \
		(1 << 7) | (register_quad << 6) | EncodeVm(Vm));
}
2722
// Emits NEON VTST (vector test bits): per element, Vd = all-ones if
// (Vn & Vm) != 0, else all-zeros.
void ARMXEmitter::VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
		(0x81 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2731
// Emits NEON VUZP (vector unzip): de-interleaves the elements of Vd and Vm.
// Differs from VZIP only in the opcode nibble (0x10 vs 0x18 at bits 11:4).
void ARMXEmitter::VUZP(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \
		(0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2740
// Emits NEON VZIP (vector zip): interleaves the elements of Vd and Vm.
void ARMXEmitter::VZIP(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \
		(0x18 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
2749
2750
// Emits NEON VMOVL (vector move long): sign- or zero-extends each element of
// Dm into twice-as-wide elements of Qd. The source element size is encoded as
// a one-hot imm3 field at bits 21:19 (1 = 8-bit, 2 = 16-bit, 4 = 32-bit).
void ARMXEmitter::VMOVL(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm <= D31, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VMOVL");

	// U bit (bit 24) distinguishes zero-extend from sign-extend.
	bool unsign = (Size & I_UNSIGNED) != 0;
	int imm3 = 0;
	if (Size & I_8) imm3 = 1;
	if (Size & I_16) imm3 = 2;
	if (Size & I_32) imm3 = 4;

	Write32((0xF2 << 24) | (unsign << 24) | (1 << 23) | (imm3 << 19) | EncodeVd(Vd) | \
		(0xA1 << 4) | EncodeVm(Vm));
}
2765
2766
// Emits NEON VMOVN (vector move narrow): truncates each element of Qm into a
// half-width element of Dd. Narrowing from 8-bit elements is impossible.
void ARMXEmitter::VMOVN(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);

	// For consistency with assembler syntax and VMOVL - encode one size down.
	u32 halfSize = encodedSize(Size) - 1;

	Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | EncodeVm(Vm));
}
2777
2778
// Emits NEON VQMOVN (saturating narrow): like VMOVN but saturating, with the
// signedness of the saturation taken from I_UNSIGNED/I_SIGNED in Size.
void ARMXEmitter::VQMOVN(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in %s NEON", __FUNCTION__);
	_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);

	// Destination elements are half the source width; op selects the
	// signed (0b10) or unsigned (0b11) saturating variant.
	u32 halfSize = encodedSize(Size) - 1;
	u32 op = (1 << 7) | (Size & I_UNSIGNED ? 1 << 6 : 0);

	Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | op | EncodeVm(Vm));
}
2790
2791
// Emits NEON VQMOVUN (saturating narrow, signed source to unsigned result).
// op = 0b01 fixed — no signedness flag is needed in Size for this variant.
void ARMXEmitter::VQMOVUN(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);

	u32 halfSize = encodedSize(Size) - 1;
	u32 op = (1 << 6);

	Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | op | EncodeVm(Vm));
}
2802
2803
// Emits NEON VCVT between F32 and 32-bit integers, per lane.
// Per the local flags: I_32 set means float -> integer, clear means
// integer -> float; I_UNSIGNED selects the unsigned integer variant.
// NOTE(review): only Vd is consulted for the quad flag — assumes Vd and Vm
// are the same register class.
void ARMXEmitter::VCVT(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VCVT NEON");

	bool register_quad = Vd >= Q0;
	bool toInteger = (Size & I_32) != 0;
	bool isUnsigned = (Size & I_UNSIGNED) != 0;
	int op = (toInteger << 1) | (int)isUnsigned;

	Write32((0xF3 << 24) | (0xBB << 16) | EncodeVd(Vd) | (0x3 << 9) | (op << 7) | (register_quad << 6) | EncodeVm(Vm));
}
2814
2815
static int RegCountToType(int nRegs, NEONAlignment align) {
2816
switch (nRegs) {
2817
case 1:
2818
_dbg_assert_msg_(!((int)align & 1), "align & 1 must be == 0");
2819
return 7;
2820
case 2:
2821
_dbg_assert_msg_(!((int)align == 3), "align must be != 3");
2822
return 10;
2823
case 3:
2824
_dbg_assert_msg_(!((int)align & 1), "align & 1 must be == 0");
2825
return 6;
2826
case 4:
2827
return 2;
2828
default:
2829
_dbg_assert_msg_(false, "Invalid number of registers passed to vector load/store");
2830
return 0;
2831
}
2832
}
2833
2834
// Shared emitter for VLD1/VST1 (multiple single elements).
// load selects load vs store (bit 21); regCount/align pick the "type" field;
// Rm is the post-index register (R15 = none, R13 = writeback, per the encoding).
void ARMXEmitter::WriteVLDST1(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm)
{
	u32 spacing = RegCountToType(regCount, align); // Only support loading to 1 reg
	// Gets encoded as a double register
	Vd = SubBase(Vd);

	Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (load << 21) | (Rn << 16)
			| ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6)
			| (align << 4) | Rm);
}
2844
2845
// VLD1: load 1-4 consecutive D registers' worth of elements from [Rn].
void ARMXEmitter::VLD1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm) {
	const bool isLoad = true;
	WriteVLDST1(isLoad, Size, Vd, Rn, regCount, align, Rm);
}
2848
2849
// VST1: store 1-4 consecutive D registers' worth of elements to [Rn].
void ARMXEmitter::VST1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm) {
	const bool isLoad = false;
	WriteVLDST1(isLoad, Size, Vd, Rn, regCount, align, Rm);
}
2852
2853
// Shared emitter for VLD1/VST1 (single element to/from one lane).
// A lane of a Q register is remapped to the corresponding D register's lane.
// index_align packs the lane index and an optional alignment hint, with a
// layout that depends on the element size (see the per-size cases below).
void ARMXEmitter::WriteVLDST1_lane(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm)
{
	bool register_quad = Vd >= Q0;

	Vd = SubBase(Vd);
	// Support quad lanes by converting to D lanes
	if (register_quad && lane > 1) {
		Vd = (ARMReg)((int)Vd + 1);
		lane -= 2;
	}
	int encSize = encodedSize(Size);
	int index_align = 0;
	switch (encSize) {
	case 0: index_align = lane << 1; break;
	case 1: index_align = lane << 2; if (aligned) index_align |= 1; break;
	case 2: index_align = lane << 3; if (aligned) index_align |= 3; break;
	default:
		break;
	}

	Write32((0xF4 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (load << 21) | (Rn << 16)
			| ((Vd & 0xF) << 12) | (encSize << 10)
			| (index_align << 4) | Rm);
}
2877
2878
// VLD1 (single lane): load one element from [Rn] into lane `lane` of Vd.
void ARMXEmitter::VLD1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm) {
	const bool isLoad = true;
	WriteVLDST1_lane(isLoad, Size, Vd, Rn, lane, aligned, Rm);
}
2881
2882
// VST1 (single lane): store lane `lane` of Vd to [Rn].
void ARMXEmitter::VST1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm) {
	const bool isLoad = false;
	WriteVLDST1_lane(isLoad, Size, Vd, Rn, lane, aligned, Rm);
}
2885
2886
// VLD1 (single element to all lanes): loads one element from [Rn] and
// replicates it into every lane of Vd. T (bit 5) = 1 means two D registers,
// i.e. a full Q register destination.
void ARMXEmitter::VLD1_all_lanes(u32 Size, ARMReg Vd, ARMReg Rn, bool aligned, ARMReg Rm) {
	bool register_quad = Vd >= Q0;

	Vd = SubBase(Vd);

	int T = register_quad; // two D registers

	Write32((0xF4 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16)
			| ((Vd & 0xF) << 12) | (0xC << 8) | (encodedSize(Size) << 6)
			| (T << 5) | (aligned << 4) | Rm);
}
2897
2898
/*
void ARMXEmitter::VLD2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm)
{
	u32 spacing = 0x8; // Single spaced registers
	// Gets encoded as a double register
	Vd = SubBase(Vd);

	Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16)
			| ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6)
			| (align << 4) | Rm);
}
*/
2910
2911
// Shared emitter for the NEON "one register and a modified immediate" class
// (VMOV/VORR/VBIC/VMVN immediate). The 8-bit immediate is split into the
// a:bcd:efgh fields (bit 24, bits 18:16, bits 3:0); cmode lands in bits 11:8
// and op in bit 5.
void ARMXEmitter::WriteVimm(ARMReg Vd, int cmode, u8 imm, int op) {
	bool register_quad = Vd >= Q0;

	Write32((0xF28 << 20) | ((imm >> 7) << 24) | (((imm >> 4) & 0x7) << 16) | (imm & 0xF) |
		EncodeVd(Vd) | (register_quad << 6) | (op << 5) | (1 << 4) | ((cmode & 0xF) << 8));
}
2917
2918
// VMOV (immediate): loads a repeated immediate pattern into Vd.
// Each VIMMMode is only valid for one element size; anything else is rejected
// with a debug assert and emits nothing.
void ARMXEmitter::VMOV_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
	bool valid = false;
	int op = 0;
	switch (type) {
	// Byte-shifted immediate in a 32-bit element.
	case VIMM___x___x:
	case VIMM__x___x_:
	case VIMM_x___x__:
	case VIMMx___x___:
		valid = (Size == I_32);
		break;
	// Byte-shifted immediate in a 16-bit element.
	case VIMM_x_x_x_x:
	case VIMMx_x_x_x_:
		valid = (Size == I_16);
		break;
	// Replicate the byte across every lane.
	case VIMMxxxxxxxx:
		valid = (Size == I_8);
		break;
	// Each immediate bit expands to a full byte (op = 1 variant).
	case VIMMbits2bytes:
		valid = (Size == I_64);
		op = 1;
		break;
	default:
		break;
	}

	if (valid) {
		WriteVimm(Vd, (int)type, imm, op);
	} else {
		_dbg_assert_msg_(false, "Bad Size or type specified in %s: Size %i Type %i", __FUNCTION__, (int)Size, type);
	}
}
2953
2954
// VMOV (floating-point immediate): broadcast a float constant into Vd.
// Zero (either sign) is produced by XORing the register with itself.
// Any other value must be representable as a VFP/NEON modified immediate:
// +/- (16..31)/16 * 2^n with n in [-3, 4] (see VFPExpandImm in the ARM ARM).
// Previously only 1.5f, 1.0f and -1.0f were hardcoded; this computes the
// imm8 encoding for every representable value (the old three still map to
// their original encodings 0x78, 0x70 and 0xF0).
void ARMXEmitter::VMOV_immf(ARMReg Vd, float value) {
	if (value == 0.0f) {  // Also matches -0.0f, which becomes +0.0f as before.
		VEOR(Vd, Vd, Vd);
		return;
	}

	// Pick apart the IEEE-754 single: imm8 = a:NOT(b):cd:efgh where the float
	// is sign a, exponent NOT(b):bbbbb:cd (so biased exponent 124..131) and
	// fraction efgh:0(19). Representable iff the low 19 fraction bits are zero
	// and the exponent is in range.
	u32 f;
	memcpy(&f, &value, sizeof(f));
	const u32 sign = f >> 31;
	const u32 exponent = (f >> 23) & 0xFF;
	const u32 fraction = f & 0x007FFFFF;

	u8 bits = 0;
	if ((fraction & 0x0007FFFF) == 0 && exponent >= 124 && exponent <= 131) {
		bits = (u8)((sign << 7) | ((~(exponent >> 7) & 1) << 6) | ((exponent & 3) << 4) | (fraction >> 19));
	} else {
		_dbg_assert_msg_(false, "%s: Invalid floating point immediate", __FUNCTION__);
	}
	WriteVimm(Vd, VIMMf000f000, bits, 0);
}
2974
2975
// VORR (immediate): Vd |= repeated immediate pattern.
// Only the 16-bit and 32-bit byte-shifted immediate forms exist; setting
// cmode bit 0 turns the VMOV-immediate encoding into VORR.
void ARMXEmitter::VORR_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
	bool valid = false;
	switch (type) {
	case VIMM___x___x:
	case VIMM__x___x_:
	case VIMM_x___x__:
	case VIMMx___x___:
		valid = (Size == I_32);
		break;
	case VIMM_x_x_x_x:
	case VIMMx_x_x_x_:
		valid = (Size == I_16);
		break;
	default:
		break;
	}

	if (valid) {
		WriteVimm(Vd, (int)type | 1, imm, 0);
	} else {
		_dbg_assert_msg_(false, "Bad Size or type specified in VORR_imm");
	}
}
2999
3000
// VBIC (immediate): Vd &= ~(repeated immediate pattern).
// Same valid forms as VORR_imm; op = 1 selects the bit-clear variant.
void ARMXEmitter::VBIC_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
	bool valid = false;
	switch (type) {
	case VIMM___x___x:
	case VIMM__x___x_:
	case VIMM_x___x__:
	case VIMMx___x___:
		valid = (Size == I_32);
		break;
	case VIMM_x_x_x_x:
	case VIMMx_x_x_x_:
		valid = (Size == I_16);
		break;
	default:
		break;
	}

	if (valid) {
		WriteVimm(Vd, (int)type | 1, imm, 1);
	} else {
		_dbg_assert_msg_(false, "Bad Size or type specified in VBIC_imm");
	}
}
3024
3025
3026
// VMVN (immediate): Vd = ~(repeated immediate pattern).
// Same valid forms as VMOV_imm's 16/32-bit cases, with op = 1 selecting the
// bitwise-inverted variant.
void ARMXEmitter::VMVN_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
	bool valid = false;
	switch (type) {
	case VIMM___x___x:
	case VIMM__x___x_:
	case VIMM_x___x__:
	case VIMMx___x___:
		valid = (Size == I_32);
		break;
	case VIMM_x_x_x_x:
	case VIMMx_x_x_x_:
		valid = (Size == I_16);
		break;
	default:
		break;
	}

	if (valid) {
		WriteVimm(Vd, (int)type, imm, 1);
	} else {
		_dbg_assert_msg_(false, "Bad Size or type specified in VMVN_imm");
	}
}
3050
3051
3052
// Shared emitter for VREV64/VREV32/VREV16.
// `size` (lowercase) is the op field at bits 8:7 choosing the reversal region
// (0 = 64-bit, 1 = 32-bit, 2 = 16-bit); `Size` is the element width.
void ARMXEmitter::VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm)
{
	bool register_quad = Vd >= Q0;
	Vd = SubBase(Vd);
	Vm = SubBase(Vm);

	Write32((0xF3 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (0x3 << 20)
		| (encodedSize(Size) << 18) | ((Vd & 0xF) << 12) | (size << 7)
		| (register_quad << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
}
3062
3063
// VREV64: reverse the elements within each 64-bit region of Vm.
void ARMXEmitter::VREV64(u32 Size, ARMReg Vd, ARMReg Vm)
{
	const u32 op = 0;  // op field 0 selects 64-bit regions.
	VREVX(op, Size, Vd, Vm);
}
3067
3068
// VREV32: reverse the elements within each 32-bit region of Vm.
void ARMXEmitter::VREV32(u32 Size, ARMReg Vd, ARMReg Vm)
{
	const u32 op = 1;  // op field 1 selects 32-bit regions.
	VREVX(op, Size, Vd, Vm);
}
3072
3073
// VREV16: reverse the elements within each 16-bit region of Vm.
void ARMXEmitter::VREV16(u32 Size, ARMReg Vd, ARMReg Vm)
{
	const u32 op = 2;  // op field 2 selects 16-bit regions.
	VREVX(op, Size, Vd, Vm);
}
3077
3078
// See page A8-878 in ARMv7-A Architecture Reference Manual
3079
3080
// Dest is a Q register, Src is a D register.
3081
// VCVT.F32.F16: widen four half floats in Src (D0-D15) into four floats in
// Dest (Q0-Q15). Requires VFPv4 (half-precision extension).
void ARMXEmitter::VCVTF32F16(ARMReg Dest, ARMReg Src) {
	_assert_msg_(cpu_info.bVFPv4, "Can't use half-float conversions when you don't support VFPv4");
	// The old code detected out-of-range registers but fell through silently
	// and emitted a bogus instruction; fail loudly in debug builds instead.
	_dbg_assert_msg_(Dest >= Q0 && Dest <= Q15 && Src >= D0 && Src <= D15,
		"Invalid registers passed to %s", __FUNCTION__);

	Dest = SubBase(Dest);
	Src = SubBase(Src);

	int op = 1;  // 1 = widen (F16 -> F32).
	Write32((0xF3B6 << 16) | ((Dest & 0x10) << 18) | ((Dest & 0xF) << 12) | 0x600 | (op << 8) | ((Src & 0x10) << 1) | (Src & 0xF));
}
3093
3094
// UNTESTED
3095
// Dest is a D register, Src is a Q register.
3096
// VCVT.F16.F32: narrow four floats in Src (Q0-Q15) into four half floats in
// Dest (D0-D15). Requires VFPv4 (half-precision extension).
void ARMXEmitter::VCVTF16F32(ARMReg Dest, ARMReg Src) {
	_assert_msg_(cpu_info.bVFPv4, "Can't use half-float conversions when you don't support VFPv4");
	// The old code detected out-of-range registers but fell through silently
	// and emitted a bogus instruction; fail loudly in debug builds instead.
	_dbg_assert_msg_(Dest >= D0 && Dest <= D15 && Src >= Q0 && Src <= Q15,
		"Invalid registers passed to %s", __FUNCTION__);

	Dest = SubBase(Dest);
	Src = SubBase(Src);

	int op = 0;  // 0 = narrow (F32 -> F16).
	Write32((0xF3B6 << 16) | ((Dest & 0x10) << 18) | ((Dest & 0xF) << 12) | 0x600 | (op << 8) | ((Src & 0x10) << 1) | (Src & 0xF));
}
3106
3107
// Always clear code space with breakpoints, so that if someone accidentally executes
3108
// uninitialized, it just breaks into the debugger.
3109
void ARMXCodeBlock::PoisonMemory(int offset) {
3110
// TODO: this isn't right for ARM!
3111
memset(region + offset, 0xCC, region_size - offset);
3112
ResetCodePtr(offset);
3113
}
3114
3115
}
3116
3117