CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/ArmEmitter.cpp
Views: 1401
1
// Copyright (C) 2003 Dolphin Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official SVN repository and contact information can be found at
16
// http://code.google.com/p/dolphin-emu/
17
18
#include "ppsspp_config.h"
19
20
#include <stdarg.h>
21
#include <stddef.h>
22
#include <stdio.h>
23
#include <stdlib.h>
24
#include <string.h>
25
26
#if PPSSPP_PLATFORM(IOS)
27
#include <libkern/OSCacheControl.h>
28
#include <sys/mman.h>
29
#endif
30
31
#include "Common/Log.h"
32
#include "Common/MemoryUtil.h"
33
#include "Common/ArmEmitter.h"
34
#include "Common/CPUDetect.h"
35
36
#ifdef _WIN32
37
#include "CommonWindows.h"
38
#endif
39
40
// Want it in release builds too
41
#ifdef __ANDROID__
42
#undef _dbg_assert_msg_
43
#define _dbg_assert_msg_ _assert_msg_
44
#endif
45
46
namespace ArmGen
47
{
48
49
// Rotates a 32-bit value right by `amount` bits (amount in 0..31).
inline u32 RotR(u32 a, int amount) {
	if (amount == 0)
		return a;
	const u32 lo = a >> amount;
	const u32 hi = a << (32 - amount);
	return lo | hi;
}
53
54
// Rotates a 32-bit value left by `amount` bits (amount in 0..31).
inline u32 RotL(u32 a, int amount) {
	if (amount == 0)
		return a;
	const u32 hi = a << amount;
	const u32 lo = a >> (32 - amount);
	return hi | lo;
}
58
59
// Attempts to encode imm as an ARM "modified immediate" Operand2
// (an 8-bit value rotated right by an even amount). Fills op2 and
// returns true on success.
bool TryMakeOperand2(u32 imm, Operand2 &op2) {
	// Just brute force all 16 even rotations.
	for (int rot = 0; rot < 16; rot++) {
		int mask = RotR(0xFF, rot * 2);
		if ((imm & mask) != imm)
			continue;
		op2 = Operand2((u8)(RotL(imm, rot * 2)), (u8)rot);
		return true;
	}
	return false;
}
70
71
// Like TryMakeOperand2, but may also encode the bitwise complement of
// imm; *inverse tells the caller to use the complementing instruction
// (e.g. MVN instead of MOV, BIC instead of AND).
bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse)
{
	if (TryMakeOperand2(imm, op2)) {
		*inverse = false;
		return true;
	}
	*inverse = true;
	return TryMakeOperand2(~imm, op2);
}
81
82
// Like TryMakeOperand2, but may also encode the arithmetic negation of
// imm; *negated tells the caller to use the opposite instruction
// (e.g. SUB instead of ADD, CMN instead of CMP).
bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated)
{
	if (TryMakeOperand2(imm, op2)) {
		*negated = false;
		return true;
	}
	*negated = true;
	// Negate in unsigned arithmetic: -imm is signed-overflow UB when
	// imm == INT_MIN. 0u - (u32)imm yields the same bit pattern safely.
	return TryMakeOperand2(0u - (u32)imm, op2);
}
92
93
// Encodes imm as an Operand2, asserting that the encoding must succeed.
// If it fails anyway (release builds), the error is logged and a
// default-constructed Operand2 is returned.
Operand2 AssumeMakeOperand2(u32 imm) {
	Operand2 op2;
	bool result = TryMakeOperand2(imm, op2);
	_dbg_assert_msg_(result, "Could not make assumed Operand2.");
	if (!result) {
		// Make double sure that we get it logged.
		ERROR_LOG(Log::JIT, "Could not make assumed Operand2.");
	}
	return op2;
}
103
104
// Tries to load val into reg with at most two instructions (a MOV plus
// at most one ORR), by splitting the value into byte-sized chunks at
// even bit positions. Returns false if more than two chunks are needed.
bool ARMXEmitter::TrySetValue_TwoOp(ARMReg reg, u32 val)
{
	// First pass: count the nonzero byte-aligned chunks. Each hit skips
	// ahead a full byte (i advances by 4 two-bit steps total).
	int ops = 0;
	for (int i = 0; i < 16; i++)
	{
		if ((val >> (i*2)) & 0x3)
		{
			ops++;
			i+=3;
		}
	}
	if (ops > 2)
		return false;

	// Second pass: emit MOV for the first chunk, ORR for the second.
	// val is consumed two bits per iteration, plus 6 more when a chunk
	// is emitted (8 bits per chunk overall).
	bool first = true;
	for (int i = 0; i < 16; i++, val >>=2) {
		if (val & 0x3) {
			first ? MOV(reg, Operand2((u8)val, (u8)((16-i) & 0xF)))
			: ORR(reg, reg, Operand2((u8)val, (u8)((16-i) & 0xF)));
			first = false;
			i+=3;
			val >>= 6;
		}
	}
	return true;
}
130
131
// Checks whether the float bit pattern `val` fits the VFP 8-bit
// immediate form; on success stores the encoded imm8 in op2.
bool TryMakeFloatIMM8(u32 val, Operand2 &op2)
{
	// The low 19 mantissa bits must be zero for the imm8 form.
	if ((val & 0x0007FFFF) == 0)
	{
		// VFP Encoding for Imms: <7> Not(<6>) Repeat(<6>,5) <5:0> Zeros(19)
		bool bit6 = (val & 0x40000000) == 0x40000000;
		bool canEncode = true;
		// Exponent bits 29..25 must all be the complement of bit 30
		// (they are the replicated imm8<6>, which is !bit6 below).
		for (u32 mask = 0x20000000; mask >= 0x02000000; mask >>= 1)
		{
			if (((val & mask) == mask) == bit6)
				canEncode = false;
		}
		if (canEncode)
		{
			u32 imm8 = (val & 0x80000000) >> 24; // sign bit
			imm8 |= (!bit6 << 6);
			imm8 |= (val & 0x01F80000) >> 19;
			op2 = IMM(imm8);
			return true;
		}
	}

	return false;
}
155
156
// Loads the bit pattern of the (optionally negated) float into a GPR.
void ARMXEmitter::MOVI2FR(ARMReg dest, float val, bool negate)
{
	union {float f; u32 u;} bits;
	if (negate)
		bits.f = -val;
	else
		bits.f = val;
	MOVI2R(dest, bits.u);
}
162
163
// Loads a float constant into a VFP single register. Uses a direct
// VMOV when the value fits the VFP imm8 form; otherwise goes through
// tempReg.
void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate)
{
	union {float f; u32 u;} bits;
	bits.f = negate ? -val : val;
	Operand2 imm8;
	// Direct move works when the mantissa is (mostly) empty.
	if (TryMakeFloatIMM8(bits.u, imm8)) {
		VMOV(dest, imm8);
		return;
	}
	// Fall back: materialize in a GPR, then transfer.
	MOVI2R(tempReg, bits.u);
	VMOV(dest, tempReg);
	// Otherwise, possible to use a literal pool and VLDR directly (+- 1020)
}
178
179
// Loads a float constant, replicated across a NEON register. Uses an
// immediate VMOV when possible, otherwise VDUPs from tempReg.
void ARMXEmitter::MOVI2F_neon(ARMReg dest, float val, ARMReg tempReg, bool negate)
{
	union {float f; u32 u;} bits;
	bits.f = negate ? -val : val;
	Operand2 imm8;
	// Direct move works when the mantissa is (mostly) empty.
	if (TryMakeFloatIMM8(bits.u, imm8)) {
		VMOV_neon(F_32, dest, bits.u);
		return;
	}
	// Fall back: materialize in a GPR, then broadcast.
	MOVI2R(tempReg, bits.u);
	VDUP(F_32, dest, tempReg);
	// Otherwise, possible to use a literal pool and VLD1 directly (+- 1020)
}
194
195
// rd = rs + val. Prefers the scratch-free encodings; falls back to
// materializing val in scratch.
void ARMXEmitter::ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryADDI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	ADD(rd, rs, scratch);
}
202
203
// Emits rd = rs + val in at most two instructions, without a scratch
// register. Returns false if val can't be encoded that cheaply.
bool ARMXEmitter::TryADDI2R(ARMReg rd, ARMReg rs, u32 val)
{
	if (val == 0) {
		if (rd != rs)
			MOV(rd, rs);
		return true;
	}
	Operand2 op2;
	bool negated;
	if (TryMakeOperand2_AllowNegation(val, op2, &negated)) {
		if (!negated)
			ADD(rd, rs, op2);
		else
			SUB(rd, rs, op2);
		return true;
	} else {
		// Try 16-bit additions and subtractions - easy to test for.
		// Should also try other rotations...
		if ((val & 0xFFFF0000) == 0) {
			// Decompose into two additions.
			ADD(rd, rs, Operand2((u8)(val >> 8), 12)); // rotation right by 12*2 == rotation left by 8
			ADD(rd, rd, Operand2((u8)(val), 0));
			return true;
		}
		// Negate in unsigned arithmetic: -(s32)val is signed-overflow UB
		// when val == 0x80000000. 0u - val gives the same bits safely.
		const u32 negVal = 0u - val;
		if ((negVal & 0xFFFF0000) == 0) {
			// Decompose into two subtractions.
			SUB(rd, rs, Operand2((u8)(negVal >> 8), 12));
			SUB(rd, rd, Operand2((u8)(negVal), 0));
			return true;
		}
		return false;
	}
}
236
237
// rd = rs - val. Prefers the scratch-free encodings; falls back to
// materializing val in scratch.
void ARMXEmitter::SUBI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TrySUBI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	SUB(rd, rs, scratch);
}
244
245
// Emits rd = rs - val by reusing the ADD path with the negated value.
// Negation is done in unsigned arithmetic because -(s32)val is
// signed-overflow UB when val == 0x80000000.
bool ARMXEmitter::TrySUBI2R(ARMReg rd, ARMReg rs, u32 val)
{
	// Just add a negative.
	return TryADDI2R(rd, rs, 0u - val);
}
250
251
// rd = rs & val. Prefers the scratch-free encodings; falls back to
// materializing val in scratch.
void ARMXEmitter::ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryANDI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	AND(rd, rs, scratch);
}
258
259
// Emits rd = rs & val without a scratch register if possible: a single
// AND/BIC immediate, a UBFX for a contiguous low-bit mask (ARMv7), or a
// short BIC sequence. Returns false if a scratch-register load would be
// needed instead.
bool ARMXEmitter::TryANDI2R(ARMReg rd, ARMReg rs, u32 val)
{
	Operand2 op2;
	bool inverse;
	if (val == 0) {
		// Avoid the ALU, may improve pipeline.
		MOV(rd, 0);
		return true;
	} else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) {
		if (!inverse) {
			AND(rd, rs, op2);
		} else {
			BIC(rd, rs, op2);
		}
		return true;
	} else {
#if PPSSPP_ARCH(ARMV7)
		// Check if we have a single pattern of sequential bits.
		int seq = -1;
		for (int i = 0; i < 32; ++i) {
			if (((val >> i) & 1) == 0) {
				if (seq == -1) {
					// The width is all bits previous to this, set to 1.
					seq = i;
				}
			} else if (seq != -1) {
				// Uh oh, more than one sequence.
				seq = -2;
			}
		}

		if (seq > 0) {
			// val is a mask of the low `seq` bits: extract them directly.
			UBFX(rd, rs, 0, seq);
			return true;
		}
#endif

		// Count how many BIC immediates the value would take.
		int ops = 0;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			// If either low bit is not set, we need to use a BIC for them.
			if ((bits & 3) != 3) {
				++ops;
				// Each BIC handles a whole byte of the rotated value.
				i += 8 - 2;
			}
		}

		// The worst case is 4 (e.g. 0x55555555.)
#if PPSSPP_ARCH(ARMV7)
		if (ops > 3) {
			return false;
		}
#endif
		bool first = true;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			if ((bits & 3) != 3) {
				u8 rotation = i == 0 ? 0 : 16 - i / 2;
				if (first) {
					BIC(rd, rs, Operand2(~bits, rotation));
					first = false;
				} else {
					BIC(rd, rd, Operand2(~bits, rotation));
				}
				// Well, we took care of these other bits while we were at it.
				i += 8 - 2;
			}
		}
		return true;
	}
}
330
331
// Compares rs against val. Prefers the scratch-free encoding; falls
// back to materializing val in scratch.
void ARMXEmitter::CMPI2R(ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryCMPI2R(rs, val))
		return;
	MOVI2R(scratch, val);
	CMP(rs, scratch);
}
338
339
// Compares rs against an immediate without a scratch register, using
// CMN with the negated value when the plain form doesn't encode.
bool ARMXEmitter::TryCMPI2R(ARMReg rs, u32 val)
{
	Operand2 op2;
	bool negated;
	if (!TryMakeOperand2_AllowNegation(val, op2, &negated))
		return false;
	if (negated)
		CMN(rs, op2);
	else
		CMP(rs, op2);
	return true;
}
353
354
// Tests rs against val. Prefers the scratch-free encoding; falls back
// to materializing val in scratch.
void ARMXEmitter::TSTI2R(ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryTSTI2R(rs, val))
		return;
	MOVI2R(scratch, val);
	TST(rs, scratch);
}
361
362
// Tests rs against an immediate without a scratch register; fails when
// val has no Operand2 encoding.
bool ARMXEmitter::TryTSTI2R(ARMReg rs, u32 val)
{
	Operand2 op2;
	if (!TryMakeOperand2(val, op2))
		return false;
	TST(rs, op2);
	return true;
}
372
373
// rd = rs | val. Prefers the scratch-free encodings; falls back to
// materializing val in scratch.
void ARMXEmitter::ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryORI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	ORR(rd, rs, scratch);
}
380
381
// Emits rd = rs | val without a scratch register if possible, using a
// single ORR immediate or a short ORR sequence. Returns false when a
// scratch-register load (possibly via MVN) would be cheaper.
bool ARMXEmitter::TryORI2R(ARMReg rd, ARMReg rs, u32 val)
{
	Operand2 op2;
	if (val == 0) {
		// Avoid the ALU, may improve pipeline.
		if (rd != rs) {
			MOV(rd, rs);
		}
		return true;
	} else if (TryMakeOperand2(val, op2)) {
		ORR(rd, rs, op2);
		return true;
	} else {
		// Count how many ORR immediates the value would take.
		int ops = 0;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			// If either low bit is set, we need to use a ORR for them.
			if ((bits & 3) != 0) {
				++ops;
				// Each ORR handles a whole byte of the rotated value.
				i += 8 - 2;
			}
		}

		// The worst case is 4 (e.g. 0x55555555.) But MVN can make it 2. Not sure if better.
		bool inversed;
		if (TryMakeOperand2_AllowInverse(val, op2, &inversed) && ops >= 3) {
			// MVN into a scratch + ORR would beat emitting 3+ ORRs here.
			return false;
#if PPSSPP_ARCH(ARMV7)
		} else if (ops > 3) {
			return false;
#endif
		}

		bool first = true;
		for (int i = 0; i < 32; i += 2) {
			u8 bits = RotR(val, i) & 0xFF;
			if ((bits & 3) != 0) {
				u8 rotation = i == 0 ? 0 : 16 - i / 2;
				if (first) {
					ORR(rd, rs, Operand2(bits, rotation));
					first = false;
				} else {
					ORR(rd, rd, Operand2(bits, rotation));
				}
				// Well, we took care of these other bits while we were at it.
				i += 8 - 2;
			}
		}
		return true;
	}
}
432
433
// rd = rs ^ val. Prefers the scratch-free encoding; falls back to
// materializing val in scratch.
void ARMXEmitter::EORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)
{
	if (TryEORI2R(rd, rs, val))
		return;
	MOVI2R(scratch, val);
	EOR(rd, rs, scratch);
}
440
441
// Emits rd = rs ^ val without a scratch register if the immediate
// encodes; XOR with zero degenerates to a plain register copy.
bool ARMXEmitter::TryEORI2R(ARMReg rd, ARMReg rs, u32 val)
{
	if (val == 0) {
		if (rd != rs) {
			MOV(rd, rs);
		}
		return true;
	}
	Operand2 op2;
	if (!TryMakeOperand2(val, op2))
		return false;
	EOR(rd, rs, op2);
	return true;
}
456
457
// Emits the pending literal pool constants at the current code position
// and backpatches each queued LDR with the PC-relative offset to its
// constant. Duplicate values share a single pool slot.
void ARMXEmitter::FlushLitPool()
{
	for (LiteralPool& pool : currentLitPool) {
		// Search for duplicates
		for (LiteralPool& old_pool : currentLitPool) {
			if (old_pool.val == pool.val)
				pool.loc = old_pool.loc;
		}

		// Write the constant to Literal Pool
		if (!pool.loc)
		{
			pool.loc = (intptr_t)code;
			Write32(pool.val);
		}
		// PC reads as the instruction's address + 8 on ARM, hence the -8.
		s32 offset = (s32)(pool.loc - (intptr_t)pool.ldr_address - 8);

		// Backpatch the LDR
		// Bit 23 is the U (add-offset) bit; the low bits get |offset|.
		*(u32*)pool.ldr_address |= (offset >= 0) << 23 | abs(offset);
	}
	// TODO: Save a copy of previous pools in case they are still in range.
	currentLitPool.clear();
}
480
481
// Queues a constant for the literal pool. The LDR at the current code
// position gets backpatched when FlushLitPool() writes the pool.
void ARMXEmitter::AddNewLit(u32 val)
{
	LiteralPool item;
	item.loc = 0;
	item.val = val;
	item.ldr_address = code;
	currentLitPool.push_back(item);
}
489
490
// Loads an arbitrary 32-bit constant into reg using the cheapest
// available sequence. With optimize == false (ARMv7 only), a
// fixed-length MOVW/MOVT pair is always emitted so the site can be
// backpatched later.
void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize)
{
	Operand2 op2;
	bool inverse;

#if PPSSPP_ARCH(ARMV7)
	// Unused
	if (!optimize)
	{
		// For backpatching on ARMv7
		MOVW(reg, val & 0xFFFF);
		MOVT(reg, val, true);
		return;
	}
#endif

	if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) {
		inverse ? MVN(reg, op2) : MOV(reg, op2);
	} else {
#if PPSSPP_ARCH(ARMV7)
		// Use MOVW+MOVT for ARMv7+
		MOVW(reg, val & 0xFFFF);
		if(val & 0xFFFF0000)
			MOVT(reg, val, true);
#else
		if (!TrySetValue_TwoOp(reg,val)) {
			// Build the value one byte at a time: MOV for the first
			// nonzero byte, ORR for each subsequent one.
			bool first = true;
			for (int i = 0; i < 32; i += 2) {
				u8 bits = RotR(val, i) & 0xFF;
				if ((bits & 3) != 0) {
					u8 rotation = i == 0 ? 0 : 16 - i / 2;
					if (first) {
						MOV(reg, Operand2(bits, rotation));
						first = false;
					} else {
						ORR(reg, reg, Operand2(bits, rotation));
					}
					// Well, we took care of these other bits while we were at it.
					i += 8 - 2;
				}
			}
			// Use literal pool for ARMv6.
			// Disabled for now as it is crashing since Vertex Decoder JIT
			// AddNewLit(val);
			// LDR(reg, R_PC); // To be backpatched later
		}
#endif
	}
}
539
540
// Printable names indexed by raw ARMReg value: 16 GPRs, then the s/d/q
// VFP and NEON registers. Presumably ordered to match the ARMReg enum
// in the header — confirm if that enum changes.
static const char *const armRegStrings[] = {
	"r0","r1","r2","r3",
	"r4","r5","r6","r7",
	"r8","r9","r10","r11",
	"r12","r13","r14","PC",

	"s0", "s1", "s2", "s3",
	"s4", "s5", "s6", "s7",
	"s8", "s9", "s10", "s11",
	"s12", "s13", "s14", "s15",

	"s16", "s17", "s18", "s19",
	"s20", "s21", "s22", "s23",
	"s24", "s25", "s26", "s27",
	"s28", "s29", "s30", "s31",

	"d0", "d1", "d2", "d3",
	"d4", "d5", "d6", "d7",
	"d8", "d9", "d10", "d11",
	"d12", "d13", "d14", "d15",

	"d16", "d17", "d18", "d19",
	"d20", "d21", "d22", "d23",
	"d24", "d25", "d26", "d27",
	"d28", "d29", "d30", "d31",

	"q0", "q1", "q2", "q3",
	"q4", "q5", "q6", "q7",
	"q8", "q9", "q10", "q11",
	"q12", "q13", "q14", "q15",
};
571
572
// Returns a printable name for reg, or "(bad)" for values past the end
// of the name table.
const char *ARMRegAsString(ARMReg reg) {
	constexpr unsigned int kNumNames = sizeof(armRegStrings) / sizeof(armRegStrings[0]);
	if ((unsigned int)reg >= kNumNames)
		return "(bad)";
	return armRegStrings[(int)reg];
}
577
578
// Calls func: a direct BL when in branch range, otherwise loads the
// address into reg and branches through it.
void ARMXEmitter::QuickCallFunction(ARMReg reg, const void *func) {
	if (!BLInRange(func)) {
		MOVP2R(reg, func);
		BL(reg);
	} else {
		BL(func);
	}
}
586
587
// Points the emitter at a new output buffer and resets the cache-flush
// watermark. NOTE(review): writePtr is ignored — this emitter appears
// to assume the write and execute addresses are identical (no W^X
// split); confirm against callers.
void ARMXEmitter::SetCodePointer(u8 *ptr, u8 *writePtr)
{
	code = ptr;
	startcode = code;
	lastCacheFlushEnd = ptr;
}
593
594
// Current emit position (read-only view).
const u8 *ARMXEmitter::GetCodePointer() const
{
	return code;
}

// Current emit position (writable view). Identical to GetCodePointer()
// here since this emitter keeps a single pointer.
u8 *ARMXEmitter::GetWritableCodePtr()
{
	return code;
}
603
604
// Fills `bytes` of code space (rounded down to whole instructions) with
// breakpoint instructions so any stray execution traps.
void ARMXEmitter::ReserveCodeSpace(u32 bytes)
{
	for (u32 left = bytes / 4; left != 0; left--)
		Write32(0xE1200070); //bkpt 0
}
609
610
// Pads with breakpoints up to the next 16-byte boundary and returns the
// aligned code pointer.
const u8 *ARMXEmitter::AlignCode16()
{
	const u32 pad = (u32)((-(intptr_t)code) & 15);
	ReserveCodeSpace(pad);
	return code;
}
615
616
// Pads with NOPs (not breakpoints) up to the next 16-byte boundary and
// returns the aligned code pointer.
const u8 *ARMXEmitter::NopAlignCode16() {
	int remaining = (int)((-(intptr_t)code) & 15);
	while (remaining >= 4) {
		Write32(0xE320F000); // one of many possible nops
		remaining -= 4;
	}
	return code;
}
623
624
// Pads with breakpoints up to the next 4 KB page boundary and returns
// the aligned code pointer.
const u8 *ARMXEmitter::AlignCodePage()
{
	const u32 pad = (u32)((-(intptr_t)code) & 4095);
	ReserveCodeSpace(pad);
	return code;
}
629
630
// Flushes the instruction cache for everything emitted since the last
// flush, then advances the watermark.
void ARMXEmitter::FlushIcache()
{
	FlushIcacheSection(lastCacheFlushEnd, code);
	lastCacheFlushEnd = code;
}
635
636
// Invalidates the instruction cache for [start, end) using the
// platform-appropriate mechanism. On non-ARM desktop hosts other than
// Windows this is a no-op (the JIT output isn't executed there).
void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end)
{
#if PPSSPP_PLATFORM(IOS)
	// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);
	sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);
#elif PPSSPP_PLATFORM(WINDOWS)
	FlushInstructionCache(GetCurrentProcess(), start, end - start);
#elif PPSSPP_ARCH(ARM)

#if defined(__clang__) || defined(__ANDROID__)
	__clear_cache(start, end);
#else
	__builtin___clear_cache(start, end);
#endif

#endif
}
653
654
// Sets the condition code applied to subsequently emitted instructions;
// shifted into bits 31:28 where ARM instructions carry their condition.
void ARMXEmitter::SetCC(CCFlags cond)
{
	condition = cond << 28;
}
658
659
void ARMXEmitter::NOP(int count)
660
{
661
for (int i = 0; i < count; i++) {
662
Write32(condition | 0x01A00000);
663
}
664
}
665
666
// Switches the data endianness state; BE selects big-endian.
void ARMXEmitter::SETEND(bool BE)
{
	//SETEND is non-conditional
	Write32(0xF1010000 | (BE << 9));
}
// Software breakpoint; arg is split across the instruction's two
// immediate fields (bits 19:8 and 3:0).
void ARMXEmitter::BKPT(u16 arg)
{
	Write32(condition | 0x01200070 | (arg << 4 & 0x000FFF00) | (arg & 0x0000000F));
}
// Hint that the current thread can yield.
void ARMXEmitter::YIELD()
{
	Write32(condition | 0x0320F001);
}
679
680
// Emits a placeholder for an unconditional branch; resolve later with
// SetJumpTarget().
FixupBranch ARMXEmitter::B()
{
	FixupBranch branch;
	branch.type = 0; // Zero for B
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}
// Emits a placeholder for a branch-with-link; resolve later with
// SetJumpTarget().
FixupBranch ARMXEmitter::BL()
{
	FixupBranch branch;
	branch.type = 1; // One for BL
	branch.ptr = code;
	branch.condition = condition;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}
700
701
// Emits a placeholder for a conditional branch (condition taken from
// Cond, not the emitter's current condition); resolve with SetJumpTarget().
FixupBranch ARMXEmitter::B_CC(CCFlags Cond)
{
	FixupBranch branch;
	branch.type = 0; // Zero for B
	branch.ptr = code;
	branch.condition = Cond << 28;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}
// Emits a conditional branch directly to fnptr; asserts the +-32MB range.
void ARMXEmitter::B_CC(CCFlags Cond, const void *fnptr)
{
	// PC reads as instruction address + 8, hence the +8 bias.
	ptrdiff_t distance = (intptr_t)fnptr - ((intptr_t)(code) + 8);
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "B_CC out of range (%p calls %p)", code, fnptr);

	Write32((Cond << 28) | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));
}
// Emits a placeholder for a conditional branch-with-link; resolve with
// SetJumpTarget().
FixupBranch ARMXEmitter::BL_CC(CCFlags Cond)
{
	FixupBranch branch;
	branch.type = 1; // One for BL
	branch.ptr = code;
	branch.condition = Cond << 28;
	//We'll write NOP here for now.
	Write32(condition | 0x01A00000);
	return branch;
}
// Resolves a FixupBranch placeholder so it branches to the current code
// position, patching in B or BL depending on branch.type.
void ARMXEmitter::SetJumpTarget(FixupBranch const &branch)
{
	ptrdiff_t distance = ((intptr_t)(code) - 8) - (intptr_t)branch.ptr;
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "SetJumpTarget out of range (%p calls %p)", code, branch.ptr);
	u32 instr = (u32)(branch.condition | ((distance >> 2) & 0x00FFFFFF));
	instr |= branch.type == 0 ? /* B */ 0x0A000000 : /* BL */ 0x0B000000;
	*(u32*)branch.ptr = instr;
}
738
// Emits an unconditional branch directly to fnptr; asserts +-32MB range.
void ARMXEmitter::B(const void *fnptr)
{
	// PC reads as instruction address + 8, hence the +8 bias.
	ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "B out of range (%p calls %p)", code, fnptr);

	Write32(condition | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));
}

// Branch to the address in a register (BX src).
void ARMXEmitter::B(ARMReg src)
{
	Write32(condition | 0x012FFF10 | src);
}

// True when fnptr is within the +-32MB range of a direct BL from here.
bool ARMXEmitter::BLInRange(const void *fnptr) const {
	ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);
	if (distance <= -0x2000000 || distance >= 0x2000000)
		return false;
	else
		return true;
}

// Emits a branch-with-link directly to fnptr; asserts +-32MB range.
void ARMXEmitter::BL(const void *fnptr)
{
	ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);
	_assert_msg_(distance > -0x2000000 && distance < 0x2000000,
	             "BL out of range (%p calls %p)", code, fnptr);
	Write32(condition | 0x0B000000 | ((distance >> 2) & 0x00FFFFFF));
}
// Branch-with-link to the address in a register (BLX src).
void ARMXEmitter::BL(ARMReg src)
{
	Write32(condition | 0x012FFF30 | src);
}
771
772
// Pushes `num` registers (passed as varargs) onto the stack in one
// store-multiple. 2349 == 0x92D, the STMDB sp! opcode halfword.
void ARMXEmitter::PUSH(const int num, ...)
{
	u16 RegList = 0;
	u8 Reg;
	int i;
	va_list vl;
	va_start(vl, num);
	// Build the 16-bit register bitmask from the vararg register numbers.
	for (i = 0; i < num; i++) {
		Reg = va_arg(vl, u32);
		RegList |= (1 << Reg);
	}
	va_end(vl);
	Write32(condition | (2349 << 16) | RegList);
}

// Pops `num` registers (passed as varargs) off the stack in one
// load-multiple. 2237 == 0x8BD, the LDMIA sp! opcode halfword.
void ARMXEmitter::POP(const int num, ...)
{
	u16 RegList = 0;
	u8 Reg;
	int i;
	va_list vl;
	va_start(vl, num);
	for (i=0;i<num;i++)
	{
		Reg = va_arg(vl, u32);
		RegList |= (1 << Reg);
	}
	va_end(vl);
	Write32(condition | (2237 << 16) | RegList);
}
802
803
// Encoder for the shift pseudo-ops (LSL/LSR/ASR): immediate-shift form.
void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2)
{
	Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | op2.Imm5() | (op << 4) | src);
}
// Encoder for the shift pseudo-ops: register-shift form.
void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2)
{
	Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | (op2 << 8) | (op << 4) | src);
}
811
812
// Opcode table for the data-processing instructions, indexed first by
// the instruction (see InstNames) and then by the operand kind.
// IMM, REG, IMMSREG, RSR
// -1 for invalid if the instruction doesn't support that
const s32 InstOps[][4] = {{16, 0, 0, 0}, // AND(s)
                          {17, 1, 1, 1}, // EOR(s)
                          {18, 2, 2, 2}, // SUB(s)
                          {19, 3, 3, 3}, // RSB(s)
                          {20, 4, 4, 4}, // ADD(s)
                          {21, 5, 5, 5}, // ADC(s)
                          {22, 6, 6, 6}, // SBC(s)
                          {23, 7, 7, 7}, // RSC(s)
                          {24, 8, 8, 8}, // TST
                          {25, 9, 9, 9}, // TEQ
                          {26, 10, 10, 10}, // CMP
                          {27, 11, 11, 11}, // CMN
                          {28, 12, 12, 12}, // ORR(s)
                          {29, 13, 13, 13}, // MOV(s)
                          {30, 14, 14, 14}, // BIC(s)
                          {31, 15, 15, 15}, // MVN(s)
                          {24, -1, -1, -1}, // MOVW
                          {26, -1, -1, -1}, // MOVT
                         };

// Mnemonics parallel to InstOps; used in assertion messages.
const char *InstNames[] = { "AND",
                            "EOR",
                            "SUB",
                            "RSB",
                            "ADD",
                            "ADC",
                            "SBC",
                            "RSC",
                            "TST",
                            "TEQ",
                            "CMP",
                            "CMN",
                            "ORR",
                            "MOV",
                            "BIC",
                            "MVN",
                            "MOVW",
                            "MOVT",
                          };
853
854
// Public data-processing emitters. Each forwards to WriteInstruction
// with its index into InstOps/InstNames; the S-suffixed variants set
// the condition flags. TST/TEQ/CMP/CMN discard their result (Rd = R0
// is just a placeholder); MOVT can optionally take the top 16 bits of
// a 32-bit immediate.
void ARMXEmitter::AND (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm); }
void ARMXEmitter::ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm, true); }
void ARMXEmitter::EOR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm); }
void ARMXEmitter::EORS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm, true); }
void ARMXEmitter::SUB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm); }
void ARMXEmitter::SUBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm, true); }
void ARMXEmitter::RSB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm); }
void ARMXEmitter::RSBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm, true); }
void ARMXEmitter::ADD (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm); }
void ARMXEmitter::ADDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm, true); }
void ARMXEmitter::ADC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm); }
void ARMXEmitter::ADCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm, true); }
void ARMXEmitter::SBC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm); }
void ARMXEmitter::SBCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm, true); }
void ARMXEmitter::RSC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm); }
void ARMXEmitter::RSCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm, true); }
void ARMXEmitter::TST ( ARMReg Rn, Operand2 Rm) { WriteInstruction(8, R0, Rn, Rm, true); }
void ARMXEmitter::TEQ ( ARMReg Rn, Operand2 Rm) { WriteInstruction(9, R0, Rn, Rm, true); }
void ARMXEmitter::CMP ( ARMReg Rn, Operand2 Rm) { WriteInstruction(10, R0, Rn, Rm, true); }
void ARMXEmitter::CMN ( ARMReg Rn, Operand2 Rm) { WriteInstruction(11, R0, Rn, Rm, true); }
void ARMXEmitter::ORR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm); }
void ARMXEmitter::ORRS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm, true); }
void ARMXEmitter::MOV (ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm); }
void ARMXEmitter::MOVS(ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm, true); }
void ARMXEmitter::BIC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm); }
void ARMXEmitter::BICS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm, true); }
void ARMXEmitter::MVN (ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm); }
void ARMXEmitter::MVNS(ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm, true); }
void ARMXEmitter::MOVW(ARMReg Rd, Operand2 Rm) { WriteInstruction(16, Rd, R0, Rm); }
void ARMXEmitter::MOVT(ARMReg Rd, Operand2 Rm, bool TopBits) { WriteInstruction(17, Rd, R0, TopBits ? Rm.Value >> 16 : Rm); }
884
885
// Central encoder for the data-processing instructions. Op indexes
// InstOps/InstNames; the encoding variant (IMM/REG/IMMSREG/RSR) is
// selected by the type of the last operand. Asserts if the instruction
// doesn't support that operand kind.
void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags) // This can get renamed later
{
	s32 op = InstOps[Op][Rm.GetType()]; // Type always decided by last operand
	u32 Data = Rm.GetData();
	if (Rm.GetType() == TYPE_IMM)
	{
		switch (Op)
		{
		// MOV cases that support IMM16
		case 16:
		case 17:
			Data = Rm.Imm16();
			break;
		default:
			break;
		}
	}
	if (op == -1)
		_assert_msg_(false, "%s not yet support %d", InstNames[Op], Rm.GetType());
	Write32(condition | (op << 21) | (SetFlags ? (1 << 20) : 0) | Rn << 16 | Rd << 12 | Data);
}
906
907
// Data Operations
908
// Shared encoder for the signed-multiply / divide family.
void ARMXEmitter::WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2)
{
	Write32(condition | (0x7 << 24) | (Op << 20) | (dest << 16) | (Op2 << 12) | (r1 << 8) | (Op3 << 5) | (1 << 4) | r2);
}
// Unsigned integer divide; asserts hardware IDIV support.
void ARMXEmitter::UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)
{
	_assert_msg_(cpu_info.bIDIVa, "Trying to use integer divide on hardware that doesn't support it.");
	WriteSignedMultiply(3, 0xF, 0, dest, divisor, dividend);
}
// Signed integer divide; asserts hardware IDIV support.
void ARMXEmitter::SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)
{
	_assert_msg_(cpu_info.bIDIVa, "Trying to use integer divide on hardware that doesn't support it.");
	WriteSignedMultiply(1, 0xF, 0, dest, divisor, dividend);
}
922
923
// Shift pseudo-ops (encoded via MOV with a shifted operand). The
// immediate forms of LSR/ASR assert a non-zero shift amount because an
// imm5 of 0 encodes a shift of 32, not 0 — use LSL for no-op shifts.
void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, false, dest, src, op2);}
void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);}
void ARMXEmitter::LSL (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, false, dest, src, op2);}
void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, true, dest, src, op2);}
void ARMXEmitter::LSR (ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "LSR must have a non-zero shift (use LSL.)");
	WriteShiftedDataOp(2, false, dest, src, op2);
}
void ARMXEmitter::LSRS(ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "LSRS must have a non-zero shift (use LSLS.)");
	WriteShiftedDataOp(2, true, dest, src, op2);
}
void ARMXEmitter::LSR (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(3, false, dest, src, op2);}
void ARMXEmitter::LSRS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(3, true, dest, src, op2);}
void ARMXEmitter::ASR (ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "ASR must have a non-zero shift (use LSL.)");
	WriteShiftedDataOp(4, false, dest, src, op2);
}
void ARMXEmitter::ASRS(ARMReg dest, ARMReg src, Operand2 op2) {
	_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "ASRS must have a non-zero shift (use LSLS.)");
	WriteShiftedDataOp(4, true, dest, src, op2);
}
void ARMXEmitter::ASR (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(5, false, dest, src, op2);}
void ARMXEmitter::ASRS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(5, true, dest, src, op2);}
947
948
// 32-bit multiply; MULS also sets the flags.
void ARMXEmitter::MUL (ARMReg dest, ARMReg src, ARMReg op2)
{
	Write32(condition | (dest << 16) | (src << 8) | (9 << 4) | op2);
}
void ARMXEmitter::MULS(ARMReg dest, ARMReg src, ARMReg op2)
{
	Write32(condition | (1 << 20) | (dest << 16) | (src << 8) | (9 << 4) | op2);
}

// Shared encoder for the 64-bit multiply(-accumulate) family.
void ARMXEmitter::Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) {
	Write32(condition | (op << 20) | (destHi << 16) | (destLo << 12) | (rm << 8) | (9 << 4) | rn);
}

// The callers below pass (rn, rm) into the (rm, rn) slots, swapping the
// source operands — harmless since multiplication commutes.
void ARMXEmitter::UMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0x8, destLo, destHi, rn, rm);
}

void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0xC, destLo, destHi, rn, rm);
}

void ARMXEmitter::UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0xA, destLo, destHi, rn, rm);
}

void ARMXEmitter::SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)
{
	Write4OpMultiply(0xE, destLo, destHi, rn, rm);
}
980
981
// Unsigned bitfield extract: dest = (rn >> lsb) zero-extended to `width` bits.
void ARMXEmitter::UBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)
{
	Write32(condition | (0x7E0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn);
}

// Signed bitfield extract: like UBFX but sign-extends the field.
void ARMXEmitter::SBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)
{
	Write32(condition | (0x7A0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn);
}

// Count leading zeros.
void ARMXEmitter::CLZ(ARMReg rd, ARMReg rm)
{
	Write32(condition | (0x16F << 16) | (rd << 12) | (0xF1 << 4) | rm);
}
995
996
// Cache preload hint for [rn + offset]; forWrite selects PLDW.
// NOTE(review): the assert only allows +-0x3ff (10 bits) while the
// message says 12 bits; ARM's PLD immediate field is 12 bits, so one of
// the two looks off — confirm the intended limit before relying on it.
void ARMXEmitter::PLD(ARMReg rn, int offset, bool forWrite) {
	_dbg_assert_msg_(offset < 0x3ff && offset > -0x3ff, "PLD: Max 12 bits of offset allowed");

	bool U = offset >= 0;
	if (offset < 0) offset = -offset;
	bool R = !forWrite;
	// Conditions not allowed
	Write32((0xF5 << 24) | (U << 23) | (R << 22) | (1 << 20) | ((int)rn << 16) | (0xF << 12) | offset);
}
1005
1006
1007
// Bitfield insert: copies the low `width` bits of rn into rd at `lsb`.
// msb is clamped so the field never extends past bit 31.
void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width)
{
	u32 msb = (lsb + width - 1);
	if (msb > 31) msb = 31;
	Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | rn);
}

// Bitfield clear: zeroes `width` bits of rd starting at `lsb`. Same
// encoding as BFI with the Rn field set to 15 (0b1111).
void ARMXEmitter::BFC(ARMReg rd, u8 lsb, u8 width)
{
	u32 msb = (lsb + width - 1);
	if (msb > 31) msb = 31;
	Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | 15);
}
1020
1021
// Sign-extend byte to word.
void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2)
{
	Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2);
}

// Sign-extend halfword to word; implemented as SXTAH with Rn = 15.
void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation)
{
	SXTAH(dest, (ARMReg)15, op2, rotation);
}
// Sign-extend halfword and add: dest = src + SignExtend(op2 ror rotation).
void ARMXEmitter::SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation)
{
	// bits ten and 11 are the rotation amount, see 8.8.232 for more
	// information
	Write32(condition | (0x6B << 20) | (src << 16) | (dest << 12) | (rotation << 10) | (7 << 4) | op2);
}
// Reverse the bit order of a word.
void ARMXEmitter::RBIT(ARMReg dest, ARMReg src)
{
	Write32(condition | (0x6F << 20) | (0xF << 16) | (dest << 12) | (0xF3 << 4) | src);
}
// Reverse the byte order of a word.
void ARMXEmitter::REV (ARMReg dest, ARMReg src)
{
	Write32(condition | (0x6BF << 16) | (dest << 12) | (0xF3 << 4) | src);
}
// Reverse the byte order within each halfword.
void ARMXEmitter::REV16(ARMReg dest, ARMReg src)
{
	Write32(condition | (0x6BF << 16) | (dest << 12) | (0xFB << 4) | src);
}
1048
1049
// Write to APSR from an immediate; the two flags select which status
// fields (NZCVQ and/or GE) are updated.
void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, Operand2 op2)
{
	Write32(condition | (0x320F << 12) | (write_nzcvq << 19) | (write_g << 18) | op2.Imm12Mod());
}
// Write to APSR from a register.
void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, ARMReg src)
{
	Write32(condition | (0x120F << 12) | (write_nzcvq << 19) | (write_g << 18) | src);
}
// Read APSR into dest.
void ARMXEmitter::MRS (ARMReg dest)
{
	Write32(condition | (16 << 20) | (15 << 16) | (dest << 12));
}
1061
// Exclusive load from [base] into dest (for atomic sequences).
void ARMXEmitter::LDREX(ARMReg dest, ARMReg base)
{
	Write32(condition | (25 << 20) | (base << 16) | (dest << 12) | 0xF9F);
}
// Exclusive store of op to [base]; result receives 0 on success, 1 on
// failure. The encoding forbids result aliasing either source register.
void ARMXEmitter::STREX(ARMReg result, ARMReg base, ARMReg op)
{
	_assert_msg_((result != base && result != op), "STREX dest can't be other two registers");
	Write32(condition | (24 << 20) | (base << 16) | (result << 12) | (0xF9 << 4) | op);
}
// Full data memory barrier (unconditional encoding).
void ARMXEmitter::DMB ()
{
	Write32(0xF57FF05E);
}
// Supervisor call with a 24-bit immediate.
void ARMXEmitter::SVC(Operand2 op)
{
	Write32(condition | (0x0F << 24) | op.Imm24());
}
1078
1079
// Opcode table for WriteStoreOp, indexed [operation][operand type].
// Columns: IMM, REG, IMMSREG, RSR
// -1 for invalid if the instruction doesn't support that
const s32 LoadStoreOps[][4] = {
	{0x40, 0x60, 0x60, -1}, // STR
	{0x41, 0x61, 0x61, -1}, // LDR
	{0x44, 0x64, 0x64, -1}, // STRB
	{0x45, 0x65, 0x65, -1}, // LDRB
	// Special encodings (misc addressing mode: halfword / signed loads)
	{ 0x4, 0x0, -1, -1}, // STRH
	{ 0x5, 0x1, -1, -1}, // LDRH
	{ 0x5, 0x1, -1, -1}, // LDRSB
	{ 0x5, 0x1, -1, -1}, // LDRSH
};
// Names matching the LoadStoreOps rows, used only in error messages.
const char *LoadStoreNames[] = {
	"STR",
	"LDR",
	"STRB",
	"LDRB",
	"STRH",
	"LDRH",
	"LDRSB",
	"LDRSH",
};
1102
1103
// Shared emitter for single-register loads/stores. Op indexes LoadStoreOps/
// LoadStoreNames; Rm is the offset operand (immediate, register, or shifted
// register where the table allows it). RegAdd selects add vs subtract for a
// register offset; immediate offsets take their sign from the value itself.
// Ops 4-7 use the "misc" (halfword/signed) addressing-mode encoding.
void ARMXEmitter::WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 Rm, bool RegAdd)
{
	s32 op = LoadStoreOps[Op][Rm.GetType()]; // Type always decided by last operand
	u32 Data;

	// Qualcomm chipsets get /really/ angry if you don't use index, even if the offset is zero.
	// Some of these encodings require Index at all times anyway. Doesn't really matter.
	// bool Index = op2 != 0 ? true : false;
	bool Index = true;
	bool Add = false;

	// Special Encoding (misc addressing mode)
	bool SpecialOp = false;
	bool Half = false;
	bool SignedLoad = false;

	if (op == -1)
		_assert_msg_(false, "%s does not support %d", LoadStoreNames[Op], Rm.GetType());

	switch (Op)
	{
	case 4: // STRH
		SpecialOp = true;
		Half = true;
		SignedLoad = false;
		break;
	case 5: // LDRH
		SpecialOp = true;
		Half = true;
		SignedLoad = false;
		break;
	case 6: // LDRSB
		SpecialOp = true;
		Half = false;
		SignedLoad = true;
		break;
	case 7: // LDRSH
		SpecialOp = true;
		Half = true;
		SignedLoad = true;
		break;
	}
	switch (Rm.GetType())
	{
	case TYPE_IMM:
	{
		s32 Temp = (s32)Rm.Value;
		Data = abs(Temp);
		// The offset is encoded differently on this one.
		// Misc addressing splits the 8-bit offset into imm4H:imm4L.
		if (SpecialOp)
			Data = ((Data & 0xF0) << 4) | (Data & 0xF);
		if (Temp >= 0) Add = true;
	}
	break;
	case TYPE_REG:
		Data = Rm.GetData();
		Add = RegAdd;
		break;
	case TYPE_IMMSREG:
		if (!SpecialOp)
		{
			Data = Rm.GetData();
			Add = RegAdd;
			break;
		}
		// Intentional fallthrough: TYPE_IMMSREG not supported for misc addressing.
	default:
		// RSR not supported for any of these
		// We already have the warning above
		BKPT(0x2);
		return;
		break;
	}
	if (SpecialOp)
	{
		// Add SpecialOp things: fixed 1001 pattern in bits 7:4 plus the S/H bits.
		Data = (0x9 << 4) | (SignedLoad << 6) | (Half << 5) | Data;
	}
	Write32(condition | (op << 20) | (Index << 24) | (Add << 23) | (Rn << 16) | (Rt << 12) | Data);
}
1183
1184
// Single-register load/store wrappers around WriteStoreOp. op2 is the offset
// operand; RegAdd picks add vs subtract when the offset is a register.
void ARMXEmitter::LDR (ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(1, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(3, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(5, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRSB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(6, dest, base, op2, RegAdd);}
void ARMXEmitter::LDRSH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(7, dest, base, op2, RegAdd);}
void ARMXEmitter::STR (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(0, result, base, op2, RegAdd);}
void ARMXEmitter::STRH (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(4, result, base, op2, RegAdd);}
void ARMXEmitter::STRB (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(2, result, base, op2, RegAdd);}
1192
1193
// Collects Regnum trailing varargs (register indices) into the RegList bitmask.
#define VA_TO_REGLIST(RegList, Regnum) \
{ \
	u8 Reg; \
	va_list vl; \
	va_start(vl, Regnum); \
	for (int i = 0; i < Regnum; i++) \
	{ \
		Reg = va_arg(vl, u32); \
		RegList |= (1 << Reg); \
	} \
	va_end(vl); \
}
1205
1206
// Emits an LDM/STM-class instruction; op carries the P/U/L option bits.
void ARMXEmitter::WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList)
{
	Write32(condition | (op << 20) | (WriteBack << 21) | (dest << 16) | RegList);
}
// Emits a VLDM/VSTM-class instruction for `numregs` S or D registers starting at Vd.
void ARMXEmitter::WriteVRegStoreOp(u32 op, ARMReg Rn, bool Double, bool WriteBack, ARMReg Vd, u8 numregs)
{
	_dbg_assert_msg_(!WriteBack || Rn != R_PC, "VLDM/VSTM cannot use WriteBack with PC (PC is deprecated anyway.)");
	// D registers occupy two words each, hence the shift on numregs.
	Write32(condition | (op << 20) | (WriteBack << 21) | (Rn << 16) | EncodeVd(Vd) | ((0xA | (int)Double) << 8) | (numregs << (int)Double));
}
1215
// Store multiple, full-descending stack (decrement before): the "push" idiom.
void ARMXEmitter::STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x10 | 0, dest, WriteBack, RegList);
}
// Load multiple, full-descending stack (increment after): the "pop" idiom.
void ARMXEmitter::LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x08 | 1, dest, WriteBack, RegList);
}
// Store multiple, increment after.
void ARMXEmitter::STMIA(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x08 | 0, dest, WriteBack, RegList);
}
// Load multiple, increment after.
void ARMXEmitter::LDMIA(ARMReg dest, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | 0x08 | 1, dest, WriteBack, RegList);
}
// Generic STM with explicit direction (Add) and pre/post ordering (Before).
void ARMXEmitter::STM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 0, dest, WriteBack, RegList);
}
// Generic LDM with explicit direction (Add) and pre/post ordering (Before).
void ARMXEmitter::LDM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...)
{
	u16 RegList = 0;
	VA_TO_REGLIST(RegList, Regnum);
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 1, dest, WriteBack, RegList);
}
1251
1252
// STM/LDM variants taking a precomputed register bitmask instead of varargs.
void ARMXEmitter::STMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList)
{
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 0, dest, WriteBack, RegList);
}
void ARMXEmitter::LDMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList)
{
	WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 1, dest, WriteBack, RegList);
}

#undef VA_TO_REGLIST
1262
1263
// NEON Specific
1264
void ARMXEmitter::VABD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
1265
{
1266
_assert_msg_(Vd >= D0, "Pass invalid register to VABD(float)");
1267
bool register_quad = Vd >= Q0;
1268
1269
// Gets encoded as a double register
1270
Vd = SubBase(Vd);
1271
Vn = SubBase(Vn);
1272
Vm = SubBase(Vm);
1273
1274
Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
1275
| ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \
1276
| ((Vm & 0x10) << 2) | (Vm & 0xF));
1277
}
1278
// Integer vector add (legacy IntegerSize overload).
void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_assert_msg_(Vd >= D0, "Pass invalid register to VADD(integer)");

	bool register_quad = Vd >= Q0;

	// Gets encoded as a double register
	Vd = SubBase(Vd);
	Vn = SubBase(Vn);
	Vm = SubBase(Vm);

	// D bit at 22, N bit at 7, M bit at 5 per the three-register NEON layout.
	Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
		| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \
		| ((Vm & 0x10) << 1) | (Vm & 0xF));

}
1294
// Integer vector subtract (legacy IntegerSize overload, quad registers only —
// the Q bit is hard-wired below).
void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_assert_msg_(Vd >= Q0, "Pass invalid register to VSUB(integer)");

	// Gets encoded as a double register
	Vd = SubBase(Vd);
	Vn = SubBase(Vn);
	Vm = SubBase(Vm);

	// BUGFIX: the M bit (high bit of Vm) lives at bit 5, so the shift is << 1,
	// matching EncodeVm() and the sibling VADD above. The previous << 2 landed
	// on bit 6 (the Q bit, already set here) and dropped M for regs >= D16.
	Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \
		| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \
		| ((Vm & 0x10) << 1) | (Vm & 0xF));
}
1307
1308
// VFP/NEON data-processing opcode table, indexed [op][is_quad]. opc1 goes to
// bits 27:20 and opc2 to bits 11:4 in WriteVFPDataOp; {-1,-1} marks an
// unsupported VFP/NEON combination (asserted there).
extern const VFPEnc VFPOps[16][2] = {
	{{0xE0, 0xA0}, { -1, -1}}, // 0: VMLA
	{{0xE1, 0xA4}, { -1, -1}}, // 1: VNMLA
	{{0xE0, 0xA4}, { -1, -1}}, // 2: VMLS
	{{0xE1, 0xA0}, { -1, -1}}, // 3: VNMLS
	{{0xE3, 0xA0}, { -1, -1}}, // 4: VADD
	{{0xE3, 0xA4}, { -1, -1}}, // 5: VSUB
	{{0xE2, 0xA0}, { -1, -1}}, // 6: VMUL
	{{0xE2, 0xA4}, { -1, -1}}, // 7: VNMUL
	{{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 8: VABS(Vn(0x0) used for encoding)
	{{0xE8, 0xA0}, { -1, -1}}, // 9: VDIV
	{{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 10: VNEG(Vn(0x1) used for encoding)
	{{0xEB, 0xAC}, { -1, -1}}, // 11: VSQRT (Vn(0x1) used for encoding)
	{{0xEB, 0xA4}, { -1, -1}}, // 12: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding)
	{{0xEB, 0xAC}, { -1, -1}}, // 13: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding)
	{{ -1, -1}, {0x3B, 0x30}}, // 14: VABSi
};

// Names matching the VFPOps rows, used only in error messages.
const char *VFPOpNames[16] = {
	"VMLA",
	"VNMLA",
	"VMLS",
	"VNMLS",
	"VADD",
	"VSUB",
	"VMUL",
	"VNMUL",
	"VABS",
	"VDIV",
	"VNEG",
	"VSQRT",
	"VCMP",
	"VCMPE",
	"VABSi",
};
1343
1344
// Builds the destination-register field (D:Vd) of a VFP/NEON encoding.
// D and Q registers put the high bit at 22 and the low four bits at 15:12;
// S registers put the low bit at 22 and the upper four bits at 15:12.
u32 EncodeVd(ARMReg Vd)
{
	const bool wide = Vd >= D0; // D and Q registers share the same field layout.
	const ARMReg Reg = SubBase(Vd);
	if (wide)
		return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12);
	return ((Reg & 0x1) << 22) | ((Reg & 0x1E) << 11);
}
1360
// Builds the first-operand register field (N:Vn) of a VFP/NEON encoding.
// D/Q registers: low four bits at 19:16, high bit (N) at 7; S registers:
// upper four bits at 19:16, low bit at 7.
u32 EncodeVn(ARMReg Vn)
{
	const bool wide = Vn >= D0; // D and Q registers share the same field layout.
	const ARMReg Reg = SubBase(Vn);
	if (wide)
		return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3);
	return ((Reg & 0x1E) << 15) | ((Reg & 0x1) << 7);
}
1375
// Builds the second-operand register field (M:Vm) of a VFP/NEON encoding.
// D/Q registers: low four bits at 3:0, high bit (M) at 5; S registers:
// upper four bits at 3:0, low bit at 5.
u32 EncodeVm(ARMReg Vm)
{
	const bool wide = Vm >= D0; // D and Q registers share the same field layout.
	const ARMReg Reg = SubBase(Vm);
	if (wide)
		return ((Reg & 0x10) << 1) | (Reg & 0xF);
	return ((Reg & 0x1) << 5) | (Reg >> 1);
}
1391
1392
// Maps an element-size flag (I_8/I_16/I_32/F_32/I_64) to the two-bit "size"
// field used by integer NEON encodings. Asserts (debug) and returns 0 when no
// recognized flag is set.
u32 encodedSize(u32 value)
{
	if (value & I_8)
		return 0;
	if (value & I_16)
		return 1;
	if (value & (I_32 | F_32))
		return 2;
	if (value & I_64)
		return 3;
	_dbg_assert_msg_(false, "Passed invalid size to integer NEON instruction");
	return 0;
}
1406
1407
// Strips the register-bank base (S0/D0/Q0) off a VFP/NEON register, leaving
// just the index. Quads are returned as the index of their first double.
// Core registers pass through unchanged.
ARMReg SubBase(ARMReg Reg)
{
	if (Reg >= Q0)
		return (ARMReg)((Reg - Q0) * 2); // Always gets encoded as a double register
	if (Reg >= D0)
		return (ARMReg)(Reg - D0);
	if (Reg >= S0)
		return (ARMReg)(Reg - S0);
	return Reg;
}
1421
1422
// Encodes a D-register scalar reference: the sub-element index is packed into
// bit 4 above the 4-bit register index, for use by by-scalar NEON ops.
ARMReg DScalar(ARMReg dreg, int subScalar) {
	int dr = (int)(SubBase(dreg)) & 0xF;
	int scalar = ((subScalar << 4) | dr);
	ARMReg ret = (ARMReg)(D0 + scalar);
	// ILOG("Scalar: %i D0: %i AR: %i", scalar, (int)D0, (int)ret);
	return ret;
}

// Convert a Q-register scalar to a DScalar: a quad maps onto two doubles, so
// scalar-index bit 1 selects the upper double and bit 0 the element within it.
ARMReg QScalar(ARMReg qreg, int subScalar) {
	int dr = (int)(SubBase(qreg)) & 0xF;
	if (subScalar & 2) {
		dr++;
	}
	int scalar = (((subScalar & 1) << 4) | dr);
	ARMReg ret = (ARMReg)(D0 + scalar);
	return ret;
}
1440
1441
// Shared emitter for the data-processing ops in VFPOps. Chooses the VFP or
// NEON encoding based on whether Vd is a quad, asserting when the table marks
// the combination unsupported.
void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	bool quad_reg = Vd >= Q0;
	bool double_reg = Vd >= D0 && Vd < Q0;

	VFPEnc enc = VFPOps[Op][quad_reg];
	if (enc.opc1 == -1 && enc.opc2 == -1)
		_assert_msg_(false, "%s does not support %s", VFPOpNames[Op], quad_reg ? "NEON" : "VFP");
	u32 VdEnc = EncodeVd(Vd);
	u32 VnEnc = EncodeVn(Vn);
	u32 VmEnc = EncodeVm(Vm);
	// NEON encodings are unconditional (0xF in the condition field).
	u32 cond = quad_reg ? (0xF << 28) : condition;

	Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (quad_reg << 6) | (double_reg << 8) | VmEnc);
}
1456
// VFP data-processing wrappers. Where a fixed register (D0/D1/D4/D5) is
// passed below, its number fills the Vn field as part of the encoding rather
// than naming a real operand — see the Vn notes in the VFPOps table.
void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(0, Vd, Vn, Vm); }
void ARMXEmitter::VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); }
void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); }
void ARMXEmitter::VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); }
void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); }
void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(5, Vd, Vn, Vm); }
void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); }
void ARMXEmitter::VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(7, Vd, Vn, Vm); }
void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D0, Vm); }
void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(9, Vd, Vn, Vm); }
void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D1, Vm); }
void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(11, Vd, D1, Vm); }
void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(12, Vd, D4, Vm); }
void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(13, Vd, D4, Vm); }
void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(12, Vd, D5, D0); }
void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(13, Vd, D5, D0); }
1472
1473
// VLDM/VSTM, increment-after: load/store `numvregs` consecutive S or D
// registers starting at firstvreg from/to [ptr].
void ARMXEmitter::VLDMIA(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	WriteVRegStoreOp(0x80 | 0x40 | 0x8 | 1, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}

void ARMXEmitter::VSTMIA(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	WriteVRegStoreOp(0x80 | 0x40 | 0x8, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}

// Decrement-before forms; writeback is required for these encodings.
void ARMXEmitter::VLDMDB(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	_dbg_assert_msg_(WriteBack, "Writeback is required for VLDMDB");
	WriteVRegStoreOp(0x80 | 0x040 | 0x10 | 1, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}

void ARMXEmitter::VSTMDB(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)
{
	_dbg_assert_msg_(WriteBack, "Writeback is required for VSTMDB");
	WriteVRegStoreOp(0x80 | 0x040 | 0x10, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);
}
1494
1495
// Load a single or double FP register from [Base +/- offset]. The offset must
// be word-aligned and fit the 8-bit scaled immediate (asserted below).
void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset)
{
	_assert_msg_(Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR");
	_assert_msg_(Base <= R15, "Passed invalid Base register to VLDR");

	bool Add = offset >= 0 ? true : false;
	u32 imm = abs(offset);

	_assert_msg_((imm & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough");

	if (imm & 0xC03)
		ERROR_LOG(Log::JIT, "VLDR: Bad offset %08x", imm);

	bool single_reg = Dest < D0;

	Dest = SubBase(Dest);

	if (single_reg)
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \
			| ((Dest & 0x1E) << 11) | (10 << 8) | (imm >> 2));
	}
	else
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \
			| ((Dest & 0xF) << 12) | (11 << 8) | (imm >> 2));
	}
}
// Store a single or double FP register to [Base +/- offset]; same offset
// rules as VLDR.
void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset)
{
	_assert_msg_(Src >= S0 && Src <= D31, "Passed invalid src register to VSTR");
	_assert_msg_(Base <= R15, "Passed invalid base register to VSTR");

	bool Add = offset >= 0 ? true : false;
	u32 imm = abs(offset);

	_assert_msg_((imm & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough");

	if (imm & 0xC03)
		ERROR_LOG(Log::JIT, "VSTR: Bad offset %08x", imm);

	bool single_reg = Src < D0;

	Src = SubBase(Src);

	if (single_reg)
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x1) << 22) | (Base << 16) \
			| ((Src & 0x1E) << 11) | (10 << 8) | (imm >> 2));
	}
	else
	{
		Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x10) << 18) | (Base << 16) \
			| ((Src & 0xF) << 12) | (11 << 8) | (imm >> 2));
	}
}
1551
1552
// VMRS APSR_nzcv, FPSCR: copy the VFP comparison flags into the APSR (Rt=15).
void ARMXEmitter::VMRS_APSR() {
	Write32(condition | 0x0EF10A10 | (15 << 12));
}
// Read FPSCR into Rt.
void ARMXEmitter::VMRS(ARMReg Rt) {
	Write32(condition | (0xEF << 20) | (1 << 16) | (Rt << 12) | 0xA10);
}
// Write Rt into FPSCR.
void ARMXEmitter::VMSR(ARMReg Rt) {
	Write32(condition | (0xEE << 20) | (1 << 16) | (Rt << 12) | 0xA10);
}

// VMOV of a VFP-encodable float immediate (see Operand2::Imm8VFP) into an
// S or D register; sz selects double vs single precision.
void ARMXEmitter::VMOV(ARMReg Dest, Operand2 op2)
{
	int sz = Dest >= D0 ? (1 << 8) : 0;
	Write32(condition | (0xEB << 20) | EncodeVd(Dest) | (5 << 9) | sz | op2.Imm8VFP());
}
1567
1568
// Loads a constant into a D or Q register via the ASIMD "one register and a
// modified immediate" encoding, trying each cmode pattern in turn (byte in a
// single lane, per-halfword byte, ones-extended forms, replicated byte, VFP
// float immediate, 0x00/0xFF byte mask) and asserting if none can represent
// the value.
void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, u32 imm)
{
	_assert_msg_(Vd >= D0, "VMOV_neon #imm must target a double or quad");
	bool register_quad = Vd >= Q0;

	int cmode = 0;
	int op = 0;
	Operand2 op2 = IMM(0);

	// Low byte replicated across the word; used for I_8 splats and to detect
	// the "same byte in every lane" cmode below.
	u32 imm8 = imm & 0xFF;
	imm8 = imm8 | (imm8 << 8) | (imm8 << 16) | (imm8 << 24);

	if (Size == I_8) {
		imm = imm8;
	} else if (Size == I_16) {
		imm &= 0xFFFF;
		imm = imm | (imm << 16);
	}

	if ((imm & 0x000000FF) == imm) {
		// 0x000000XX
		op = 0;
		cmode = 0 << 1;
		op2 = IMM(imm);
	} else if ((imm & 0x0000FF00) == imm) {
		// 0x0000XX00
		op = 0;
		cmode = 1 << 1;
		op2 = IMM(imm >> 8);
	} else if ((imm & 0x00FF0000) == imm) {
		// 0x00XX0000
		op = 0;
		cmode = 2 << 1;
		op2 = IMM(imm >> 16);
	} else if ((imm & 0xFF000000) == imm) {
		// 0xXX000000
		op = 0;
		cmode = 3 << 1;
		op2 = IMM(imm >> 24);
	} else if ((imm & 0x00FF00FF) == imm && (imm >> 16) == (imm & 0x00FF)) {
		// 0x00XX00XX: byte in the low half of each halfword.
		op = 0;
		cmode = 4 << 1;
		op2 = IMM(imm & 0xFF);
	} else if ((imm & 0xFF00FF00) == imm && (imm >> 16) == (imm & 0xFF00)) {
		// 0xXX00XX00: byte in the high half of each halfword.
		// BUGFIX: the encoded imm8 is the repeated byte, which sits in bits
		// 15:8 here; the old `IMM(imm & 0xFF)` always encoded zero.
		op = 0;
		cmode = 5 << 1;
		op2 = IMM((imm >> 8) & 0xFF);
	} else if ((imm & 0x0000FFFF) == (imm | 0x000000FF)) {
		// 0x0000XXFF: byte shifted left by 8 with ones filled in below.
		op = 0;
		cmode = (6 << 1) | 0;
		op2 = IMM(imm >> 8);
	} else if ((imm & 0x00FFFFFF) == (imm | 0x0000FFFF)) {
		// 0x00XXFFFF: byte shifted left by 16 with ones filled in below.
		op = 0;
		cmode = (6 << 1) | 1;
		op2 = IMM(imm >> 16);
	} else if (imm == imm8) {
		// Same byte replicated in every lane.
		op = 0;
		cmode = (7 << 1) | 0;
		op2 = IMM(imm & 0xFF);
	} else if (TryMakeFloatIMM8(imm, op2)) {
		// VFP-style float immediate.
		op = 0;
		cmode = (7 << 1) | 1;
	} else {
		// 64-bit constant form (op=1, cmode=1110): each byte of the result is
		// 0x00 or 0xFF, selected by one bit of the 8-bit immediate.
		// Technically we could take a u64 here.
		bool canEncode = true;
		u8 byteMask = 0; // renamed from imm8 — the old name shadowed the splat above
		for (int i = 0, i8 = 0; i < 32; i += 8, ++i8) {
			u8 b = (imm >> i) & 0xFF;
			if (b == 0xFF) {
				byteMask |= 1 << i8;
			} else if (b != 0x00) {
				canEncode = false;
			}
		}
		if (canEncode) {
			// We don't want zeros in the second lane, so replicate the low
			// nibble into the high one (d[63:32] == d[31:0]).
			op = 1;
			cmode = 7 << 1;
			op2 = IMM(byteMask | (byteMask << 4));
		} else {
			_assert_msg_(false, "VMOV_neon #imm invalid constant value");
		}
	}

	// No condition allowed.
	Write32((15 << 28) | (0x28 << 20) | EncodeVd(Vd) | (cmode << 8) | (register_quad << 6) | (op << 5) | (1 << 4) | op2.Imm8ASIMD());
}
1651
1652
// Scalar VMOV between a D-register lane and an ARM core register, direction
// inferred from which argument is the core register. opc1/opc2 pack the lane
// index according to the element size.
void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane)
{
	int opc1 = 0;
	int opc2 = 0;

	switch (Size & ~(I_SIGNED | I_UNSIGNED))
	{
	case I_8: opc1 = 2 | (lane >> 2); opc2 = lane & 3; break;
	case I_16: opc1 = lane >> 1; opc2 = 1 | ((lane & 1) << 1); break;
	case I_32:
	case F_32:
		_assert_msg_((Size & I_UNSIGNED) == 0, "Cannot use UNSIGNED for I_32 or F_32");
		opc1 = lane & 1;
		break;
	default:
		_assert_msg_(false, "VMOV_neon unsupported size");
	}

	if (Vd < S0 && Rt >= D0 && Rt < Q0)
	{
		// Oh, reading to reg, our params are backwards.
		ARMReg Src = Rt;
		ARMReg Dest = Vd;

		// Reads narrower than 32 bits must say whether to sign- or zero-extend.
		_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED | F_32 | I_32)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VMOV, unless F_32/I_32");
		int U = (Size & I_UNSIGNED) ? (1 << 23) : 0;

		Write32(condition | (0xE1 << 20) | U | (opc1 << 21) | EncodeVn(Src) | (Dest << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));
	}
	else if (Rt < S0 && Vd >= D0 && Vd < Q0)
	{
		// Core register into a D-register lane.
		ARMReg Src = Rt;
		ARMReg Dest = Vd;
		Write32(condition | (0xE0 << 20) | (opc1 << 21) | EncodeVn(Dest) | (Src << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));
	}
	else
		_assert_msg_(false, "VMOV_neon unsupported arguments (Dx -> Rx or Rx -> Dx)");
}
1690
1691
// VMOV between a D register and a pair of ARM core registers; the direction
// is inferred from which arguments are core registers.
void ARMXEmitter::VMOV(ARMReg Vd, ARMReg Rt, ARMReg Rt2)
{
	if (Vd < S0 && Rt < S0 && Rt2 >= D0)
	{
		// Oh, reading to regs, our params are backwards.
		ARMReg Src = Rt2;
		ARMReg Dest1 = Vd;
		ARMReg Dest2 = Rt;
		Write32(condition | (0xC5 << 20) | (Dest2 << 16) | (Dest1 << 12) | (0xB << 8) | EncodeVm(Src) | (1 << 4));
	}
	else if (Vd >= D0 && Rt < S0 && Rt2 < S0)
	{
		// Two core registers into a D register.
		ARMReg Dest = Vd;
		ARMReg Src1 = Rt;
		ARMReg Src2 = Rt2;
		Write32(condition | (0xC4 << 20) | (Src2 << 16) | (Src1 << 12) | (0xB << 8) | EncodeVm(Dest) | (1 << 4));
	}
	else
		_assert_msg_(false, "VMOV_neon requires either Dm, Rt, Rt2 or Rt, Rt2, Dm.");
}
1711
1712
// Move an ARM core register into the low (high=false) or high (high=true)
// word of a D register.
void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high)
{
	_assert_msg_(Src < S0, "This VMOV doesn't support SRC other than ARM Reg");
	_assert_msg_(Dest >= D0, "This VMOV doesn't support DEST other than VFP");

	Dest = SubBase(Dest);

	Write32(condition | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \
		| (0xB << 8) | ((Dest & 0x10) << 3) | (1 << 4));
}
1722
1723
// General VMOV: handles ARM<->S register transfers and S/D/Q register-to-
// register moves, dispatching on the operand kinds. ARM->ARM moves and
// 64-bit ARM<->NEON moves are rejected with asserts.
void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)
{
	if (Dest == Src) {
		WARN_LOG(Log::JIT, "VMOV %s, %s - same register", ARMRegAsString(Src), ARMRegAsString(Dest));
	}
	if (Dest > R15)
	{
		if (Src < S0)
		{
			if (Dest < D0)
			{
				// Moving to a Neon register FROM ARM Reg
				Dest = (ARMReg)(Dest - S0);
				Write32(condition | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \
					| (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4));
				return;
			}
			else
			{
				// Move 64bit from Arm reg
				_assert_msg_(false, "This VMOV doesn't support moving 64bit ARM to NEON");
				return;
			}
		}
	}
	else
	{
		if (Src > R15)
		{
			if (Src < D0)
			{
				// Moving to ARM Reg from Neon Register
				Src = (ARMReg)(Src - S0);
				Write32(condition | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \
					| (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4));
				return;
			}
			else
			{
				// Move 64bit To Arm reg
				_assert_msg_(false, "This VMOV doesn't support moving 64bit ARM From NEON");
				return;
			}
		}
		else
		{
			// Move Arm reg to Arm reg
			_assert_msg_(false, "VMOV doesn't support moving ARM registers");
		}
	}
	// Moving NEON registers (only reached when both operands are FP/NEON regs).
	int SrcSize = Src < D0 ? 1 : Src < Q0 ? 2 : 4;
	int DestSize = Dest < D0 ? 1 : Dest < Q0 ? 2 : 4;
	bool Single = DestSize == 1;
	bool Quad = DestSize == 4;

	_assert_msg_(SrcSize == DestSize, "VMOV doesn't support moving different register sizes");
	if (SrcSize != DestSize) {
		ERROR_LOG(Log::JIT, "SrcSize: %i (%s) DestDize: %i (%s)", SrcSize, ARMRegAsString(Src), DestSize, ARMRegAsString(Dest));
	}

	Dest = SubBase(Dest);
	Src = SubBase(Src);

	if (Single)
	{
		Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \
			| (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1));
	}
	else
	{
		// Double and quad
		if (Quad)
		{
			// Gets encoded as a Double register (VORR Vd, Vm, Vm)
			Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (2 << 20) | ((Src & 0xF) << 16) \
				| ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (1 << 6) \
				| ((Src & 0x10) << 1) | (1 << 4) | (Src & 0xF));

		}
		else
		{
			Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \
				| (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF));
		}
	}
}
1810
1811
// VFP conversions between single/double floats and 32-bit integers, selected
// by flags (TO_INT, IS_SIGNED, ROUND_TO_ZERO). The register kinds pick among
// the same-size and S<->D encodings.
void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags)
{
	bool single_reg = (Dest < D0) && (Source < D0);
	bool single_double = !single_reg && (Source < D0 || Dest < D0);
	bool single_to_double = Source < D0;
	// op encodes rounding mode when converting to int, signedness otherwise;
	// op2 encodes signedness for the to-int direction.
	int op = ((flags & TO_INT) ? (flags & ROUND_TO_ZERO) : (flags & IS_SIGNED)) ? 1 : 0;
	int op2 = ((flags & TO_INT) ? (flags & IS_SIGNED) : 0) ? 1 : 0;
	Dest = SubBase(Dest);
	Source = SubBase(Source);

	if (single_double)
	{
		// S32<->F64
		if (flags & TO_INT)
		{
			if (single_to_double)
			{
				Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) \
					| ((Dest & 0xF) << 12) | (op << 7) | (0x2D << 6) | ((Source & 0x1) << 5) | (Source >> 1));
			} else {
				Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
					| ((Dest & 0x1E) << 11) | (op << 7) | (0x2D << 6) | ((Source & 0x10) << 1) | (Source & 0xF));
			}
		}
		// F32<->F64
		else {
			if (single_to_double)
			{
				Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | (0x7 << 16) \
					| ((Dest & 0xF) << 12) | (0x2F << 6) | ((Source & 0x1) << 5) | (Source >> 1));
			} else {
				Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | (0x7 << 16) \
					| ((Dest & 0x1E) << 11) | (0x2B << 6) | ((Source & 0x10) << 1) | (Source & 0xF));
			}
		}
	} else if (single_reg) {
		Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
			| ((Dest & 0x1E) << 11) | (op << 7) | (0x29 << 6) | ((Source & 0x1) << 5) | (Source >> 1));
	} else {
		Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \
			| ((Dest & 0xF) << 12) | (1 << 8) | (op << 7) | (0x29 << 6) | ((Source & 0x10) << 1) | (Source & 0xF));
	}
}
1854
1855
// Vector absolute difference and accumulate (integer only).
void ARMXEmitter::VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \
		| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x71 << 4) | (register_quad << 6) | EncodeVm(Vm));
}

// Long variant: Qd accumulates the widened |Dn - Dm|.
void ARMXEmitter::VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \
		| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x50 << 4) | EncodeVm(Vm));
}
1875
1876
// Vector absolute difference; F_32 uses the float encoding, otherwise the
// size-encoded integer form.
void ARMXEmitter::VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \
			| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm));
}

// Long variant: Qd = widened |Dn - Dm| (integer only).
void ARMXEmitter::VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \
		| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | EncodeVm(Vm));
}
1898
1899
// Vector absolute value; bit 10 switches to the float form for F_32.
void ARMXEmitter::VABS(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (0xB1 << 16) | (encodedSize(Size) << 18) | EncodeVd(Vd) \
		| ((Size & F_32 ? 1 : 0) << 10) | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm));
}
1907
1908
// Vector absolute compare greater-or-equal: Vd = |Vn| >= |Vm| (float only).
void ARMXEmitter::VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	// Only Float
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) \
		| (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}

// Vector absolute compare greater-than: Vd = |Vn| > |Vm| (float only).
void ARMXEmitter::VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	// Only Float
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
	bool register_quad = Vd >= Q0;

	Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) \
		| (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
}

// Pseudo-instruction: VACLE is VACGE with the source operands swapped.
void ARMXEmitter::VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	VACGE(Vd, Vm, Vn);
}
1932
1933
// Pseudo-instruction: VACLT is VACGT with the source operands swapped
// (|Vn| < |Vm|  <=>  |Vm| > |Vn|), mirroring VACLE above.
// BUGFIX: previously forwarded (Vn, Vm) unchanged, which made VACLT emit the
// same instruction as VACGT.
void ARMXEmitter::VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	VACGT(Vd, Vm, Vn);
}
1937
1938
// Vector add; F_32 uses the float encoding, otherwise the size-encoded
// integer form.
void ARMXEmitter::VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);

	bool register_quad = Vd >= Q0;

	if (Size & F_32)
		Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | (register_quad << 6) | EncodeVm(Vm));
	else
		Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
			| (0x8 << 8) | (register_quad << 6) | EncodeVm(Vm));
}

// Add and narrow, keeping the high half of each result: Dd = high(Qn + Qm).
void ARMXEmitter::VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \
		| EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm));
}
1961
1962
// Long add: Qd = Dn + Dm with widening (integer only).
void ARMXEmitter::VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \
		| EncodeVd(Vd) | EncodeVm(Vm));
}
// Wide add: Qd = Qn + widened Dm (integer only).
void ARMXEmitter::VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
{
	_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);

	Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \
		| EncodeVd(Vd) | (1 << 8) | EncodeVm(Vm));
}
1982
void ARMXEmitter::VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm)
1983
{
1984
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
1985
_dbg_assert_msg_(!(Vd == Vn && Vn == Vm), "All operands the same for %s is a nop", __FUNCTION__);
1986
// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
1987
bool register_quad = Vd >= Q0;
1988
1989
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
1990
}
1991
void ARMXEmitter::VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm)
1992
{
1993
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
1994
// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
1995
bool register_quad = Vd >= Q0;
1996
1997
Write32((0xF2 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
1998
}
1999
void ARMXEmitter::VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2000
{
2001
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s: %i", __FUNCTION__, Vd);
2002
bool register_quad = Vd >= Q0;
2003
2004
Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
2005
}
2006
void ARMXEmitter::VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2007
{
2008
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2009
// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2010
bool register_quad = Vd >= Q0;
2011
2012
Write32((0xF3 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
2013
}
2014
void ARMXEmitter::VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2015
{
2016
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2017
// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2018
bool register_quad = Vd >= Q0;
2019
2020
Write32((0xF3 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
2021
}
2022
void ARMXEmitter::VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2023
{
2024
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2025
// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2026
bool register_quad = Vd >= Q0;
2027
2028
Write32((0xF3 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
2029
}
2030
void ARMXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2031
{
2032
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2033
2034
bool register_quad = Vd >= Q0;
2035
if (Size & F_32)
2036
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));
2037
else
2038
Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
2039
| (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm));
2040
2041
}
2042
void ARMXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vm)
2043
{
2044
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2045
2046
bool register_quad = Vd >= Q0;
2047
2048
Write32((0xF2 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \
2049
| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));
2050
}
2051
void ARMXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2052
{
2053
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2054
2055
bool register_quad = Vd >= Q0;
2056
if (Size & F_32)
2057
Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));
2058
else
2059
Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
2060
| (0x31 << 4) | (register_quad << 6) | EncodeVm(Vm));
2061
}
2062
void ARMXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vm)
2063
{
2064
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2065
2066
bool register_quad = Vd >= Q0;
2067
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \
2068
| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x8 << 4) | (register_quad << 6) | EncodeVm(Vm));
2069
}
2070
void ARMXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2071
{
2072
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2073
2074
bool register_quad = Vd >= Q0;
2075
if (Size & F_32)
2076
Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));
2077
else
2078
Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \
2079
| (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm));
2080
}
2081
void ARMXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vm)
2082
{
2083
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2084
2085
bool register_quad = Vd >= Q0;
2086
Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \
2087
| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (register_quad << 6) | EncodeVm(Vm));
2088
}
2089
void ARMXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2090
{
2091
VCGE(Size, Vd, Vm, Vn);
2092
}
2093
void ARMXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vm)
2094
{
2095
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2096
2097
bool register_quad = Vd >= Q0;
2098
Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \
2099
| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (3 << 7) | (register_quad << 6) | EncodeVm(Vm));
2100
}
2101
void ARMXEmitter::VCLS(u32 Size, ARMReg Vd, ARMReg Vm)
2102
{
2103
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2104
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2105
2106
bool register_quad = Vd >= Q0;
2107
Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \
2108
| EncodeVd(Vd) | (1 << 10) | (register_quad << 6) | EncodeVm(Vm));
2109
}
2110
void ARMXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2111
{
2112
VCGT(Size, Vd, Vm, Vn);
2113
}
2114
void ARMXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vm)
2115
{
2116
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2117
2118
bool register_quad = Vd >= Q0;
2119
Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \
2120
| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x20 << 4) | (register_quad << 6) | EncodeVm(Vm));
2121
}
2122
void ARMXEmitter::VCLZ(u32 Size, ARMReg Vd, ARMReg Vm)
2123
{
2124
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2125
2126
bool register_quad = Vd >= Q0;
2127
Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \
2128
| EncodeVd(Vd) | (0x48 << 4) | (register_quad << 6) | EncodeVm(Vm));
2129
}
2130
void ARMXEmitter::VCNT(u32 Size, ARMReg Vd, ARMReg Vm)
2131
{
2132
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2133
_dbg_assert_msg_(Size & I_8, "Can only use I_8 with %s", __FUNCTION__);
2134
2135
bool register_quad = Vd >= Q0;
2136
Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \
2137
| EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));
2138
}
2139
void ARMXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index)
2140
{
2141
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2142
_dbg_assert_msg_(Vm >= D0, "Pass invalid register to %s", __FUNCTION__);
2143
2144
bool register_quad = Vd >= Q0;
2145
u32 imm4 = 0;
2146
if (Size & I_8)
2147
imm4 = (index << 1) | 1;
2148
else if (Size & I_16)
2149
imm4 = (index << 2) | 2;
2150
else if (Size & (I_32 | F_32))
2151
imm4 = (index << 3) | 4;
2152
Write32((0xF3 << 24) | (0xB << 20) | (imm4 << 16) \
2153
| EncodeVd(Vd) | (0xC << 8) | (register_quad << 6) | EncodeVm(Vm));
2154
}
2155
void ARMXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Rt)
2156
{
2157
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2158
_dbg_assert_msg_(Rt < S0, "Pass invalid register to %s", __FUNCTION__);
2159
2160
bool register_quad = Vd >= Q0;
2161
Vd = SubBase(Vd);
2162
u8 sizeEncoded = 0;
2163
if (Size & I_8)
2164
sizeEncoded = 2;
2165
else if (Size & I_16)
2166
sizeEncoded = 1;
2167
else if (Size & I_32)
2168
sizeEncoded = 0;
2169
2170
Write32((0xEE << 24) | (0x8 << 20) | ((sizeEncoded & 2) << 21) | (register_quad << 21) \
2171
| ((Vd & 0xF) << 16) | (Rt << 12) | (0xB1 << 4) | ((Vd & 0x10) << 3) | ((sizeEncoded & 1) << 5));
2172
}
2173
void ARMXEmitter::VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index)
2174
{
2175
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2176
bool register_quad = Vd >= Q0;
2177
2178
Write32((0xF2 << 24) | (0xB << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (index & 0xF) \
2179
| (register_quad << 6) | EncodeVm(Vm));
2180
}
2181
void ARMXEmitter::VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2182
{
2183
_dbg_assert_msg_(Size == F_32, "Passed invalid size to FP-only NEON instruction");
2184
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2185
_dbg_assert_msg_(cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__);
2186
bool register_quad = Vd >= Q0;
2187
2188
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2189
}
2190
void ARMXEmitter::VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2191
{
2192
_dbg_assert_msg_(Size == F_32, "Passed invalid size to FP-only NEON instruction");
2193
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2194
_dbg_assert_msg_(cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__);
2195
bool register_quad = Vd >= Q0;
2196
2197
Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2198
}
2199
void ARMXEmitter::VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2200
{
2201
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2202
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2203
2204
bool register_quad = Vd >= Q0;
2205
2206
Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
2207
| EncodeVn(Vn) | EncodeVd(Vd) | (register_quad << 6) | EncodeVm(Vm));
2208
}
2209
void ARMXEmitter::VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2210
{
2211
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2212
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2213
2214
bool register_quad = Vd >= Q0;
2215
2216
Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
2217
| EncodeVn(Vn) | EncodeVd(Vd) | (1 << 9) | (register_quad << 6) | EncodeVm(Vm));
2218
}
2219
void ARMXEmitter::VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2220
{
2221
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2222
2223
bool register_quad = Vd >= Q0;
2224
2225
if (Size & F_32)
2226
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm));
2227
else
2228
Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
2229
| EncodeVn(Vn) | EncodeVd(Vd) | (0x60 << 4) | (register_quad << 6) | EncodeVm(Vm));
2230
}
2231
void ARMXEmitter::VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2232
{
2233
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2234
2235
bool register_quad = Vd >= Q0;
2236
2237
if (Size & F_32)
2238
Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm));
2239
else
2240
Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \
2241
| EncodeVn(Vn) | EncodeVd(Vd) | (0x61 << 4) | (register_quad << 6) | EncodeVm(Vm));
2242
}
2243
void ARMXEmitter::VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2244
{
2245
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2246
2247
bool register_quad = Vd >= Q0;
2248
2249
if (Size & F_32)
2250
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2251
else
2252
Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));
2253
}
2254
void ARMXEmitter::VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2255
{
2256
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2257
2258
bool register_quad = Vd >= Q0;
2259
2260
if (Size & F_32)
2261
Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2262
else
2263
Write32((0xF2 << 24) | (1 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));
2264
}
2265
void ARMXEmitter::VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2266
{
2267
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2268
_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);
2269
_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
2270
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2271
2272
Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \
2273
| EncodeVn(Vn) | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm));
2274
}
2275
void ARMXEmitter::VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2276
{
2277
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2278
_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);
2279
_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);
2280
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);
2281
2282
Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \
2283
| EncodeVn(Vn) | EncodeVd(Vd) | (0xA0 << 4) | EncodeVm(Vm));
2284
}
2285
void ARMXEmitter::VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2286
{
2287
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2288
2289
bool register_quad = Vd >= Q0;
2290
2291
if (Size & F_32)
2292
Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2293
else
2294
Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (encodedSize(Size) << 20) | \
2295
EncodeVn(Vn) | EncodeVd(Vd) | (0x91 << 4) | (register_quad << 6) | EncodeVm(Vm));
2296
}
2297
void ARMXEmitter::VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2298
{
2299
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2300
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2301
2302
Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2303
(0xC0 << 4) | ((Size & I_POLYNOMIAL) ? 1 << 9 : 0) | EncodeVm(Vm));
2304
}
2305
void ARMXEmitter::VMLA_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2306
{
2307
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2308
2309
bool register_quad = Vd >= Q0;
2310
2311
// No idea if the Non-Q case here works. Not really that interested.
2312
if (Size & F_32)
2313
Write32((0xF2 << 24) | (register_quad << 24) | (1 << 23) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x14 << 4) | EncodeVm(Vm));
2314
else
2315
_dbg_assert_msg_(false, "VMLA_scalar only supports float atm");
2316
//else
2317
// Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (1 << 6) | EncodeVm(Vm));
2318
// Unsigned support missing
2319
}
2320
void ARMXEmitter::VMUL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2321
{
2322
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2323
2324
bool register_quad = Vd >= Q0;
2325
2326
int VmEnc = EncodeVm(Vm);
2327
// No idea if the Non-Q case here works. Not really that interested.
2328
if (Size & F_32) // Q flag
2329
Write32((0xF2 << 24) | (register_quad << 24) | (1 << 23) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x94 << 4) | VmEnc);
2330
else
2331
_dbg_assert_msg_(false, "VMUL_scalar only supports float atm");
2332
2333
// Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (1 << 23) | (encodedSize(Size) << 20) |
2334
// EncodeVn(Vn) | EncodeVd(Vd) | (0x84 << 4) | (register_quad << 6) | EncodeVm(Vm));
2335
// Unsigned support missing
2336
}
2337
2338
void ARMXEmitter::VMVN(ARMReg Vd, ARMReg Vm)
2339
{
2340
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2341
2342
bool register_quad = Vd >= Q0;
2343
2344
Write32((0xF3B << 20) | \
2345
EncodeVd(Vd) | (0xB << 7) | (register_quad << 6) | EncodeVm(Vm));
2346
}
2347
2348
void ARMXEmitter::VNEG(u32 Size, ARMReg Vd, ARMReg Vm)
2349
{
2350
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2351
2352
bool register_quad = Vd >= Q0;
2353
2354
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) | \
2355
EncodeVd(Vd) | ((Size & F_32) ? 1 << 10 : 0) | (0xE << 6) | (register_quad << 6) | EncodeVm(Vm));
2356
}
2357
void ARMXEmitter::VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2358
{
2359
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2360
2361
bool register_quad = Vd >= Q0;
2362
2363
Write32((0xF2 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
2364
}
2365
void ARMXEmitter::VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2366
{
2367
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2368
_dbg_assert_msg_(!(Vd == Vn && Vn == Vm), "All operands the same for %s is a nop", __FUNCTION__);
2369
2370
bool register_quad = Vd >= Q0;
2371
2372
Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));
2373
}
2374
void ARMXEmitter::VPADAL(u32 Size, ARMReg Vd, ARMReg Vm)
2375
{
2376
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2377
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2378
2379
bool register_quad = Vd >= Q0;
2380
2381
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
2382
(0x60 << 4) | ((Size & I_UNSIGNED) ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm));
2383
}
2384
void ARMXEmitter::VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2385
{
2386
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2387
2388
if (Size & F_32)
2389
Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | EncodeVm(Vm));
2390
else
2391
Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2392
(0xB1 << 4) | EncodeVm(Vm));
2393
}
2394
void ARMXEmitter::VPADDL(u32 Size, ARMReg Vd, ARMReg Vm)
2395
{
2396
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2397
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2398
2399
bool register_quad = Vd >= Q0;
2400
2401
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
2402
(0x20 << 4) | (Size & I_UNSIGNED ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm));
2403
}
2404
void ARMXEmitter::VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2405
{
2406
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2407
2408
if (Size & F_32)
2409
Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm));
2410
else
2411
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2412
(0xA0 << 4) | EncodeVm(Vm));
2413
}
2414
void ARMXEmitter::VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2415
{
2416
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2417
2418
if (Size & F_32)
2419
Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm));
2420
else
2421
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2422
(0xA1 << 4) | EncodeVm(Vm));
2423
}
2424
void ARMXEmitter::VQABS(u32 Size, ARMReg Vd, ARMReg Vm)
2425
{
2426
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2427
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2428
2429
bool register_quad = Vd >= Q0;
2430
2431
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
2432
(0x70 << 4) | (register_quad << 6) | EncodeVm(Vm));
2433
}
2434
void ARMXEmitter::VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2435
{
2436
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2437
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2438
2439
bool register_quad = Vd >= Q0;
2440
2441
Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2442
(0x1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2443
}
2444
void ARMXEmitter::VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2445
{
2446
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2447
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2448
2449
Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2450
(0x90 << 4) | EncodeVm(Vm));
2451
}
2452
void ARMXEmitter::VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2453
{
2454
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2455
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2456
2457
Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2458
(0xB0 << 4) | EncodeVm(Vm));
2459
}
2460
void ARMXEmitter::VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2461
{
2462
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2463
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2464
2465
Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2466
(0xB0 << 4) | EncodeVm(Vm));
2467
}
2468
void ARMXEmitter::VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2469
{
2470
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2471
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2472
2473
Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2474
(0xD0 << 4) | EncodeVm(Vm));
2475
}
2476
void ARMXEmitter::VQNEG(u32 Size, ARMReg Vd, ARMReg Vm)
2477
{
2478
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2479
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2480
2481
bool register_quad = Vd >= Q0;
2482
2483
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \
2484
(0x78 << 4) | (register_quad << 6) | EncodeVm(Vm));
2485
}
2486
void ARMXEmitter::VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2487
{
2488
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2489
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2490
2491
Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2492
(0xB0 << 4) | EncodeVm(Vm));
2493
}
2494
void ARMXEmitter::VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2495
{
2496
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2497
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2498
2499
bool register_quad = Vd >= Q0;
2500
2501
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2502
(0x51 << 4) | (register_quad << 6) | EncodeVm(Vm));
2503
}
2504
void ARMXEmitter::VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2505
{
2506
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2507
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2508
2509
bool register_quad = Vd >= Q0;
2510
2511
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2512
(0x41 << 4) | (register_quad << 6) | EncodeVm(Vm));
2513
}
2514
void ARMXEmitter::VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2515
{
2516
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2517
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2518
2519
bool register_quad = Vd >= Q0;
2520
2521
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2522
(0x21 << 4) | (register_quad << 6) | EncodeVm(Vm));
2523
}
2524
void ARMXEmitter::VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2525
{
2526
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2527
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2528
2529
Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2530
(0x40 << 4) | EncodeVm(Vm));
2531
}
2532
void ARMXEmitter::VRECPE(u32 Size, ARMReg Vd, ARMReg Vm)
2533
{
2534
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2535
2536
bool register_quad = Vd >= Q0;
2537
2538
Write32((0xF3 << 24) | (0xB << 20) | (0xB << 16) | EncodeVd(Vd) | \
2539
(0x40 << 4) | (Size & F_32 ? 1 << 8 : 0) | (register_quad << 6) | EncodeVm(Vm));
2540
}
2541
void ARMXEmitter::VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2542
{
2543
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2544
2545
bool register_quad = Vd >= Q0;
2546
2547
Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2548
}
2549
void ARMXEmitter::VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2550
{
2551
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2552
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2553
2554
bool register_quad = Vd >= Q0;
2555
2556
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2557
(0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));
2558
}
2559
void ARMXEmitter::VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2560
{
2561
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2562
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2563
2564
bool register_quad = Vd >= Q0;
2565
2566
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2567
(0x50 << 4) | (register_quad << 6) | EncodeVm(Vm));
2568
}
2569
void ARMXEmitter::VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm)
2570
{
2571
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2572
2573
bool register_quad = Vd >= Q0;
2574
Vd = SubBase(Vd);
2575
Vm = SubBase(Vm);
2576
2577
Write32((0xF3 << 24) | (0xB << 20) | ((Vd & 0x10) << 18) | (0xB << 16)
2578
| ((Vd & 0xF) << 12) | (9 << 7) | (Size & F_32 ? (1 << 8) : 0) | (register_quad << 6)
2579
| ((Vm & 0x10) << 1) | (Vm & 0xF));
2580
}
2581
void ARMXEmitter::VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm)
2582
{
2583
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2584
2585
bool register_quad = Vd >= Q0;
2586
2587
Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \
2588
(0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm));
2589
}
2590
void ARMXEmitter::VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2591
{
2592
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2593
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2594
2595
Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2596
(0x60 << 4) | EncodeVm(Vm));
2597
}
2598
void ARMXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vm, ARMReg Vn)
2599
{
2600
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2601
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2602
2603
bool register_quad = Vd >= Q0;
2604
2605
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2606
(0x40 << 4) | (register_quad << 6) | EncodeVm(Vm));
2607
}
2608
2609
static int EncodeSizeShift(u32 Size, int amount, bool inverse, bool halve) {
2610
int sz = 0;
2611
switch (Size & 0xF) {
2612
case I_8: sz = 8; break;
2613
case I_16: sz = 16; break;
2614
case I_32: sz = 32; break;
2615
case I_64: sz = 64; break;
2616
}
2617
if (inverse && halve) {
2618
_dbg_assert_msg_(amount <= sz / 2, "Amount %d too large for narrowing shift (max %d)", amount, sz/2);
2619
return (sz / 2) + (sz / 2) - amount;
2620
} else if (inverse) {
2621
return sz + (sz - amount);
2622
} else {
2623
return sz + amount;
2624
}
2625
}
2626
2627
void ARMXEmitter::EncodeShiftByImm(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount, u8 opcode, bool register_quad, bool inverse, bool halve) {
2628
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2629
_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);
2630
int imm7 = EncodeSizeShift(Size, shiftAmount, inverse, halve);
2631
int L = (imm7 >> 6) & 1;
2632
int U = (Size & I_UNSIGNED) ? 1 : 0;
2633
u32 value = (0xF2 << 24) | (U << 24) | (1 << 23) | ((imm7 & 0x3f) << 16) | EncodeVd(Vd) | (opcode << 8) | (L << 7) | (register_quad << 6) | (1 << 4) | EncodeVm(Vm);
2634
Write32(value);
2635
}
2636
2637
void ARMXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
2638
EncodeShiftByImm((Size & ~I_UNSIGNED), Vd, Vm, shiftAmount, 0x5, Vd >= Q0, false, false);
2639
}
2640
2641
void ARMXEmitter::VSHLL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
2642
if ((u32)shiftAmount == (8 * (Size & 0xF))) {
2643
// Entirely different encoding (A2) for size == shift! Bleh.
2644
int sz = 0;
2645
switch (Size & 0xF) {
2646
case I_8: sz = 0; break;
2647
case I_16: sz = 1; break;
2648
case I_32: sz = 2; break;
2649
case I_64:
2650
_dbg_assert_msg_(false, "Cannot VSHLL 64-bit elements");
2651
}
2652
int imm6 = 0x32 | (sz << 2);
2653
u32 value = (0xF3 << 24) | (1 << 23) | (imm6 << 16) | EncodeVd(Vd) | (0x3 << 8) | EncodeVm(Vm);
2654
Write32(value);
2655
} else {
2656
EncodeShiftByImm((Size & ~I_UNSIGNED), Vd, Vm, shiftAmount, 0xA, false, false, false);
2657
}
2658
}
2659
2660
void ARMXEmitter::VSHR(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
2661
EncodeShiftByImm(Size, Vd, Vm, shiftAmount, 0x0, Vd >= Q0, true, false);
2662
}
2663
2664
void ARMXEmitter::VSHRN(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {
2665
// Reduce Size by 1 to encode correctly.
2666
EncodeShiftByImm(Size, Vd, Vm, shiftAmount, 0x8, false, true, true);
2667
}
2668
2669
void ARMXEmitter::VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2670
{
2671
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2672
2673
bool register_quad = Vd >= Q0;
2674
2675
if (Size & F_32)
2676
Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \
2677
(0xD0 << 4) | (register_quad << 6) | EncodeVm(Vm));
2678
else
2679
Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2680
(0x80 << 4) | (register_quad << 6) | EncodeVm(Vm));
2681
}
2682
void ARMXEmitter::VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2683
{
2684
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2685
2686
Write32((0xF2 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2687
(0x60 << 4) | EncodeVm(Vm));
2688
}
2689
void ARMXEmitter::VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2690
{
2691
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2692
2693
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2694
(0x20 << 4) | EncodeVm(Vm));
2695
}
2696
void ARMXEmitter::VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2697
{
2698
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2699
2700
Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2701
(0x30 << 4) | EncodeVm(Vm));
2702
}
2703
void ARMXEmitter::VSWP(ARMReg Vd, ARMReg Vm)
2704
{
2705
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2706
2707
bool register_quad = Vd >= Q0;
2708
2709
Write32((0xF3 << 24) | (0xB << 20) | (1 << 17) | EncodeVd(Vd) | \
2710
(register_quad << 6) | EncodeVm(Vm));
2711
}
2712
void ARMXEmitter::VTRN(u32 Size, ARMReg Vd, ARMReg Vm)
2713
{
2714
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2715
2716
bool register_quad = Vd >= Q0;
2717
2718
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \
2719
(1 << 7) | (register_quad << 6) | EncodeVm(Vm));
2720
}
2721
void ARMXEmitter::VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)
2722
{
2723
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2724
2725
bool register_quad = Vd >= Q0;
2726
2727
Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \
2728
(0x81 << 4) | (register_quad << 6) | EncodeVm(Vm));
2729
}
2730
void ARMXEmitter::VUZP(u32 Size, ARMReg Vd, ARMReg Vm)
2731
{
2732
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2733
2734
bool register_quad = Vd >= Q0;
2735
2736
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \
2737
(0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));
2738
}
2739
void ARMXEmitter::VZIP(u32 Size, ARMReg Vd, ARMReg Vm)
2740
{
2741
_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);
2742
2743
bool register_quad = Vd >= Q0;
2744
2745
Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \
2746
(0x18 << 4) | (register_quad << 6) | EncodeVm(Vm));
2747
}
2748
2749
void ARMXEmitter::VMOVL(u32 Size, ARMReg Vd, ARMReg Vm)
2750
{
2751
_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);
2752
_dbg_assert_msg_(Vm >= D0 && Vm <= D31, "Pass invalid register to %s", __FUNCTION__);
2753
_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VMOVL");
2754
2755
bool unsign = (Size & I_UNSIGNED) != 0;
2756
int imm3 = 0;
2757
if (Size & I_8) imm3 = 1;
2758
if (Size & I_16) imm3 = 2;
2759
if (Size & I_32) imm3 = 4;
2760
2761
Write32((0xF2 << 24) | (unsign << 24) | (1 << 23) | (imm3 << 19) | EncodeVd(Vd) | \
2762
(0xA1 << 4) | EncodeVm(Vm));
2763
}
// Narrowing move: Vd (D reg) = the low half of each element of Vm (Q reg).
void ARMXEmitter::VMOVN(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);

	// For consistency with assembler syntax and VMOVL - encode one size down.
	u32 halfSize = encodedSize(Size) - 1;

	Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | EncodeVm(Vm));
}
// Saturating narrowing move: Vd (D reg) = saturate each element of Vm (Q reg)
// to the half-width type selected by I_SIGNED/I_UNSIGNED.
void ARMXEmitter::VQMOVN(u32 Size, ARMReg Vd, ARMReg Vm)
{
	_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);
	_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in %s NEON", __FUNCTION__);
	_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);

	// Encode one size down, like VMOVN.
	u32 halfSize = encodedSize(Size) - 1;
	// op field (bits 7:6): 0b10 = signed saturate, 0b11 = unsigned saturate.
	u32 op = (1 << 7) | (Size & I_UNSIGNED ? 1 << 6 : 0);

	Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | op | EncodeVm(Vm));
}
2789
2790
void ARMXEmitter::VQMOVUN(u32 Size, ARMReg Vd, ARMReg Vm)
2791
{
2792
_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);
2793
_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);
2794
_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);
2795
2796
u32 halfSize = encodedSize(Size) - 1;
2797
u32 op = (1 << 6);
2798
2799
Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | op | EncodeVm(Vm));
2800
}
2801
2802
void ARMXEmitter::VCVT(u32 Size, ARMReg Vd, ARMReg Vm)
2803
{
2804
_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VCVT NEON");
2805
2806
bool register_quad = Vd >= Q0;
2807
bool toInteger = (Size & I_32) != 0;
2808
bool isUnsigned = (Size & I_UNSIGNED) != 0;
2809
int op = (toInteger << 1) | (int)isUnsigned;
2810
2811
Write32((0xF3 << 24) | (0xBB << 16) | EncodeVd(Vd) | (0x3 << 9) | (op << 7) | (register_quad << 6) | EncodeVm(Vm));
2812
}
// Maps a VLD1/VST1 register count to the instruction's "type" field, while
// validating that the requested alignment is legal for that register count.
static int RegCountToType(int nRegs, NEONAlignment align) {
	switch (nRegs) {
	case 1:
		_dbg_assert_msg_(!((int)align & 1), "align & 1 must be == 0");
		return 7;
	case 2:
		_dbg_assert_msg_(!((int)align == 3), "align must be != 3");
		return 10;
	case 3:
		_dbg_assert_msg_(!((int)align & 1), "align & 1 must be == 0");
		return 6;
	case 4:
		// All alignments are valid for 4 registers.
		return 2;
	default:
		_dbg_assert_msg_(false, "Invalid number of registers passed to vector load/store");
		return 0;
	}
}
2832
2833
void ARMXEmitter::WriteVLDST1(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm)
2834
{
2835
u32 spacing = RegCountToType(regCount, align); // Only support loading to 1 reg
2836
// Gets encoded as a double register
2837
Vd = SubBase(Vd);
2838
2839
Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (load << 21) | (Rn << 16)
2840
| ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6)
2841
| (align << 4) | Rm);
2842
}
// Loads 1..4 consecutive registers of elements from [Rn].
void ARMXEmitter::VLD1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm) {
	WriteVLDST1(true, Size, Vd, Rn, regCount, align, Rm);
}
// Stores 1..4 consecutive registers of elements to [Rn].
void ARMXEmitter::VST1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm) {
	WriteVLDST1(false, Size, Vd, Rn, regCount, align, Rm);
}
// Shared encoder for VLD1/VST1 of a single element to/from one lane of Vd.
void ARMXEmitter::WriteVLDST1_lane(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm)
{
	bool register_quad = Vd >= Q0;

	Vd = SubBase(Vd);
	// Support quad lanes by converting to D lanes
	if (register_quad && lane > 1) {
		// Lanes 2/3 of a Q register live in the next D register.
		Vd = (ARMReg)((int)Vd + 1);
		lane -= 2;
	}
	int encSize = encodedSize(Size);
	// index_align packs the lane index with the alignment hint; the layout
	// depends on the element size.
	int index_align = 0;
	switch (encSize) {
	case 0: index_align = lane << 1; break;
	case 1: index_align = lane << 2; if (aligned) index_align |= 1; break;
	case 2: index_align = lane << 3; if (aligned) index_align |= 3; break;
	default:
		break;
	}

	Write32((0xF4 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (load << 21) | (Rn << 16)
		| ((Vd & 0xF) << 12) | (encSize << 10)
		| (index_align << 4) | Rm);
}
// Loads a single element from [Rn] into one lane of Vd.
void ARMXEmitter::VLD1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm) {
	WriteVLDST1_lane(true, Size, Vd, Rn, lane, aligned, Rm);
}
// Stores a single lane of Vd to [Rn].
void ARMXEmitter::VST1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm) {
	WriteVLDST1_lane(false, Size, Vd, Rn, lane, aligned, Rm);
}
// Loads a single element from [Rn] and replicates it into every lane of Vd
// (VLD1 "to all lanes" form).
void ARMXEmitter::VLD1_all_lanes(u32 Size, ARMReg Vd, ARMReg Rn, bool aligned, ARMReg Rm) {
	bool register_quad = Vd >= Q0;

	Vd = SubBase(Vd);

	int T = register_quad; // two D registers

	Write32((0xF4 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16)
		| ((Vd & 0xF) << 12) | (0xC << 8) | (encodedSize(Size) << 6)
		| (T << 5) | (aligned << 4) | Rm);
}
/*
void ARMXEmitter::VLD2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm)
{
	u32 spacing = 0x8; // Single spaced registers
	// Gets encoded as a double register
	Vd = SubBase(Vd);

	Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16)
		| ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6)
		| (align << 4) | Rm);
}
*/
2909
2910
void ARMXEmitter::WriteVimm(ARMReg Vd, int cmode, u8 imm, int op) {
2911
bool register_quad = Vd >= Q0;
2912
2913
Write32((0xF28 << 20) | ((imm >> 7) << 24) | (((imm >> 4) & 0x7) << 16) | (imm & 0xF) |
2914
EncodeVd(Vd) | (register_quad << 6) | (op << 5) | (1 << 4) | ((cmode & 0xF) << 8));
2915
}
// VMOV (immediate): fills Vd with a pattern built from the 8-bit imm.
// The VIMMMode names show where the byte lands inside each group
// (x = imm byte, _ = zero byte); Size must match the chosen mode.
void ARMXEmitter::VMOV_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
	// Only let through the modes that apply.
	switch (type) {
	case VIMM___x___x:
	case VIMM__x___x_:
	case VIMM_x___x__:
	case VIMMx___x___:
		// 32-bit groups: byte placed at one of four positions.
		if (Size != I_32)
			goto error;
		WriteVimm(Vd, (int)type, imm, 0);
		break;
	case VIMM_x_x_x_x:
	case VIMMx_x_x_x_:
		// 16-bit groups.
		if (Size != I_16)
			goto error;
		WriteVimm(Vd, (int)type, imm, 0);
		break;
	case VIMMxxxxxxxx: // replicate the byte
		if (Size != I_8)
			goto error;
		WriteVimm(Vd, (int)type, imm, 0);
		break;
	case VIMMbits2bytes:
		// Each imm bit expands to a full 0x00/0xFF byte; op = 1 for this form.
		if (Size != I_64)
			goto error;
		WriteVimm(Vd, (int)type, imm, 1);
		break;
	default:
		goto error;
	}
	return;

error:
	_dbg_assert_msg_(false, "Bad Size or type specified in %s: Size %i Type %i", __FUNCTION__, (int)Size, type);
}
2952
2953
void ARMXEmitter::VMOV_immf(ARMReg Vd, float value) { // This only works with a select few values. I've hardcoded 1.0f.
2954
u8 bits = 0;
2955
2956
if (value == 0.0f) {
2957
VEOR(Vd, Vd, Vd);
2958
return;
2959
}
2960
2961
// TODO: Do something more sophisticated here.
2962
if (value == 1.5f) {
2963
bits = 0x78;
2964
} else if (value == 1.0f) {
2965
bits = 0x70;
2966
} else if (value == -1.0f) {
2967
bits = 0xF0;
2968
} else {
2969
_dbg_assert_msg_(false, "%s: Invalid floating point immediate", __FUNCTION__);
2970
}
2971
WriteVimm(Vd, VIMMf000f000, bits, 0);
2972
}
// VORR (immediate): ORs an expanded immediate pattern into Vd.
// "(int)type | 1" switches the cmode from the plain MOV form to the ORR variant.
void ARMXEmitter::VORR_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
	// Only let through the modes that apply.
	switch (type) {
	case VIMM___x___x:
	case VIMM__x___x_:
	case VIMM_x___x__:
	case VIMMx___x___:
		if (Size != I_32)
			goto error;
		WriteVimm(Vd, (int)type | 1, imm, 0);
		break;
	case VIMM_x_x_x_x:
	case VIMMx_x_x_x_:
		if (Size != I_16)
			goto error;
		WriteVimm(Vd, (int)type | 1, imm, 0);
		break;
	default:
		goto error;
	}
	return;
error:
	_dbg_assert_msg_(false, "Bad Size or type specified in VORR_imm");
}
2998
2999
void ARMXEmitter::VBIC_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
3000
// Only let through the modes that apply.
3001
switch (type) {
3002
case VIMM___x___x:
3003
case VIMM__x___x_:
3004
case VIMM_x___x__:
3005
case VIMMx___x___:
3006
if (Size != I_32)
3007
goto error;
3008
WriteVimm(Vd, (int)type | 1, imm, 1);
3009
break;
3010
case VIMM_x_x_x_x:
3011
case VIMMx_x_x_x_:
3012
if (Size != I_16)
3013
goto error;
3014
WriteVimm(Vd, (int)type | 1, imm, 1);
3015
break;
3016
default:
3017
goto error;
3018
}
3019
return;
3020
error:
3021
_dbg_assert_msg_(false, "Bad Size or type specified in VBIC_imm");
3022
}
// VMVN (immediate): writes the bitwise NOT of the expanded immediate to Vd.
// op = 1 with an unmodified cmode selects the MVN variant.
void ARMXEmitter::VMVN_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {
	// Only let through the modes that apply.
	switch (type) {
	case VIMM___x___x:
	case VIMM__x___x_:
	case VIMM_x___x__:
	case VIMMx___x___:
		if (Size != I_32)
			goto error;
		WriteVimm(Vd, (int)type, imm, 1);
		break;
	case VIMM_x_x_x_x:
	case VIMMx_x_x_x_:
		if (Size != I_16)
			goto error;
		WriteVimm(Vd, (int)type, imm, 1);
		break;
	default:
		goto error;
	}
	return;
error:
	_dbg_assert_msg_(false, "Bad Size or type specified in VMVN_imm");
}
3049
3050
3051
void ARMXEmitter::VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm)
3052
{
3053
bool register_quad = Vd >= Q0;
3054
Vd = SubBase(Vd);
3055
Vm = SubBase(Vm);
3056
3057
Write32((0xF3 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (0x3 << 20)
3058
| (encodedSize(Size) << 18) | ((Vd & 0xF) << 12) | (size << 7)
3059
| (register_quad << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));
3060
}
// Reverses Size-wide elements within each 64-bit chunk of Vm into Vd.
void ARMXEmitter::VREV64(u32 Size, ARMReg Vd, ARMReg Vm)
{
	VREVX(0, Size, Vd, Vm);  // op = 0: 64-bit regions
}
// Reverses Size-wide elements within each 32-bit chunk of Vm into Vd.
void ARMXEmitter::VREV32(u32 Size, ARMReg Vd, ARMReg Vm)
{
	VREVX(1, Size, Vd, Vm);  // op = 1: 32-bit regions
}
// Reverses Size-wide elements within each 16-bit chunk of Vm into Vd.
void ARMXEmitter::VREV16(u32 Size, ARMReg Vd, ARMReg Vm)
{
	VREVX(2, Size, Vd, Vm);  // op = 2: 16-bit regions
}
3076
3077
// See page A8-878 in ARMv7-A Architecture Reference Manual
3078
3079
// Dest is a Q register, Src is a D register.
3080
void ARMXEmitter::VCVTF32F16(ARMReg Dest, ARMReg Src) {
3081
_assert_msg_(cpu_info.bVFPv4, "Can't use half-float conversions when you don't support VFPv4");
3082
if (Dest < Q0 || Dest > Q15 || Src < D0 || Src > D15) {
3083
// Invalid!
3084
}
3085
3086
Dest = SubBase(Dest);
3087
Src = SubBase(Src);
3088
3089
int op = 1;
3090
Write32((0xF3B6 << 16) | ((Dest & 0x10) << 18) | ((Dest & 0xF) << 12) | 0x600 | (op << 8) | ((Src & 0x10) << 1) | (Src & 0xF));
3091
}
3092
3093
// UNTESTED
3094
// Dest is a D register, Src is a Q register.
3095
void ARMXEmitter::VCVTF16F32(ARMReg Dest, ARMReg Src) {
3096
_assert_msg_(cpu_info.bVFPv4, "Can't use half-float conversions when you don't support VFPv4");
3097
if (Dest < D0 || Dest > D15 || Src < Q0 || Src > Q15) {
3098
// Invalid!
3099
}
3100
Dest = SubBase(Dest);
3101
Src = SubBase(Src);
3102
int op = 0;
3103
Write32((0xF3B6 << 16) | ((Dest & 0x10) << 18) | ((Dest & 0xF) << 12) | 0x600 | (op << 8) | ((Src & 0x10) << 1) | (Src & 0xF));
3104
}
3105
3106
// Always clear code space with breakpoints, so that if someone accidentally executes
3107
// uninitialized, it just breaks into the debugger.
3108
void ARMXCodeBlock::PoisonMemory(int offset) {
3109
// TODO: this isn't right for ARM!
3110
memset(region + offset, 0xCC, region_size - offset);
3111
ResetCodePtr(offset);
3112
}
3113
3114
}
3115
3116