GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/ArmEmitter.h
// Copyright (C) 2003 Dolphin Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/

#pragma once

#include <vector>
#include <cstdint>

#include "Common/CommonTypes.h"
#include "Common/Log.h"
#include "Common/ArmCommon.h"
#include "Common/CodeBlock.h"
// VCVT flags
#define TO_FLOAT 0
#define TO_INT (1 << 0)
#define IS_SIGNED (1 << 1)
#define ROUND_TO_ZERO (1 << 2)
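
// These flags are meant to be OR'd together and passed to VCVT(Dest, Src, flags),
// declared further down in ARMXEmitter. A usage sketch (illustrative, not taken
// from the original source):
//
//   VCVT(S0, S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);  // f32 -> s32, truncating
//   VCVT(S2, S3, TO_FLOAT | IS_SIGNED);                // s32 -> f32
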
namespace ArmGen
{
enum ARMReg
{
	// GPRs
	R0 = 0, R1, R2, R3, R4, R5,
	R6, R7, R8, R9, R10, R11,

	// SPRs
	// R13 - R15 are SP, LR, and PC.
	// Almost always referred to by name instead of register number
	R12 = 12, R13 = 13, R14 = 14, R15 = 15,
	R_IP = 12, R_SP = 13, R_LR = 14, R_PC = 15,


	// VFP single precision registers
	S0, S1, S2, S3, S4, S5, S6,
	S7, S8, S9, S10, S11, S12, S13,
	S14, S15, S16, S17, S18, S19, S20,
	S21, S22, S23, S24, S25, S26, S27,
	S28, S29, S30, S31,

	// VFP Double Precision registers
	D0, D1, D2, D3, D4, D5, D6, D7,
	D8, D9, D10, D11, D12, D13, D14, D15,
	D16, D17, D18, D19, D20, D21, D22, D23,
	D24, D25, D26, D27, D28, D29, D30, D31,

	// ASIMD Quad-Word registers
	Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,

	// for NEON VLD/VST instructions
	REG_UPDATE = R13,
	INVALID_REG = 0xFFFFFFFF
};

enum ShiftType
{
	ST_LSL = 0,
	ST_ASL = 0,
	ST_LSR = 1,
	ST_ASR = 2,
	ST_ROR = 3,
	ST_RRX = 4
};
enum IntegerSize
{
	I_I8 = 0,
	I_I16,
	I_I32,
	I_I64
};

enum
{
	NUMGPRs = 13,
};

class ARMXEmitter;

enum OpType
{
	TYPE_IMM = 0,
	TYPE_REG,
	TYPE_IMMSREG,
	TYPE_RSR,
	TYPE_MEM
};

// This is no longer a proper operand2 class. Need to split up.
class Operand2
{
	friend class ARMXEmitter;
protected:
	u32 Value;

private:
	OpType Type;

	// IMM types
	u8 Rotation = 0; // Only for u8 values

	// Register types
	u8 IndexOrShift = 0;
	ShiftType Shift = ST_LSL;
public:
	OpType GetType() const {
		return Type;
	}
	Operand2() {
		Type = TYPE_IMM;
		Value = 0;
	}
	Operand2(u32 imm, OpType type = TYPE_IMM) {
		Type = type;
		Value = imm;
	}

	Operand2(ARMReg Reg) {
		Type = TYPE_REG;
		Value = Reg;
	}
	Operand2(u8 imm, u8 rotation) {
		Type = TYPE_IMM;
		Value = imm;
		Rotation = rotation;
	}
	Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR
	{
		Type = TYPE_RSR;
		_assert_msg_(type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount");
		IndexOrShift = shift;
		Shift = type;
		Value = base;
	}

	Operand2(ARMReg base, ShiftType type, u8 shift) // For IMM shifted register
	{
		if (shift == 32) shift = 0;
		switch (type)
		{
		case ST_LSL:
			_assert_msg_(shift < 32, "Invalid Operand2: LSL %u", shift);
			break;
		case ST_LSR:
			_assert_msg_(shift <= 32, "Invalid Operand2: LSR %u", shift);
			if (!shift)
				type = ST_LSL;
			if (shift == 32)
				shift = 0;
			break;
		case ST_ASR:
			_assert_msg_(shift < 32, "Invalid Operand2: ASR %u", shift);
			if (!shift)
				type = ST_LSL;
			if (shift == 32)
				shift = 0;
			break;
		case ST_ROR:
			_assert_msg_(shift < 32, "Invalid Operand2: ROR %u", shift);
			if (!shift)
				type = ST_LSL;
			break;
		case ST_RRX:
			_assert_msg_(shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount");
			type = ST_ROR;
			break;
		}
		IndexOrShift = shift;
		Shift = type;
		Value = base;
		Type = TYPE_IMMSREG;
	}
	u32 GetData()
	{
		switch (Type)
		{
		case TYPE_IMM:
			return Imm12Mod(); // This'll need to be changed later
		case TYPE_REG:
			return Rm();
		case TYPE_IMMSREG:
			return IMMSR();
		case TYPE_RSR:
			return RSR();
		default:
			_assert_msg_(false, "GetData with Invalid Type");
			return 0;
		}
	}
	u32 IMMSR() // IMM shifted register
	{
		_assert_msg_(Type == TYPE_IMMSREG, "IMMSR must be imm shifted register");
		return ((IndexOrShift & 0x1f) << 7 | (Shift << 5) | Value);
	}
	u32 RSR() // Register shifted register
	{
		_assert_msg_(Type == TYPE_RSR, "RSR must be RSR Of Course");
		return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value;
	}
	u32 Rm() const
	{
		_assert_msg_(Type == TYPE_REG, "Rm must be with Reg");
		return Value;
	}

	u32 Imm5() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm5 not IMM value");
		return ((Value & 0x0000001F) << 7);
	}
	u32 Imm8() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8 not IMM value");
		return Value & 0xFF;
	}
	u32 Imm8Rot() const // IMM8 with Rotation
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8Rot not IMM value");
		_assert_msg_((Rotation & 0xE1) == 0, "Invalid Operand2: immediate rotation %u", Rotation);
		return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF);
	}
	u32 Imm12() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm12 not IMM");
		return (Value & 0x00000FFF);
	}

	u32 Imm12Mod() const
	{
		// This is an IMM12 with the top four bits being rotation and the
		// bottom eight being an IMM. This is for instructions that need to
		// expand an 8-bit IMM to a 32-bit value and gives you some rotation
		// as well.
		// Each step of rotation rotates the value right by 2 bits.
		_assert_msg_((Type == TYPE_IMM), "Imm12Mod not IMM");
		return ((Rotation & 0xF) << 8) | (Value & 0xFF);
	}
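	// Worked example for Imm12Mod() (illustrative): with Value = 0xAB and
	// Rotation = 4, the instruction sees 0xAB rotated right by 8, i.e.
	// 0xAB000000; with Rotation = 0xF it sees 0xAB rotated right by 30,
	// i.e. 0x000002AC.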
	u32 Imm16() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm16 not IMM");
		return ((Value & 0xF000) << 4) | (Value & 0x0FFF);
	}
	u32 Imm16Low() const
	{
		return Imm16();
	}
	u32 Imm16High() const // Returns high 16bits
	{
		_assert_msg_((Type == TYPE_IMM), "Imm16 not IMM");
		return (((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF);
	}
	u32 Imm24() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm24 not IMM");
		return (Value & 0x0FFFFFFF);
	}
	// NEON and ASIMD specific
	u32 Imm8ASIMD() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8ASIMD not IMM");
		return ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF);
	}
	u32 Imm8VFP() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8VFP not IMM");
		return ((Value & 0xF0) << 12) | (Value & 0xF);
	}
};

// Use these when you don't know if an imm can be represented as an operand2.
286
// This lets you generate both an optimal and a fallback solution by checking
287
// the return value, which will be false if these fail to find a Operand2 that
288
// represents your 32-bit imm value.
289
bool TryMakeOperand2(u32 imm, Operand2 &op2);
290
bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse);
291
bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated);
292
293
// Use this only when you know imm can be made into an Operand2.
294
Operand2 AssumeMakeOperand2(u32 imm);
295
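
// Optimal/fallback usage sketch (illustrative, not from the original source;
// R12 as scratch and the ADD/MOVI2R calls below are ARMXEmitter members
// declared further down):
//
//   Operand2 op2;
//   if (TryMakeOperand2(imm, op2)) {
//       ADD(R0, R1, op2);       // single instruction
//   } else {
//       MOVI2R(R12, imm);       // materialize the constant in a scratch reg
//       ADD(R0, R1, R12);
//   }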

inline Operand2 R(ARMReg Reg) { return Operand2(Reg, TYPE_REG); }
inline Operand2 IMM(u32 Imm) { return Operand2(Imm, TYPE_IMM); }
inline Operand2 Mem(void *ptr) { return Operand2((u32)(uintptr_t)ptr, TYPE_IMM); }
// usage: struct {int e;} s; STRUCT_OFF(s,e)
#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str)))


struct FixupBranch
{
	u8 *ptr;
	u32 condition; // Remembers our condition at the time
	int type; // 0 = B, 1 = BL
};

struct LiteralPool
{
	intptr_t loc;
	u8 *ldr_address;
	u32 val;
};

typedef const u8 *JumpTarget;

// XXX: Stop polluting the global namespace
const u32 I_8 = (1 << 0);
const u32 I_16 = (1 << 1);
const u32 I_32 = (1 << 2);
const u32 I_64 = (1 << 3);
const u32 I_SIGNED = (1 << 4);
const u32 I_UNSIGNED = (1 << 5);
const u32 F_32 = (1 << 6);
const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL

enum VIMMMode {
	VIMM___x___x = 0x0, // 0000 VMOV
	VIMM__x___x_ = 0x2, // 0010
	VIMM_x___x__ = 0x4, // 0100
	VIMMx___x___ = 0x6, // 0110
	VIMM_x_x_x_x = 0x8, // 1000
	VIMMx_x_x_x_ = 0xA, // 1010
	VIMM__x1__x1 = 0xC, // 1100
	VIMM_x11_x11 = 0xD, // 1101
	VIMMxxxxxxxx = 0xE, // 1110 // op == 0
	VIMMf000f000 = 0xF, // 1111 // op == 0 (really aBbbbbbc defgh 00000000 00000000, where B = NOT b)
	VIMMbits2bytes = 0x1E, // Bit replication into bytes! Easily creates 11111111 00000000 masks.
};

u32 EncodeVd(ARMReg Vd);
u32 EncodeVn(ARMReg Vn);
u32 EncodeVm(ARMReg Vm);

u32 encodedSize(u32 value);

// Subtracts the base from the register to give us the real one
ARMReg SubBase(ARMReg Reg);

inline bool IsQ(ARMReg r) {
	return r >= Q0 && r <= Q15;
}

inline bool IsD(ARMReg r) {
	return r >= D0 && r <= D31;
}

// See A.7.1 in the ARMv7-A Architecture Reference Manual.
// VMUL F32 scalars can only be up to D15[0], D15[1] - higher scalars cannot be individually addressed.
ARMReg DScalar(ARMReg dreg, int subScalar);
ARMReg QScalar(ARMReg qreg, int subScalar);
inline ARMReg XScalar(ARMReg reg, int subScalar) {
	if (IsQ(reg))
		return QScalar(reg, subScalar);
	else
		return DScalar(reg, subScalar);
}

const char *ARMRegAsString(ARMReg reg);

// Get the two halves of a Q register.
inline ARMReg D_0(ARMReg q) {
	if (q >= Q0 && q <= Q15) {
		return ARMReg(D0 + (q - Q0) * 2);
	} else if (q >= D0 && q <= D31) {
		return q;
	} else {
		return INVALID_REG;
	}
}
inline ARMReg D_1(ARMReg q) {
	return ARMReg(D0 + (q - Q0) * 2 + 1);
}

enum NEONAlignment {
	ALIGN_NONE = 0,
	ALIGN_64 = 1,
	ALIGN_128 = 2,
	ALIGN_256 = 3
};

class NEONXEmitter;

class ARMXEmitter
{
	friend struct OpArg; // for Write8 etc
	friend class NEONXEmitter;
private:
	u8 *code, *startcode;
	u8 *lastCacheFlushEnd;
	u32 condition;
	std::vector<LiteralPool> currentLitPool;

	void WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 op2, bool RegAdd);
	void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList);
	void WriteVRegStoreOp(u32 op, ARMReg dest, bool Double, bool WriteBack, ARMReg firstreg, u8 numregs);
	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2);
	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2);
	void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2);

	void WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	// New Ops
	void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false);

	void WriteVLDST1(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm);
	void WriteVLDST1_lane(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm);

	void WriteVimm(ARMReg Vd, int cmode, u8 imm, int op);

	void EncodeShiftByImm(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount, u8 opcode, bool quad, bool inverse, bool halve);

protected:
	inline void Write32(u32 value) { *(u32 *)code = value; code += 4; }

public:
	ARMXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
		condition = CC_AL << 28;
	}
	ARMXEmitter(u8 *code_ptr) {
		code = code_ptr;
		lastCacheFlushEnd = code_ptr;
		startcode = code_ptr;
		condition = CC_AL << 28;
	}
	virtual ~ARMXEmitter() {}

	void SetCodePointer(u8 *ptr, u8 *writePtr);
	const u8 *GetCodePointer() const;

	void ReserveCodeSpace(u32 bytes);
	const u8 *AlignCode16();
	const u8 *AlignCodePage();
	const u8 *NopAlignCode16();

	void FlushIcache();
	void FlushIcacheSection(u8 *start, u8 *end);
	u8 *GetWritableCodePtr();

	void FlushLitPool();
	void AddNewLit(u32 val);
	bool TrySetValue_TwoOp(ARMReg reg, u32 val);

	CCFlags GetCC() const { return CCFlags(condition >> 28); }
	void SetCC(CCFlags cond = CC_AL);
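
	// Conditional-execution sketch (illustrative, not from the original source;
	// the CC_* values are assumed to come from Common/ArmCommon.h): every
	// instruction emitted after SetCC(cond) is predicated on cond until the
	// condition is changed back.
	//
	//   CMP(R0, 0);
	//   SetCC(CC_EQ);   // the following MOV only executes when R0 == 0
	//   MOV(R1, 1);
	//   SetCC();        // back to CC_AL (always)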

	// Special purpose instructions

	// Dynamic Endian Switching
	void SETEND(bool BE);
	// Debug Breakpoint
	void BKPT(u16 arg);

	// Hint instruction
	void YIELD();

	// Do nothing
	void NOP(int count = 1); // nop padding - TODO: fast nop slides, for amd and intel (check their manuals)

#ifdef CALL
#undef CALL
#endif

	// Branching
	FixupBranch B();
	FixupBranch B_CC(CCFlags Cond);
	void B_CC(CCFlags Cond, const void *fnptr);
	FixupBranch BL();
	FixupBranch BL_CC(CCFlags Cond);
	void SetJumpTarget(FixupBranch const &branch);

	void B (const void *fnptr);
	void B (ARMReg src);
	void BL(const void *fnptr);
	void BL(ARMReg src);
	bool BLInRange(const void *fnptr) const;
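
	// Forward-branch fixup sketch (illustrative, not from the original source):
	// emit the branch before its target exists, then patch it once the target
	// is known.
	//
	//   CMP(R0, 0);
	//   FixupBranch skip = B_CC(CC_EQ);  // target unknown at this point
	//   ...                              // code skipped when R0 == 0
	//   SetJumpTarget(skip);             // patch the branch to land here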

	void PUSH(const int num, ...);
	void POP(const int num, ...);

	// New Data Ops
	void AND (ARMReg Rd, ARMReg Rn, Operand2 Rm);
	void ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm);
	void EOR (ARMReg dest, ARMReg src, Operand2 op2);
	void EORS(ARMReg dest, ARMReg src, Operand2 op2);
	void SUB (ARMReg dest, ARMReg src, Operand2 op2);
	void SUBS(ARMReg dest, ARMReg src, Operand2 op2);
	void RSB (ARMReg dest, ARMReg src, Operand2 op2);
	void RSBS(ARMReg dest, ARMReg src, Operand2 op2);
	void ADD (ARMReg dest, ARMReg src, Operand2 op2);
	void ADDS(ARMReg dest, ARMReg src, Operand2 op2);
	void ADC (ARMReg dest, ARMReg src, Operand2 op2);
	void ADCS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSL (ARMReg dest, ARMReg src, Operand2 op2);
	void LSL (ARMReg dest, ARMReg src, ARMReg op2);
	void LSLS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSLS(ARMReg dest, ARMReg src, ARMReg op2);
	void LSR (ARMReg dest, ARMReg src, Operand2 op2);
	void LSRS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSR (ARMReg dest, ARMReg src, ARMReg op2);
	void LSRS(ARMReg dest, ARMReg src, ARMReg op2);
	void ASR (ARMReg dest, ARMReg src, Operand2 op2);
	void ASRS(ARMReg dest, ARMReg src, Operand2 op2);
	void ASR (ARMReg dest, ARMReg src, ARMReg op2);
	void ASRS(ARMReg dest, ARMReg src, ARMReg op2);

	void SBC (ARMReg dest, ARMReg src, Operand2 op2);
	void SBCS(ARMReg dest, ARMReg src, Operand2 op2);
	void RBIT(ARMReg dest, ARMReg src);
	void REV (ARMReg dest, ARMReg src);
	void REV16(ARMReg dest, ARMReg src);
	void RSC (ARMReg dest, ARMReg src, Operand2 op2);
	void RSCS(ARMReg dest, ARMReg src, Operand2 op2);
	void TST (ARMReg src, Operand2 op2);
	void TEQ (ARMReg src, Operand2 op2);
	void CMP (ARMReg src, Operand2 op2);
	void CMN (ARMReg src, Operand2 op2);
	void ORR (ARMReg dest, ARMReg src, Operand2 op2);
	void ORRS(ARMReg dest, ARMReg src, Operand2 op2);
	void MOV (ARMReg dest, Operand2 op2);
	void MOVS(ARMReg dest, Operand2 op2);
	void BIC (ARMReg dest, ARMReg src, Operand2 op2); // BIC = ANDN
	void BICS(ARMReg dest, ARMReg src, Operand2 op2);
	void MVN (ARMReg dest, Operand2 op2);
	void MVNS(ARMReg dest, Operand2 op2);
	void MOVW(ARMReg dest, Operand2 op2);
	void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false);

	// UDIV and SDIV are only available on CPUs that have
	// the IDIVA hardware capability.
	void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);
	void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);

	void MUL (ARMReg dest, ARMReg src, ARMReg op2);
	void MULS(ARMReg dest, ARMReg src, ARMReg op2);

	void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
	void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	void UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
	void SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	void SXTB(ARMReg dest, ARMReg op2);
	void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0);
	void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0);
	void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width);
	void BFC(ARMReg rd, u8 lsb, u8 width);
	void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
	void SBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
	void CLZ(ARMReg rd, ARMReg rm);
	void PLD(ARMReg rd, int offset, bool forWrite = false);

	// Using plain "MSR" here clashes with defines on the PPC side of things (from when this code lived in Dolphin),
	// so these carry a leading underscore. A bit annoying.
	void _MSR (bool nzcvq, bool g, Operand2 op2);
	void _MSR (bool nzcvq, bool g, ARMReg src);
	void MRS (ARMReg dest);

	// Memory load/store operations
	void LDR  (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRB (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRH (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRSB(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRSH(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STR  (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STRB (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STRH (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);

	void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void STMIA(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void LDMIA(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void STM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...);
	void LDM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...);
	void STMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList);
	void LDMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList);

	// Exclusive Access operations
	void LDREX(ARMReg dest, ARMReg base);
	// result is set to 0 if the store succeeded, 1 if it did not.
	void STREX(ARMReg result, ARMReg base, ARMReg op);
	void DMB ();
	void SVC(Operand2 op);

	// NEON and ASIMD instructions
	// None of these are emitted with a condition, since ARM has deprecated
	// conditional execution of ASIMD instructions and ASIMD instructions
	// don't have a conditional encoding anyway.

	// NEON Only
	void VABD(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADD(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUB(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// VFP Only
	void VLDMIA(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VSTMIA(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VLDMDB(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VSTMDB(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VPUSH(ARMReg firstvreg, int numvregs) {
		VSTMDB(R_SP, true, firstvreg, numvregs);
	}
	void VPOP(ARMReg firstvreg, int numvregs) {
		VLDMIA(R_SP, true, firstvreg, numvregs);
	}
	void VLDR(ARMReg Dest, ARMReg Base, s16 offset);
	void VSTR(ARMReg Src, ARMReg Base, s16 offset);
	void VCMP(ARMReg Vd, ARMReg Vm);
	void VCMPE(ARMReg Vd, ARMReg Vm);
	// Compares against zero
	void VCMP(ARMReg Vd);
	void VCMPE(ARMReg Vd);

	void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSQRT(ARMReg Vd, ARMReg Vm);

	// NEON and VFP
	void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABS(ARMReg Vd, ARMReg Vm);
	void VNEG(ARMReg Vd, ARMReg Vm);
	void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMOV(ARMReg Dest, Operand2 op2);
	void VMOV(ARMReg Dest, ARMReg Src, bool high);
	void VMOV(ARMReg Dest, ARMReg Src);
	// Either Vd, Rt, Rt2 or Rt, Rt2, Vd.
	void VMOV(ARMReg Dest, ARMReg Src1, ARMReg Src2);
	void VCVT(ARMReg Dest, ARMReg Src, int flags);

	// NEON, need to check for this (supported if VFP4 is supported)
	void VCVTF32F16(ARMReg Dest, ARMReg Src);
	void VCVTF16F32(ARMReg Dest, ARMReg Src);

	void VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCEQ(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCGE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCGT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCLE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCLT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLZ(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCNT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index);
	void VDUP(u32 Size, ARMReg Vd, ARMReg Rt);
	void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index);
	void VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// Three registers
	void VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// Two registers and a scalar
	// These two are super useful for matrix multiplication
	void VMUL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLA_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// TODO:
	/*
	void VMLS_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLAL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLSL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMULL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLAL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLSL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQRDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	*/

	// Vector bitwise. These don't have an element size for obvious reasons.
	void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	inline void VMOV_neon(ARMReg Dest, ARMReg Src) {
		VORR(Dest, Src, Src);
	}
	void VMOV_neon(u32 Size, ARMReg Vd, u32 imm);
	void VMOV_neon(u32 Size, ARMReg Vd, float imm) {
		_dbg_assert_msg_(Size == F_32, "Expecting F_32 immediate for VMOV_neon float arg.");
		union {
			float f;
			u32 u;
		} val;
		val.f = imm;
		VMOV_neon(I_32, Vd, val.u);
	}
	void VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane);

	void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
	void VMVN(ARMReg Vd, ARMReg Vm);
	void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQABS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQNEG(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRECPE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSHL(u32 Size, ARMReg Vd, ARMReg Vm, ARMReg Vn); // Register shift
	void VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSWP(ARMReg Vd, ARMReg Vm);
	void VTRN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VUZP(u32 Size, ARMReg Vd, ARMReg Vm);
	void VZIP(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV64(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV32(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV16(u32 Size, ARMReg Vd, ARMReg Vm);

	// NEON immediate instructions

	void VMOV_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VMOV_immf(ARMReg Vd, float value); // This only works with a select few values (1.0f and -1.0f).

	void VORR_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VMVN_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VBIC_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);

	// Widening and narrowing moves
	void VMOVL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VMOVN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQMOVN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQMOVUN(u32 Size, ARMReg Vd, ARMReg Vm);

	// Shifts by immediate
	void VSHL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount);
	void VSHLL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount); // widening
	void VSHR(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount);
	void VSHRN(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount); // narrowing

	// Vector VCVT
	void VCVT(u32 DestSize, ARMReg Dest, ARMReg Src);

	// Notes:
	// Rm == R_PC is interpreted as no offset; otherwise the effective address is the sum of Rn and Rm.
	// Rm == R13 is interpreted as writeback, i.e. VLD1 ..., [Rn]! - the REG_UPDATE pseudo register was added for this.

	// Load/store multiple registers full of elements (a register is a D register)
	// Specifying alignment when it can be guaranteed is documented to improve load/store performance.
	// For example, when loading a set of four 64-bit registers that we know is 32-byte aligned, we should specify ALIGN_256.
	void VLD1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
818
// Load/store single lanes of D registers
819
void VLD1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm = R_PC);
820
void VST1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm = R_PC);
821
822
// Load one value into all lanes of a D or a Q register (either supported, all formats should work).
823
void VLD1_all_lanes(u32 Size, ARMReg Vd, ARMReg Rn, bool aligned, ARMReg Rm = R_PC);
824
825
/*
826
// Deinterleave two loads... or something. TODO
827
void VLD2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
828
void VST2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
829
830
void VLD2_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
831
void VST2_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
832
833
void VLD3(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
834
void VST3(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
835
836
void VLD3_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
837
void VST3_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
838
839
void VLD4(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
840
void VST4(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
841
842
void VLD4_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
843
void VST4_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
844
*/
845
846
void VMRS_APSR();
847
void VMRS(ARMReg Rt);
848
void VMSR(ARMReg Rt);
849
850
	void QuickCallFunction(ARMReg scratchreg, const void *func);
	template <typename T> void QuickCallFunction(ARMReg scratchreg, T func) {
		QuickCallFunction(scratchreg, (const void *)func);
	}

	// Wrapper around MOVT/MOVW with fallbacks.
	void MOVI2R(ARMReg reg, u32 val, bool optimize = true);
	void MOVI2FR(ARMReg dest, float val, bool negate = false);
	void MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate = false);
	void MOVI2F_neon(ARMReg dest, float val, ARMReg tempReg, bool negate = false);

	// Load pointers without casting
	template <class T> void MOVP2R(ARMReg reg, T *val) {
		MOVI2R(reg, (u32)(uintptr_t)(void *)val);
	}
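
	// Sketch of a typical call-out sequence built from these helpers
	// (illustrative only; someStruct and SomeHelper are hypothetical names,
	// and the argument registers follow the AAPCS calling convention):
	//
	//   MOVP2R(R0, &someStruct);              // pointer argument in R0
	//   MOVI2R(R1, 0x12345678);               // arbitrary 32-bit constant in R1
	//   QuickCallFunction(R12, &SomeHelper);  // R12 used as scratch for the call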

	void MOVIU2F(ARMReg dest, u32 val, ARMReg tempReg, bool negate = false) {
		union {
			u32 u;
			float f;
		} v = {val};
		MOVI2F(dest, v.f, tempReg, negate);
	}

	void ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryADDI2R(ARMReg rd, ARMReg rs, u32 val);
	void SUBI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TrySUBI2R(ARMReg rd, ARMReg rs, u32 val);
	void ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryANDI2R(ARMReg rd, ARMReg rs, u32 val);
	void CMPI2R(ARMReg rs, u32 val, ARMReg scratch);
	bool TryCMPI2R(ARMReg rs, u32 val);
	void TSTI2R(ARMReg rs, u32 val, ARMReg scratch);
	bool TryTSTI2R(ARMReg rs, u32 val);
	void ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryORI2R(ARMReg rd, ARMReg rs, u32 val);
	void EORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryEORI2R(ARMReg rd, ARMReg rs, u32 val);
}; // class ARMXEmitter


// Everything that needs to generate machine code should inherit from this.
// You get memory management for free, plus, you can use all the MOV etc functions without
// having to prefix them with gen-> or something similar.

class ARMXCodeBlock : public CodeBlock<ARMXEmitter> {
public:
	void PoisonMemory(int offset) override;
};

// VFP Specific
struct VFPEnc {
	s16 opc1;
	s16 opc2;
};
extern const VFPEnc VFPOps[16][2];
extern const char *VFPOpNames[16];

} // namespace ArmGen