// Copyright (C) 2003 Dolphin Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official SVN repository and contact information can be found at
// http://code.google.com/p/dolphin-emu/

#pragma once

#include <vector>
#include <cstdint>

#include "Common/CommonTypes.h"
#include "Common/Log.h"
#include "Common/ArmCommon.h"
#include "Common/CodeBlock.h"

// VCVT flags
#define TO_FLOAT      0
#define TO_INT        (1 << 0)
#define IS_SIGNED     (1 << 1)
#define ROUND_TO_ZERO (1 << 2)
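
// Illustrative only (not part of the original header): the flags above are
// meant to be OR'd together and passed to the VCVT() method declared further
// down. Assuming `emitter` is an ARMXEmitter, a truncating float-to-signed-int
// conversion might look like:
//
//   emitter.VCVT(S0, S1, TO_INT | IS_SIGNED | ROUND_TO_ZERO);  // S0 = (s32)S1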

namespace ArmGen
{
enum ARMReg
{
	// GPRs
	R0 = 0, R1, R2, R3, R4, R5,
	R6, R7, R8, R9, R10, R11,

	// SPRs
	// R13 - R15 are SP, LR, and PC.
	// Almost always referred to by name instead of register number.
	R12 = 12, R13 = 13, R14 = 14, R15 = 15,
	R_IP = 12, R_SP = 13, R_LR = 14, R_PC = 15,

	// VFP single precision registers
	S0, S1, S2, S3, S4, S5, S6,
	S7, S8, S9, S10, S11, S12, S13,
	S14, S15, S16, S17, S18, S19, S20,
	S21, S22, S23, S24, S25, S26, S27,
	S28, S29, S30, S31,

	// VFP double precision registers
	D0, D1, D2, D3, D4, D5, D6, D7,
	D8, D9, D10, D11, D12, D13, D14, D15,
	D16, D17, D18, D19, D20, D21, D22, D23,
	D24, D25, D26, D27, D28, D29, D30, D31,

	// ASIMD quad-word registers
	Q0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,

	// For NEON VLD/VST instructions
	REG_UPDATE = R13,
	INVALID_REG = 0xFFFFFFFF
};

enum ShiftType
{
	ST_LSL = 0,
	ST_ASL = 0,
	ST_LSR = 1,
	ST_ASR = 2,
	ST_ROR = 3,
	ST_RRX = 4
};

enum IntegerSize
{
	I_I8 = 0,
	I_I16,
	I_I32,
	I_I64
};

enum
{
	NUMGPRs = 13,
};

class ARMXEmitter;

enum OpType
{
	TYPE_IMM = 0,
	TYPE_REG,
	TYPE_IMMSREG,
	TYPE_RSR,
	TYPE_MEM
};

// This is no longer a proper Operand2 class. Needs to be split up.
class Operand2
{
	friend class ARMXEmitter;
protected:
	u32 Value;

private:
	OpType Type;

	// IMM types
	u8 Rotation = 0; // Only for u8 values

	// Register types
	u8 IndexOrShift = 0;
	ShiftType Shift = ST_LSL;
public:
	OpType GetType() const {
		return Type;
	}
	Operand2() {
		Type = TYPE_IMM;
		Value = 0;
	}
	Operand2(u32 imm, OpType type = TYPE_IMM) {
		Type = type;
		Value = imm;
	}
	Operand2(ARMReg Reg) {
		Type = TYPE_REG;
		Value = Reg;
	}
	Operand2(u8 imm, u8 rotation) {
		Type = TYPE_IMM;
		Value = imm;
		Rotation = rotation;
	}
	Operand2(ARMReg base, ShiftType type, ARMReg shift) // RSR
	{
		Type = TYPE_RSR;
		_assert_msg_(type != ST_RRX, "Invalid Operand2: RRX does not take a register shift amount");
		IndexOrShift = shift;
		Shift = type;
		Value = base;
	}

	Operand2(ARMReg base, ShiftType type, u8 shift) // For IMM shifted register
	{
		if (shift == 32) shift = 0;
		switch (type)
		{
		case ST_LSL:
			_assert_msg_(shift < 32, "Invalid Operand2: LSL %u", shift);
			break;
		case ST_LSR:
			_assert_msg_(shift <= 32, "Invalid Operand2: LSR %u", shift);
			if (!shift)
				type = ST_LSL;
			if (shift == 32)
				shift = 0;
			break;
		case ST_ASR:
			_assert_msg_(shift < 32, "Invalid Operand2: ASR %u", shift);
			if (!shift)
				type = ST_LSL;
			if (shift == 32)
				shift = 0;
			break;
		case ST_ROR:
			_assert_msg_(shift < 32, "Invalid Operand2: ROR %u", shift);
			if (!shift)
				type = ST_LSL;
			break;
		case ST_RRX:
			_assert_msg_(shift == 0, "Invalid Operand2: RRX does not take an immediate shift amount");
			type = ST_ROR;
			break;
		}
		IndexOrShift = shift;
		Shift = type;
		Value = base;
		Type = TYPE_IMMSREG;
	}
	u32 GetData()
	{
		switch (Type)
		{
		case TYPE_IMM:
			return Imm12Mod(); // This'll need to be changed later
		case TYPE_REG:
			return Rm();
		case TYPE_IMMSREG:
			return IMMSR();
		case TYPE_RSR:
			return RSR();
		default:
			_assert_msg_(false, "GetData with invalid type");
			return 0;
		}
	}
	u32 IMMSR() // IMM shifted register
	{
		_assert_msg_(Type == TYPE_IMMSREG, "IMMSR must be an imm shifted register");
		return ((IndexOrShift & 0x1F) << 7) | (Shift << 5) | Value;
	}
	u32 RSR() // Register shifted register
	{
		_assert_msg_(Type == TYPE_RSR, "RSR must be a register shifted register");
		return (IndexOrShift << 8) | (Shift << 5) | 0x10 | Value;
	}
	u32 Rm() const
	{
		_assert_msg_(Type == TYPE_REG, "Rm must be a register");
		return Value;
	}

	u32 Imm5() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm5 not IMM value");
		return (Value & 0x0000001F) << 7;
	}
	u32 Imm8() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8 not IMM value");
		return Value & 0xFF;
	}
	u32 Imm8Rot() const // IMM8 with rotation
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8Rot not IMM value");
		// Rotation must be even and below 32 for a valid encoding.
		_assert_msg_((Rotation & 0xE1) == 0, "Invalid Operand2: immediate rotation %u", Rotation);
		return (1 << 25) | (Rotation << 7) | (Value & 0x000000FF);
	}
	u32 Imm12() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm12 not IMM");
		return Value & 0x00000FFF;
	}

	u32 Imm12Mod() const
	{
		// This is an IMM12 with the top four bits being rotation and the
		// bottom eight being an IMM. This is for instructions that need to
		// expand an 8-bit IMM to a 32-bit value and gives you some rotation
		// as well. Each rotation step rotates to the right by 2 bits
		// (e.g. rotation 4 turns 0xFF into 0xFF000000, i.e. ROR by 8).
		_assert_msg_((Type == TYPE_IMM), "Imm12Mod not IMM");
		return ((Rotation & 0xF) << 8) | (Value & 0xFF);
	}
	u32 Imm16() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm16 not IMM");
		return ((Value & 0xF000) << 4) | (Value & 0x0FFF);
	}
	u32 Imm16Low() const
	{
		return Imm16();
	}
	u32 Imm16High() const // Returns the high 16 bits
	{
		_assert_msg_((Type == TYPE_IMM), "Imm16High not IMM");
		return (((Value >> 16) & 0xF000) << 4) | ((Value >> 16) & 0x0FFF);
	}
	u32 Imm24() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm24 not IMM");
		return Value & 0x0FFFFFFF;
	}
	// NEON and ASIMD specific
	u32 Imm8ASIMD() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8ASIMD not IMM");
		return ((Value & 0x80) << 17) | ((Value & 0x70) << 12) | (Value & 0xF);
	}
	u32 Imm8VFP() const
	{
		_assert_msg_((Type == TYPE_IMM), "Imm8VFP not IMM");
		return ((Value & 0xF0) << 12) | (Value & 0xF);
	}
};
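
// Illustrative only (not part of the original header): the overloaded
// constructors above cover the common ARM operand forms. For example:
//
//   Operand2 a(R1);                  // plain register:            R1
//   Operand2 b(R1, ST_LSL, 4);       // imm-shifted register:      R1, LSL #4
//   Operand2 c(R1, ST_LSR, R2);      // register-shifted register: R1, LSR R2
//   Operand2 d((u8)0xFF, 4);         // rotated immediate:         0xFF ROR (2*4) = 0xFF000000
//
// and, assuming `emitter` is an ARMXEmitter:
//
//   emitter.ADD(R0, R0, b);          // R0 = R0 + (R1 << 4)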

// Use these when you don't know whether an imm can be represented as an Operand2.
// This lets you generate both an optimal and a fallback solution by checking
// the return value, which will be false if these fail to find an Operand2 that
// represents your 32-bit imm value.
bool TryMakeOperand2(u32 imm, Operand2 &op2);
bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse);
bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated);

// Use this only when you know imm can be made into an Operand2.
Operand2 AssumeMakeOperand2(u32 imm);
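
// Illustrative only (not part of the original header): the intended
// optimal/fallback pattern, assuming `emitter` is an ARMXEmitter and R12
// happens to be free as a scratch register:
//
//   Operand2 op2;
//   if (TryMakeOperand2(imm, op2)) {
//     emitter.ADD(R0, R1, op2);      // optimal: a single instruction
//   } else {
//     emitter.MOVI2R(R12, imm);      // fallback: materialize the constant
//     emitter.ADD(R0, R1, R12);
//   }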

inline Operand2 R(ARMReg Reg) { return Operand2(Reg, TYPE_REG); }
inline Operand2 IMM(u32 Imm) { return Operand2(Imm, TYPE_IMM); }
inline Operand2 Mem(void *ptr) { return Operand2((u32)(uintptr_t)ptr, TYPE_IMM); }
// Usage: struct {int e;} s; STRUCT_OFF(s, e)
#define STRUCT_OFF(str,elem) ((u32)((u32)&(str).elem-(u32)&(str)))

struct FixupBranch
{
	u8 *ptr;
	u32 condition; // Remembers our condition at the time
	int type; // 0 = B, 1 = BL
};

struct LiteralPool
{
	intptr_t loc;
	u8 *ldr_address;
	u32 val;
};

typedef const u8 *JumpTarget;

// XXX: Stop polluting the global namespace
const u32 I_8 = (1 << 0);
const u32 I_16 = (1 << 1);
const u32 I_32 = (1 << 2);
const u32 I_64 = (1 << 3);
const u32 I_SIGNED = (1 << 4);
const u32 I_UNSIGNED = (1 << 5);
const u32 F_32 = (1 << 6);
const u32 I_POLYNOMIAL = (1 << 7); // Only used in VMUL/VMULL

enum VIMMMode {
	VIMM___x___x = 0x0, // 0000 VMOV
	VIMM__x___x_ = 0x2, // 0010
	VIMM_x___x__ = 0x4, // 0100
	VIMMx___x___ = 0x6, // 0110
	VIMM_x_x_x_x = 0x8, // 1000
	VIMMx_x_x_x_ = 0xA, // 1010
	VIMM__x1__x1 = 0xC, // 1100
	VIMM_x11_x11 = 0xD, // 1101
	VIMMxxxxxxxx = 0xE, // 1110 // op == 0
	VIMMf000f000 = 0xF, // 1111 // op == 0 (really aBbbbbbc defgh 00000000 00000000, where B = NOT b)
	VIMMbits2bytes = 0x1E, // Bit replication into bytes! Easily creates 11111111 00000000 masks!
};

u32 EncodeVd(ARMReg Vd);
u32 EncodeVn(ARMReg Vn);
u32 EncodeVm(ARMReg Vm);

u32 encodedSize(u32 value);

// Subtracts the base from the register to give us the real one
ARMReg SubBase(ARMReg Reg);

inline bool IsQ(ARMReg r) {
	return r >= Q0 && r <= Q15;
}

inline bool IsD(ARMReg r) {
	return r >= D0 && r <= D31;
}

// See A7.1 in the ARMv7-A Architecture Reference Manual.
// VMUL F32 scalars can only go up to D15[0], D15[1] - higher scalars cannot be individually addressed.
ARMReg DScalar(ARMReg dreg, int subScalar);
ARMReg QScalar(ARMReg qreg, int subScalar);
inline ARMReg XScalar(ARMReg reg, int subScalar) {
	if (IsQ(reg))
		return QScalar(reg, subScalar);
	else
		return DScalar(reg, subScalar);
}

const char *ARMRegAsString(ARMReg reg);

// Get the two halves of a Q register.
inline ARMReg D_0(ARMReg q) {
	if (q >= Q0 && q <= Q15) {
		return ARMReg(D0 + (q - Q0) * 2);
	} else if (q >= D0 && q <= D31) {
		return q;
	} else {
		return INVALID_REG;
	}
}
inline ARMReg D_1(ARMReg q) {
	return ARMReg(D0 + (q - Q0) * 2 + 1);
}

enum NEONAlignment {
	ALIGN_NONE = 0,
	ALIGN_64 = 1,
	ALIGN_128 = 2,
	ALIGN_256 = 3
};

class NEONXEmitter;

class ARMXEmitter
{
	friend struct OpArg; // for Write8 etc
	friend class NEONXEmitter;
private:
	u8 *code, *startcode;
	u8 *lastCacheFlushEnd;
	u32 condition;
	std::vector<LiteralPool> currentLitPool;

	void WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 op2, bool RegAdd);
	void WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList);
	void WriteVRegStoreOp(u32 op, ARMReg dest, bool Double, bool WriteBack, ARMReg firstreg, u8 numregs);
	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2);
	void WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2);
	void WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2);

	void WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	void Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	// New Ops
	void WriteInstruction(u32 op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags = false);

	void WriteVLDST1(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm);
	void WriteVLDST1_lane(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm);

	void WriteVimm(ARMReg Vd, int cmode, u8 imm, int op);

	void EncodeShiftByImm(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount, u8 opcode, bool quad, bool inverse, bool halve);

protected:
	inline void Write32(u32 value) { *(u32 *)code = value; code += 4; }

public:
	ARMXEmitter() : code(0), startcode(0), lastCacheFlushEnd(0) {
		condition = CC_AL << 28;
	}
	ARMXEmitter(u8 *code_ptr) {
		code = code_ptr;
		lastCacheFlushEnd = code_ptr;
		startcode = code_ptr;
		condition = CC_AL << 28;
	}
	virtual ~ARMXEmitter() {}

	void SetCodePointer(u8 *ptr, u8 *writePtr);
	const u8 *GetCodePointer() const;

	void ReserveCodeSpace(u32 bytes);
	const u8 *AlignCode16();
	const u8 *AlignCodePage();
	const u8 *NopAlignCode16();

	void FlushIcache();
	void FlushIcacheSection(u8 *start, u8 *end);
	u8 *GetWritableCodePtr();

	void FlushLitPool();
	void AddNewLit(u32 val);
	bool TrySetValue_TwoOp(ARMReg reg, u32 val);

	CCFlags GetCC() const { return CCFlags(condition >> 28); }
	void SetCC(CCFlags cond = CC_AL);

	// Special purpose instructions

	// Dynamic endian switching
	void SETEND(bool BE);
	// Debug breakpoint
	void BKPT(u16 arg);

	// Hint instruction
	void YIELD();

	// Do nothing
	void NOP(int count = 1); // nop padding - TODO: fast nop slides

#ifdef CALL
#undef CALL
#endif

	// Branching
	FixupBranch B();
	FixupBranch B_CC(CCFlags Cond);
	void B_CC(CCFlags Cond, const void *fnptr);
	FixupBranch BL();
	FixupBranch BL_CC(CCFlags Cond);
	void SetJumpTarget(FixupBranch const &branch);

	void B (const void *fnptr);
	void B (ARMReg src);
	void BL(const void *fnptr);
	void BL(ARMReg src);
	bool BLInRange(const void *fnptr) const;
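
	// Illustrative only (not part of the original header): forward branches
	// are patched via the FixupBranch mechanism above. Skipping over a block
	// of code conditionally might look like:
	//
	//   FixupBranch skip = B_CC(CC_EQ);  // placeholder branch, target unknown yet
	//   ...                              // emit the code to be skipped
	//   SetJumpTarget(skip);             // patch the branch to land here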

	void PUSH(const int num, ...);
	void POP(const int num, ...);

	// New Data Ops
	void AND (ARMReg Rd, ARMReg Rn, Operand2 Rm);
	void ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm);
	void EOR (ARMReg dest, ARMReg src, Operand2 op2);
	void EORS(ARMReg dest, ARMReg src, Operand2 op2);
	void SUB (ARMReg dest, ARMReg src, Operand2 op2);
	void SUBS(ARMReg dest, ARMReg src, Operand2 op2);
	void RSB (ARMReg dest, ARMReg src, Operand2 op2);
	void RSBS(ARMReg dest, ARMReg src, Operand2 op2);
	void ADD (ARMReg dest, ARMReg src, Operand2 op2);
	void ADDS(ARMReg dest, ARMReg src, Operand2 op2);
	void ADC (ARMReg dest, ARMReg src, Operand2 op2);
	void ADCS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSL (ARMReg dest, ARMReg src, Operand2 op2);
	void LSL (ARMReg dest, ARMReg src, ARMReg op2);
	void LSLS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSLS(ARMReg dest, ARMReg src, ARMReg op2);
	void LSR (ARMReg dest, ARMReg src, Operand2 op2);
	void LSRS(ARMReg dest, ARMReg src, Operand2 op2);
	void LSR (ARMReg dest, ARMReg src, ARMReg op2);
	void LSRS(ARMReg dest, ARMReg src, ARMReg op2);
	void ASR (ARMReg dest, ARMReg src, Operand2 op2);
	void ASRS(ARMReg dest, ARMReg src, Operand2 op2);
	void ASR (ARMReg dest, ARMReg src, ARMReg op2);
	void ASRS(ARMReg dest, ARMReg src, ARMReg op2);

	void SBC (ARMReg dest, ARMReg src, Operand2 op2);
	void SBCS(ARMReg dest, ARMReg src, Operand2 op2);
	void RBIT(ARMReg dest, ARMReg src);
	void REV (ARMReg dest, ARMReg src);
	void REV16(ARMReg dest, ARMReg src);
	void RSC (ARMReg dest, ARMReg src, Operand2 op2);
	void RSCS(ARMReg dest, ARMReg src, Operand2 op2);
	void TST (ARMReg src, Operand2 op2);
	void TEQ (ARMReg src, Operand2 op2);
	void CMP (ARMReg src, Operand2 op2);
	void CMN (ARMReg src, Operand2 op2);
	void ORR (ARMReg dest, ARMReg src, Operand2 op2);
	void ORRS(ARMReg dest, ARMReg src, Operand2 op2);
	void MOV (ARMReg dest, Operand2 op2);
	void MOVS(ARMReg dest, Operand2 op2);
	void BIC (ARMReg dest, ARMReg src, Operand2 op2); // BIC = ANDN
	void BICS(ARMReg dest, ARMReg src, Operand2 op2);
	void MVN (ARMReg dest, Operand2 op2);
	void MVNS(ARMReg dest, Operand2 op2);
	void MOVW(ARMReg dest, Operand2 op2);
	void MOVT(ARMReg dest, Operand2 op2, bool TopBits = false);

	// UDIV and SDIV are only available on CPUs that have
	// the idiva hardware capability.
	void UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);
	void SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor);

	void MUL (ARMReg dest, ARMReg src, ARMReg op2);
	void MULS(ARMReg dest, ARMReg src, ARMReg op2);

	void UMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
	void SMULL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	void UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);
	void SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rn, ARMReg rm);

	void SXTB(ARMReg dest, ARMReg op2);
	void SXTH(ARMReg dest, ARMReg op2, u8 rotation = 0);
	void SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation = 0);
	void BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width);
	void BFC(ARMReg rd, u8 lsb, u8 width);
	void UBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
	void SBFX(ARMReg dest, ARMReg op2, u8 lsb, u8 width);
	void CLZ(ARMReg rd, ARMReg rm);
	void PLD(ARMReg rd, int offset, bool forWrite = false);

	// Using plain MSR clashed with defines on the PPC side of things back when
	// this code lived in Dolphin, hence the leading underscore. A bit annoying.
	void _MSR(bool nzcvq, bool g, Operand2 op2);
	void _MSR(bool nzcvq, bool g, ARMReg src);
	void MRS(ARMReg dest);

	// Memory load/store operations
	void LDR  (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRB (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRH (ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRSB(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void LDRSH(ARMReg dest, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STR  (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STRB (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
	void STRH (ARMReg result, ARMReg base, Operand2 op2 = 0, bool RegAdd = true);
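
	// Illustrative only (not part of the original header): op2 is the offset
	// (immediate or register), and RegAdd is assumed to select whether a
	// register offset is added (true) or subtracted (false). For example:
	//
	//   LDR(R0, R1, 16);                          // R0 = [R1 + 16]
	//   LDR(R0, R1, R2);                          // R0 = [R1 + R2]
	//   STR(R0, R1, Operand2(R2, ST_LSL, 2));     // [R1 + (R2 << 2)] = R0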

	void STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void STMIA(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void LDMIA(ARMReg dest, bool WriteBack, const int Regnum, ...);
	void STM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...);
	void LDM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...);
	void STMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList);
	void LDMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList);

	// Exclusive access operations
	void LDREX(ARMReg dest, ARMReg base);
	// 'result' receives 0 if the instruction managed to store the value, 1 otherwise.
	void STREX(ARMReg result, ARMReg base, ARMReg op);
	void DMB();
	void SVC(Operand2 op);
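
	// Illustrative only (not part of the original header): the usual
	// load-exclusive/store-exclusive retry loop, here atomically incrementing
	// the word at [R1]. Assumes R0/R2 are free and CC_NEQ comes from ArmCommon.h:
	//
	//   const u8 *retry = GetCodePointer();
	//   LDREX(R0, R1);            // R0 = [R1], mark the address exclusive
	//   ADD(R0, R0, 1);
	//   STREX(R2, R1, R0);        // try [R1] = R0; R2 = 0 on success
	//   CMP(R2, 0);
	//   B_CC(CC_NEQ, retry);      // lost exclusivity - try again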

	// NEON and ASIMD instructions
	// None of these are emitted with a condition code, since ARM
	// deprecates conditional execution of ASIMD instructions.
	// ASIMD instructions don't even have a conditional encoding.

	// NEON only
	void VABD(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADD(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUB(IntegerSize size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// VFP only
	void VLDMIA(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VSTMIA(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VLDMDB(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VSTMDB(ARMReg dest, bool WriteBack, ARMReg firstreg, int numregs);
	void VPUSH(ARMReg firstvreg, int numvregs) {
		VSTMDB(R_SP, true, firstvreg, numvregs);
	}
	void VPOP(ARMReg firstvreg, int numvregs) {
		VLDMIA(R_SP, true, firstvreg, numvregs);
	}
	void VLDR(ARMReg Dest, ARMReg Base, s16 offset);
	void VSTR(ARMReg Src, ARMReg Base, s16 offset);
	void VCMP(ARMReg Vd, ARMReg Vm);
	void VCMPE(ARMReg Vd, ARMReg Vm);
	// Compares against zero
	void VCMP(ARMReg Vd);
	void VCMPE(ARMReg Vd);

	void VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSQRT(ARMReg Vd, ARMReg Vm);

	// NEON and VFP
	void VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABS(ARMReg Vd, ARMReg Vm);
	void VNEG(ARMReg Vd, ARMReg Vm);
	void VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMOV(ARMReg Dest, Operand2 op2);
	void VMOV(ARMReg Dest, ARMReg Src, bool high);
	void VMOV(ARMReg Dest, ARMReg Src);
	// Either Vd, Rt, Rt2 or Rt, Rt2, Vd.
	void VMOV(ARMReg Dest, ARMReg Src1, ARMReg Src2);
	void VCVT(ARMReg Dest, ARMReg Src, int flags);

	// NEON; needs a runtime check (supported if VFP4 is supported)
	void VCVTF32F16(ARMReg Dest, ARMReg Src);
	void VCVTF16F32(ARMReg Dest, ARMReg Src);

	void VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VABS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCEQ(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCGE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCGT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCLE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VCLT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCLZ(u32 Size, ARMReg Vd, ARMReg Vm);
	void VCNT(u32 Size, ARMReg Vd, ARMReg Vm);
	void VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index);
	void VDUP(u32 Size, ARMReg Vd, ARMReg Rt);
	void VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index);
	void VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// Three registers
	void VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);

	// Two registers and a scalar
	// These two are super useful for matrix multiplication
	void VMUL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLA_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
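
	// Illustrative only (not part of the original header): a 4x4 matrix *
	// vector product, with the matrix columns in Q1-Q4 and the vector in Q5
	// (QScalar's lane mapping is assumed, and Q5's scalars stay within the
	// addressable D0-D15 range noted above):
	//
	//   VMUL_scalar(F_32, Q0, Q1, QScalar(Q5, 0));   // Q0  = col0 * v.x
	//   VMLA_scalar(F_32, Q0, Q2, QScalar(Q5, 1));   // Q0 += col1 * v.y
	//   VMLA_scalar(F_32, Q0, Q3, QScalar(Q5, 2));   // Q0 += col2 * v.z
	//   VMLA_scalar(F_32, Q0, Q4, QScalar(Q5, 3));   // Q0 += col3 * v.w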

	// TODO:
	/*
	void VMLS_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLAL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMLSL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VMULL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLAL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMLSL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQDMULL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQRDMULH_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	*/

	// Vector bitwise. These don't have an element size for obvious reasons.
	void VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	inline void VMOV_neon(ARMReg Dest, ARMReg Src) {
		VORR(Dest, Src, Src);
	}
	void VMOV_neon(u32 Size, ARMReg Vd, u32 imm);
	void VMOV_neon(u32 Size, ARMReg Vd, float imm) {
		_dbg_assert_msg_(Size == F_32, "Expecting F_32 immediate for VMOV_neon float arg.");
		union {
			float f;
			u32 u;
		} val;
		val.f = imm;
		VMOV_neon(I_32, Vd, val.u);
	}
	void VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane);

	void VNEG(u32 Size, ARMReg Vd, ARMReg Vm);
	void VMVN(ARMReg Vd, ARMReg Vm);
	void VPADAL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VPADDL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQABS(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQNEG(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRECPE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm);
	void VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSHL(u32 Size, ARMReg Vd, ARMReg Vm, ARMReg Vn); // Register shift
	void VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VSWP(ARMReg Vd, ARMReg Vm);
	void VTRN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm);
	void VUZP(u32 Size, ARMReg Vd, ARMReg Vm);
	void VZIP(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV64(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV32(u32 Size, ARMReg Vd, ARMReg Vm);
	void VREV16(u32 Size, ARMReg Vd, ARMReg Vm);

	// NEON immediate instructions

	void VMOV_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VMOV_immf(ARMReg Vd, float value); // This only works with a select few values (1.0f and -1.0f).
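
	// Illustrative only (not part of the original header): two ways of loading
	// float constants into a NEON register. How far the u32-immediate overload
	// can go beyond directly encodable patterns is an assumption here:
	//
	//   VMOV_immf(D0, 1.0f);          // one of the few directly encodable values
	//   VMOV_neon(F_32, D1, 1.5f);    // routes the raw bit pattern through VMOV_neon(I_32, ...)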

	void VORR_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VMVN_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);
	void VBIC_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm);

	// Widening and narrowing moves
	void VMOVL(u32 Size, ARMReg Vd, ARMReg Vm);
	void VMOVN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQMOVN(u32 Size, ARMReg Vd, ARMReg Vm);
	void VQMOVUN(u32 Size, ARMReg Vd, ARMReg Vm);

	// Shifts by immediate
	void VSHL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount);
	void VSHLL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount); // widening
	void VSHR(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount);
	void VSHRN(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount); // narrowing

	// Vector VCVT
	void VCVT(u32 DestSize, ARMReg Dest, ARMReg Src);

	// Notes:
	// Rm == R_PC is interpreted as no offset; otherwise, the effective address is the sum of Rn and Rm.
	// Rm == R13 is interpreted as VLD1, .... [Rn]! (hence the REG_UPDATE pseudo register).

	// Load/store multiple registers full of elements (a register is a D register).
	// Specifying alignment when it can be guaranteed is documented to improve load/store performance.
	// For example, when loading a set of four 64-bit registers that we know is 32-byte aligned, we should specify ALIGN_256.
	void VLD1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);

	// Load/store single lanes of D registers
	void VLD1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm = R_PC);
	void VST1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm = R_PC);

	// Load one value into all lanes of a D or a Q register (either is supported; all formats should work).
	void VLD1_all_lanes(u32 Size, ARMReg Vd, ARMReg Rn, bool aligned, ARMReg Rm = R_PC);
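
	// Illustrative only (not part of the original header): per the notes above,
	// passing REG_UPDATE as Rm requests post-increment writeback:
	//
	//   VLD1(F_32, D0, R0, 2, ALIGN_128, REG_UPDATE);  // D0,D1 = [R0 @128]; R0 += 16
	//   VST1(F_32, D0, R1, 2);                         // [R1] = D0,D1 (Rm defaults to R_PC: no writeback)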

	/*
	// Deinterleaving loads/stores... or something. TODO
	void VLD2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);

	void VLD2_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	void VST2_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);

	void VLD3(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST3(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);

	void VLD3_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	void VST3_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);

	void VLD4(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);
	void VST4(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align = ALIGN_NONE, ARMReg Rm = R_PC);

	void VLD4_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	void VST4_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, ARMReg Rm = R_PC);
	*/

	void VMRS_APSR();
	void VMRS(ARMReg Rt);
	void VMSR(ARMReg Rt);

	void QuickCallFunction(ARMReg scratchreg, const void *func);
	template <typename T> void QuickCallFunction(ARMReg scratchreg, T func) {
		QuickCallFunction(scratchreg, (const void *)func);
	}

	// Wrappers around MOVT/MOVW with fallbacks.
	void MOVI2R(ARMReg reg, u32 val, bool optimize = true);
	void MOVI2FR(ARMReg dest, float val, bool negate = false);
	void MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate = false);
	void MOVI2F_neon(ARMReg dest, float val, ARMReg tempReg, bool negate = false);

	// Load pointers without casting
	template <class T> void MOVP2R(ARMReg reg, T *val) {
		MOVI2R(reg, (u32)(uintptr_t)(void *)val);
	}

	void MOVIU2F(ARMReg dest, u32 val, ARMReg tempReg, bool negate = false) {
		union {
			u32 u;
			float f;
		} v = {val};
		MOVI2F(dest, v.f, tempReg, negate);
	}

	void ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryADDI2R(ARMReg rd, ARMReg rs, u32 val);
	void SUBI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TrySUBI2R(ARMReg rd, ARMReg rs, u32 val);
	void ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryANDI2R(ARMReg rd, ARMReg rs, u32 val);
	void CMPI2R(ARMReg rs, u32 val, ARMReg scratch);
	bool TryCMPI2R(ARMReg rs, u32 val);
	void TSTI2R(ARMReg rs, u32 val, ARMReg scratch);
	bool TryTSTI2R(ARMReg rs, u32 val);
	void ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryORI2R(ARMReg rd, ARMReg rs, u32 val);
	void EORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch);
	bool TryEORI2R(ARMReg rd, ARMReg rs, u32 val);
}; // class ARMXEmitter

// Everything that needs to generate machine code should inherit from this.
// You get memory management for free, plus you can use all the MOV etc. functions
// without having to prefix them with gen-> or something similar.
class ARMXCodeBlock : public CodeBlock<ARMXEmitter> {
public:
	void PoisonMemory(int offset) override;
};

// VFP specific
struct VFPEnc {
	s16 opc1;
	s16 opc2;
};
extern const VFPEnc VFPOps[16][2];
extern const char *VFPOpNames[16];

} // namespace ArmGen
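
// Illustrative only (not part of the original header): a typical consumer
// inherits from ARMXCodeBlock and emits code directly. AllocCodeSpace() is
// assumed to come from the CodeBlock<> base in Common/CodeBlock.h.
//
//   class MyJit : public ArmGen::ARMXCodeBlock {
//   public:
//     const u8 *EmitAddOne() {
//       AllocCodeSpace(4096);                    // reserve an executable buffer
//       const u8 *fn = GetCodePointer();
//       ADD(ArmGen::R0, ArmGen::R0, 1);          // AAPCS: argument/result in R0
//       B(ArmGen::R_LR);                         // return to caller
//       FlushIcache();                           // make the new code visible
//       return fn;
//     }
//   };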