CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/Arm64Emitter.h
Views: 1401
// Copyright 2015 Dolphin Emulator Project
// Licensed under GPLv2+
// Refer to the license.txt file included.

#pragma once

#include <functional>

#include "Common/ArmCommon.h"
#include "Common/BitSet.h"
#include "Common/CodeBlock.h"
#include "Common/CommonTypes.h"
#include "Common/Log.h"

#define DYNA_REC JIT

// Some system math headers define FMAX/FMIN macros, which would clash with
// the emitter methods of the same name declared below.
#ifdef FMAX
#undef FMAX
#endif
#ifdef FMIN
#undef FMIN
#endif

namespace Arm64Gen
{

// X30 serves a dual purpose as a link register
// Encoded as <u3:type><u5:reg>
// Types:
// 000 - 32bit GPR
// 001 - 64bit GPR
// 010 - VFP single precision
// 100 - VFP double precision
// 110 - VFP quad precision
enum ARM64Reg
{
	// 32bit registers
	W0 = 0, W1, W2, W3, W4, W5, W6,
	W7, W8, W9, W10, W11, W12, W13, W14,
	W15, W16, W17, W18, W19, W20, W21, W22,
	W23, W24, W25, W26, W27, W28, W29, W30,

	WSP, // 32bit stack pointer

	// 64bit registers
	X0 = 0x20, X1, X2, X3, X4, X5, X6,
	X7, X8, X9, X10, X11, X12, X13, X14,
	X15, X16, X17, X18, X19, X20, X21, X22,
	X23, X24, X25, X26, X27, X28, X29, X30,

	SP, // 64bit stack pointer

	// VFP single precision registers
	S0 = 0x40, S1, S2, S3, S4, S5, S6,
	S7, S8, S9, S10, S11, S12, S13,
	S14, S15, S16, S17, S18, S19, S20,
	S21, S22, S23, S24, S25, S26, S27,
	S28, S29, S30, S31,

	// VFP Double Precision registers
	D0 = 0x80, D1, D2, D3, D4, D5, D6, D7,
	D8, D9, D10, D11, D12, D13, D14, D15,
	D16, D17, D18, D19, D20, D21, D22, D23,
	D24, D25, D26, D27, D28, D29, D30, D31,

	// ASIMD Quad-Word registers
	Q0 = 0xC0, Q1, Q2, Q3, Q4, Q5, Q6, Q7,
	Q8, Q9, Q10, Q11, Q12, Q13, Q14, Q15,
	Q16, Q17, Q18, Q19, Q20, Q21, Q22, Q23,
	Q24, Q25, Q26, Q27, Q28, Q29, Q30, Q31,

	// For PRFM(prefetch memory) encoding
	// This is encoded in the Rt register
	// Data preload
	PLDL1KEEP = 0, PLDL1STRM,
	PLDL2KEEP, PLDL2STRM,
	PLDL3KEEP, PLDL3STRM,
	// Instruction preload
	PLIL1KEEP = 8, PLIL1STRM,
	PLIL2KEEP, PLIL2STRM,
	PLIL3KEEP, PLIL3STRM,
	// Prepare for store
	PLTL1KEEP = 16, PLTL1STRM,
	PLTL2KEEP, PLTL2STRM,
	PLTL3KEEP, PLTL3STRM,

	// Aliases: the zero register shares its encoding with the stack pointer;
	// which one an instruction sees depends on the instruction.
	WZR = WSP,
	ZR = SP,
	FP = X29,
	LR = X30,

	INVALID_REG = 0xFFFFFFFF
};

// R19-R28. R29 (FP), R30 (LR) are always saved and FP updated appropriately.
const u32 ALL_CALLEE_SAVED = 0x1FF80000;
const u32 ALL_CALLEE_SAVED_FP = 0x0000FF00; // q8-q15

// Predicates on the <u3:type><u5:reg> encoding described above.
inline bool Is64Bit(ARM64Reg reg) { return (reg & 0x20) != 0; }
inline bool IsSingle(ARM64Reg reg) { return (reg & 0xC0) == 0x40; }
inline bool IsDouble(ARM64Reg reg) { return (reg & 0xC0) == 0x80; }
inline bool IsScalar(ARM64Reg reg) { return IsSingle(reg) || IsDouble(reg); }
inline bool IsQuad(ARM64Reg reg) { return (reg & 0xC0) == 0xC0; }
inline bool IsVector(ARM64Reg reg) { return (reg & 0xC0) != 0; }
inline bool IsGPR(ARM64Reg reg) { return (int)reg < 0x40; }

int CountLeadingZeros(uint64_t value, int width);

// Conversions between differently-typed views of the same register number.
inline ARM64Reg DecodeReg(ARM64Reg reg) { return (ARM64Reg)(reg & 0x1F); }
inline ARM64Reg EncodeRegTo64(ARM64Reg reg) { return (ARM64Reg)(reg | 0x20); }
inline ARM64Reg EncodeRegToSingle(ARM64Reg reg) { return (ARM64Reg)(DecodeReg(reg) + S0); }
inline ARM64Reg EncodeRegToDouble(ARM64Reg reg) { return (ARM64Reg)((reg & ~0xC0) | 0x80); }
inline ARM64Reg EncodeRegToQuad(ARM64Reg reg) { return (ARM64Reg)(reg | 0xC0); }

// For AND/TST/ORR/EOR etc
bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r);
// For ADD/SUB
bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift);

// Conversions between floats and the 8-bit FP immediate encoding.
float FPImm8ToFloat(uint8_t bits);
bool FPImm8FromFloat(float value, uint8_t *immOut);

enum OpType
{
	TYPE_IMM = 0,
	TYPE_REG,
	TYPE_IMMSREG,
	TYPE_RSR,
	TYPE_MEM
};

enum ShiftType
{
	ST_LSL = 0,
	ST_LSR = 1,
	ST_ASR = 2,
	ST_ROR = 3,
};

enum IndexType
{
	INDEX_UNSIGNED = 0,
	INDEX_POST = 1,
	INDEX_PRE = 2,
	INDEX_SIGNED = 3, // used in LDP/STP
};

// Shift applied to the 16-bit immediate in MOVZ/MOVN/MOVK.
enum ShiftAmount
{
	SHIFT_0 = 0,
	SHIFT_16 = 1,
	SHIFT_32 = 2,
	SHIFT_48 = 3,
};

enum RoundingMode {
	ROUND_A, // round to nearest, ties to away
	ROUND_M, // round towards -inf
	ROUND_N, // round to nearest, ties to even
	ROUND_P, // round towards +inf
	ROUND_Z, // round towards zero
};

// A branch emitted with a placeholder target, patched later via SetJumpTarget.
struct FixupBranch
{
	// Pointer to executable code address.
	const u8 *ptr;
	// Type defines
	// 0 = CBZ (32bit)
	// 1 = CBNZ (32bit)
	// 2 = B (conditional)
	// 3 = TBZ
	// 4 = TBNZ
	// 5 = B (unconditional)
	// 6 = BL (unconditional)
	u32 type;

	// Used with B.cond
	CCFlags cond;

	// Used with TBZ/TBNZ
	u8 bit;

	// Used with Test/Compare and Branch
	ARM64Reg reg;
};

enum PStateField
{
	FIELD_SPSel = 0,
	FIELD_DAIFSet,
	FIELD_DAIFClr,
	FIELD_NZCV, // The only system registers accessible from EL0 (user space)
	FIELD_FPCR = 0x340,
	FIELD_FPSR = 0x341,
};

enum SystemHint
{
	HINT_NOP = 0,
	HINT_YIELD,
	HINT_WFE,
	HINT_WFI,
	HINT_SEV,
	HINT_SEVL,
};

// Barrier domain/type operands for DSB/DMB/ISB.
enum BarrierType
{
	OSHLD = 1,
	OSHST = 2,
	OSH = 3,
	NSHLD = 5,
	NSHST = 6,
	NSH = 7,
	ISHLD = 9,
	ISHST = 10,
	ISH = 11,
	LD = 13,
	ST = 14,
	SY = 15,
};

// Describes the shifted- or extended-register operand form of data-processing
// and load/store instructions.
class ArithOption
{
public:
	enum WidthSpecifier
	{
		WIDTH_DEFAULT,
		WIDTH_32BIT,
		WIDTH_64BIT,
	};

	enum ExtendSpecifier
	{
		EXTEND_UXTB = 0x0,
		EXTEND_UXTH = 0x1,
		EXTEND_UXTW = 0x2, /* Also LSL on 32bit width */
		EXTEND_UXTX = 0x3, /* Also LSL on 64bit width */
		EXTEND_SXTB = 0x4,
		EXTEND_SXTH = 0x5,
		EXTEND_SXTW = 0x6,
		EXTEND_SXTX = 0x7,
	};

	enum TypeSpecifier
	{
		TYPE_EXTENDEDREG,
		TYPE_IMM,
		TYPE_SHIFTEDREG,
	};

private:
	ARM64Reg m_destReg;
	WidthSpecifier m_width;
	ExtendSpecifier m_extend = EXTEND_UXTB;
	TypeSpecifier m_type;
	ShiftType m_shifttype;
	u32 m_shift;

public:
	ArithOption(ARM64Reg Rd, bool index = false)
	{
		// Indexed registers are a certain feature of AArch64
		// On Loadstore instructions that use a register offset
		// We can have the register as an index
		// If we are indexing then the offset
		// register will
		// be shifted to the left so we are indexing at intervals
		// of the size of what we are loading
		// 8-bit: Index does nothing
		// 16-bit: Index LSL 1
		// 32-bit: Index LSL 2
		// 64-bit: Index LSL 3
		if (index)
			m_shift = 4;
		else
			m_shift = 0;

		m_destReg = Rd;
		m_type = TYPE_EXTENDEDREG;
		if (Is64Bit(Rd))
		{
			m_width = WIDTH_64BIT;
			m_extend = EXTEND_UXTX;
		}
		else
		{
			m_width = WIDTH_32BIT;
			m_extend = EXTEND_UXTW;
		}
		m_shifttype = ST_LSL;
	}
	// Same as above, but a 32-bit index can be sign-extended (SXTW) instead
	// of zero-extended (UXTW).
	ArithOption(ARM64Reg Rd, bool index, bool signExtend) {
		if (index)
			m_shift = 4;
		else
			m_shift = 0;

		m_destReg = Rd;
		m_type = TYPE_EXTENDEDREG;
		if (Is64Bit(Rd)) {
			m_width = WIDTH_64BIT;
			m_extend = EXTEND_UXTX;
		} else {
			m_width = WIDTH_32BIT;
			m_extend = signExtend ? EXTEND_SXTW : EXTEND_UXTW;
		}
		m_shifttype = ST_LSL;
	}
	// Shifted-register operand: Rd shifted by an immediate amount.
	// A shift equal to the register width encodes as 0.
	ArithOption(ARM64Reg Rd, ShiftType shift_type, u32 shift)
	{
		m_destReg = Rd;
		m_shift = shift;
		m_shifttype = shift_type;
		m_type = TYPE_SHIFTEDREG;
		if (Is64Bit(Rd))
		{
			m_width = WIDTH_64BIT;
			if (shift == 64)
				m_shift = 0;
		}
		else
		{
			m_width = WIDTH_32BIT;
			if (shift == 32)
				m_shift = 0;
		}
	}
	TypeSpecifier GetType() const
	{
		return m_type;
	}
	ARM64Reg GetReg() const
	{
		return m_destReg;
	}
	// Returns the operand's bits positioned for ORing into an instruction word.
	u32 GetData() const
	{
		switch (m_type)
		{
		case TYPE_EXTENDEDREG:
			return (m_extend << 13) |
			       (m_shift << 10);
			break;
		case TYPE_SHIFTEDREG:
			return (m_shifttype << 22) |
			       (m_shift << 10);
			break;
		default:
			_dbg_assert_msg_(false, "Invalid type in GetData");
			break;
		}
		return 0;
	}
};

// Emitter for the A64 integer instruction set. Holds separate executable and
// writable views of the emit pointer (they may map the same memory at
// different addresses on platforms with W^X restrictions).
class ARM64XEmitter
{
	friend class ARM64FloatEmitter;
	friend class ARM64CodeBlock;

private:
	// Executable-view pointer (handed out to callers / used for flushing).
	const u8 *m_code = nullptr;
	// Writable-view pointer that instructions are actually stored through.
	u8 *m_writable = nullptr;
	const u8 *m_lastCacheFlushEnd = nullptr;

	// Internal encoders: each writes one instruction of the named A64
	// encoding group via Write32.
	void EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr);
	void EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr);
	void EncodeUnconditionalBranchInst(u32 op, const void* ptr);
	void EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn);
	void EncodeExceptionInst(u32 instenc, u32 imm);
	void EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt);
	void EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
	void EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
	void EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn);
	void EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm);
	void EncodeLoadStoreExcInst(u32 instenc, ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt);
	void EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
	void EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size);
	void EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos);
	void EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd);
	void EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n);
	void EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm);
	void EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

protected:
	// Emits one 32-bit instruction word through the writable view and
	// advances both views of the emit pointer.
	inline void Write32(u32 value)
	{
		*(u32 *)m_writable = value;
		m_code += 4;
		m_writable += 4;
	}

public:
	ARM64XEmitter()
	{
	}

	ARM64XEmitter(const u8 *codePtr, u8 *writablePtr);

	virtual ~ARM64XEmitter()
	{
	}

	void SetCodePointer(const u8 *ptr, u8 *writePtr);
	const u8* GetCodePointer() const;

	void ReserveCodeSpace(u32 bytes);
	const u8* AlignCode16();
	const u8* AlignCodePage();
	const u8 *NopAlignCode16();
	void FlushIcache();
	void FlushIcacheSection(const u8* start, const u8* end);
	u8* GetWritableCodePtr();

	// FixupBranch branching
	void SetJumpTarget(FixupBranch const& branch);
	FixupBranch CBZ(ARM64Reg Rt);
	FixupBranch CBNZ(ARM64Reg Rt);
	FixupBranch B(CCFlags cond);
	FixupBranch TBZ(ARM64Reg Rt, u8 bit);
	FixupBranch TBNZ(ARM64Reg Rt, u8 bit);
	FixupBranch B();
	FixupBranch BL();

	// Compare and Branch
	void CBZ(ARM64Reg Rt, const void* ptr);
	void CBNZ(ARM64Reg Rt, const void* ptr);

	// Conditional Branch
	void B(CCFlags cond, const void* ptr);

	// Test and Branch
	void TBZ(ARM64Reg Rt, u8 bits, const void* ptr);
	void TBNZ(ARM64Reg Rt, u8 bits, const void* ptr);

	// Unconditional Branch
	void B(const void* ptr);
	void BL(const void* ptr);

	// Unconditional Branch (register)
	void BR(ARM64Reg Rn);
	void BLR(ARM64Reg Rn);
	void RET(ARM64Reg Rn = X30);
	void ERET();
	void DRPS();

	// Exception generation
	void SVC(u32 imm);
	void HVC(u32 imm);
	void SMC(u32 imm);
	void BRK(u32 imm);
	void HLT(u32 imm);
	void DCPS1(u32 imm);
	void DCPS2(u32 imm);
	void DCPS3(u32 imm);

	// System
	void _MSR(PStateField field, u8 imm);

	void _MSR(PStateField field, ARM64Reg Rt);
	void MRS(ARM64Reg Rt, PStateField field);

	void HINT(SystemHint op);
	void CLREX();
	void DSB(BarrierType type);
	void DMB(BarrierType type);
	void ISB(BarrierType type);

	// Add/Subtract (Extended/Shifted register)
	void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void CMN(ARM64Reg Rn, ARM64Reg Rm);
	void CMN(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);
	void CMP(ARM64Reg Rn, ARM64Reg Rm);
	void CMP(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option);

	// Add/Subtract (with carry)
	void ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Conditional Compare (immediate)
	void CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);
	void CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond);

	// Conditional Compare (register)
	void CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);
	void CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond);

	// Conditional Select
	void CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);
	void CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);

	// Aliases
	// CSET: Rd = 1 if cond else 0, via CSINC with the inverted condition.
	void CSET(ARM64Reg Rd, CCFlags cond) {
		ARM64Reg zr = Is64Bit(Rd) ? ZR : WZR;
		CSINC(Rd, zr, zr, (CCFlags)((u32)cond ^ 1));
	}
	// NEG: Rd = 0 - Rs.
	void NEG(ARM64Reg Rd, ARM64Reg Rs) {
		SUB(Rd, Is64Bit(Rd) ? ZR : WZR, Rs);
	}

	// Data-Processing 1 source
	void RBIT(ARM64Reg Rd, ARM64Reg Rn);
	void REV16(ARM64Reg Rd, ARM64Reg Rn);
	void REV32(ARM64Reg Rd, ARM64Reg Rn);
	void REV64(ARM64Reg Rd, ARM64Reg Rn);
	void CLZ(ARM64Reg Rd, ARM64Reg Rn);
	void CLS(ARM64Reg Rd, ARM64Reg Rn);

	// Data-Processing 2 source
	void UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Data-Processing 3 source
	void MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Logical (shifted register)
	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);
	void TST(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift);

	// Wrap the above for saner syntax
	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { AND(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BIC(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ORN(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EOR(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { EON(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { ANDS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) { BICS(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0)); }
	void TST(ARM64Reg Rn, ARM64Reg Rm) { TST(Rn, Rm, ArithOption(Is64Bit(Rn) ? ZR : WZR, ST_LSL, 0)); }

	// Convenience wrappers around ORR. These match the official convenience syntax.
	void MOV(ARM64Reg Rd, ARM64Reg Rm, const ArithOption &Shift);
	void MOV(ARM64Reg Rd, ARM64Reg Rm);
	void MVN(ARM64Reg Rd, ARM64Reg Rm);

	// Wrapper around ADD reg, reg, imm.
	void MOVfromSP(ARM64Reg Rd);
	void MOVtoSP(ARM64Reg Rn);

	// TODO: These are "slow" as they use arith+shift, should be replaced with UBFM/EXTR variants.
	void LSR(ARM64Reg Rd, ARM64Reg Rm, int shift);
	void LSL(ARM64Reg Rd, ARM64Reg Rm, int shift);
	void ASR(ARM64Reg Rd, ARM64Reg Rm, int shift);
	void ROR(ARM64Reg Rd, ARM64Reg Rm, int shift);

	// Logical (immediate)
	void AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);
	void TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert = false);

	// Add/subtract (immediate)
	void ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift = false);
	void CMP(ARM64Reg Rn, u32 imm, bool shift = false);
	void CMN(ARM64Reg Rn, u32 imm, bool shift = false);

	// Data Processing (Immediate)
	void MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
	void MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);
	void MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos = SHIFT_0);

	// Bitfield move
	void BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms);
	void BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);
	void UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width);

	// Extract register (ROR with two inputs, if same then faster on A67)
	void EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift);

	// Aliases
	void SXTB(ARM64Reg Rd, ARM64Reg Rn);
	void SXTH(ARM64Reg Rd, ARM64Reg Rn);
	void SXTW(ARM64Reg Rd, ARM64Reg Rn);
	void UXTB(ARM64Reg Rd, ARM64Reg Rn);
	void UXTH(ARM64Reg Rd, ARM64Reg Rn);

	// UBFX: extract `width` bits starting at `lsb`, zero-extended into Rd.
	void UBFX(ARM64Reg Rd, ARM64Reg Rn, int lsb, int width) {
		UBFM(Rd, Rn, lsb, lsb + width - 1);
	}

	// Load Register (Literal)
	void LDR(ARM64Reg Rt, u32 imm);
	void LDRSW(ARM64Reg Rt, u32 imm);
	void PRFM(ARM64Reg Rt, u32 imm);

	// Load/Store Exclusive
	void STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void LDXRB(ARM64Reg Rt, ARM64Reg Rn);
	void LDAXRB(ARM64Reg Rt, ARM64Reg Rn);
	void STLRB(ARM64Reg Rt, ARM64Reg Rn);
	void LDARB(ARM64Reg Rt, ARM64Reg Rn);
	void STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void LDXRH(ARM64Reg Rt, ARM64Reg Rn);
	void LDAXRH(ARM64Reg Rt, ARM64Reg Rn);
	void STLRH(ARM64Reg Rt, ARM64Reg Rn);
	void LDARH(ARM64Reg Rt, ARM64Reg Rn);
	void STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn);
	void STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void LDXR(ARM64Reg Rt, ARM64Reg Rn);
	void LDAXR(ARM64Reg Rt, ARM64Reg Rn);
	void LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn);
	void STLR(ARM64Reg Rt, ARM64Reg Rn);
	void LDAR(ARM64Reg Rt, ARM64Reg Rn);

	// Load/Store no-allocate pair (offset)
	void STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);
	void LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm);

	// Load/Store register (immediate indexed)
	void STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Load/Store register (register offset)
	void STRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRSB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void STRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRSH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void STR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDRSW(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void PRFM(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);

	// Load/Store register (unscaled offset)
	void STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Load/Store pair
	void LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);

	// Address of label/page PC-relative
	void ADR(ARM64Reg Rd, s32 imm);
	void ADRP(ARM64Reg Rd, s32 imm);

	// Wrapper around MOVZ+MOVK
	void MOVI2R(ARM64Reg Rd, u64 imm, bool optimize = true);
	template <class P>
	void MOVP2R(ARM64Reg Rd, P *ptr) {
		_assert_msg_(Is64Bit(Rd), "Can't store pointers in 32-bit registers");
		MOVI2R(Rd, (uintptr_t)ptr);
	}

	// Wrapper around AND x, y, imm etc. If you are sure the imm will work, no need to pass a scratch register.
	void ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void TSTI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG) { ANDSI2R(Is64Bit(Rn) ? ZR : WZR, Rn, imm, scratch); }
	void ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);

	void ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);
	void SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch = INVALID_REG);

	// Like the *I2R variants above but fail (return false) instead of
	// requiring a scratch register when the immediate doesn't encode.
	bool TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TryCMPI2R(ARM64Reg Rn, u64 imm);

	bool TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);
	bool TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm);

	// Pseudo-instruction for convenience.
	// PUSH pushes 16 bytes even though we only push a single register.
	// This is so the stack pointer is always 16-byte aligned, which is checked by hardware!
	void PUSH(ARM64Reg Rd);
	void POP(ARM64Reg Rd);
	void PUSH2(ARM64Reg Rd, ARM64Reg Rn);
	void POP2(ARM64Reg Rd, ARM64Reg Rn);


	// Utility to generate a call to a std::function object.
	//
	// Unfortunately, calling operator() directly is undefined behavior in C++
	// (this method might be a thunk in the case of multi-inheritance) so we
	// have to go through a trampoline function.
	template <typename T, typename... Args>
	static void CallLambdaTrampoline(const std::function<T(Args...)>* f,
		Args... args)
	{
		(*f)(args...);
	}

	// This function expects you to have set up the state.
	// Overwrites X0 and X30
	template <typename T, typename... Args>
	ARM64Reg ABI_SetupLambda(const std::function<T(Args...)>* f)
	{
		auto trampoline = &ARM64XEmitter::CallLambdaTrampoline<T, Args...>;
		MOVI2R(X30, (uintptr_t)trampoline);
		MOVI2R(X0, (uintptr_t)const_cast<void*>((const void*)f));
		return X30;
	}

	// Plain function call
	void QuickCallFunction(ARM64Reg scratchreg, const void *func);
	template <typename T> void QuickCallFunction(ARM64Reg scratchreg, T func) {
		QuickCallFunction(scratchreg, (const void *)func);
	}
};

// Emitter for A64 floating-point and ASIMD (vector) instructions.
// Holds a pointer (m_emit) to the integer emitter it was constructed with.
class ARM64FloatEmitter
{
public:
	ARM64FloatEmitter(ARM64XEmitter* emit) : m_emit(emit) {}

	void LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Loadstore unscaled
	void LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);
	void STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm);

	// Loadstore single structure
	void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
	void LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);
	void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
	void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn);
	void LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
	void LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);
	void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn);
	void ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm);

	// Loadstore multiple structure
	void LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
	void LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);
	void ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn);
	void ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm = SP);

	// Loadstore paired
	void LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);
	void STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);

	// Loadstore register offset
	void STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);
	void LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);

	// Scalar - 1 Source
	void FABS(ARM64Reg Rd, ARM64Reg Rn);
	void FNEG(ARM64Reg Rd, ARM64Reg Rn);
	void FSQRT(ARM64Reg Rd, ARM64Reg Rn);
	void FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top = false); // Also generalized move between GPR/FP

	// Scalar - pairwise
	void FADDP(ARM64Reg Rd, ARM64Reg Rn);
	void FMAXP(ARM64Reg Rd, ARM64Reg Rn);
	void FMINP(ARM64Reg Rd, ARM64Reg Rn);
	void FMAXNMP(ARM64Reg Rd, ARM64Reg Rn);
	void FMINNMP(ARM64Reg Rd, ARM64Reg Rn);

	// Scalar - 2 Source
	void FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	// Scalar - 3 Source. Note - the accumulator is last on ARM!
	void FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);
	void FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra);

	// Scalar floating point immediate
	void FMOV(ARM64Reg Rd, uint8_t imm8);

	// Vector
	void AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
	void FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FADDP(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void NOT(ARM64Reg Rd, ARM64Reg Rn);
	void ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	// Vector register move, expressed as ORR Rd, Rn, Rn.
	void MOV(ARM64Reg Rd, ARM64Reg Rn) {
		ORR(Rd, Rn, Rn);
	}

	void UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);

	void REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale);
	void SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);
	void XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn);

	void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);

	// Move
	void DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn);
	void INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2);
	void UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);
	void SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index);

	// Vector immediates
	void FMOV(u8 size, ARM64Reg Rd, u8 imm8);
	// MSL means bits shifted in are 1s. For size=64, each bit of imm8 is expanded to 8 actual bits.
	void MOVI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0, bool MSL = false);
	void MVNI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0, bool MSL = false);
	void ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);
	void BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift = 0);

	bool TryMOVI(u8 size, ARM64Reg Rd, uint64_t value);
	// Allow using a different size. Unclear if there's a penalty.
	bool TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t value);

	// One source
	void FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn);

	// Scalar convert float to int, in a lot of variants.
	// Note that the scalar version of this operation has two encodings, one that goes to an integer register
	// and one that outputs to a scalar fp register.
	void FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
	void FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round);
	void FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale);
	void FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale);

	// Scalar convert int to float. No rounding mode specifier necessary.
	void SCVTF(ARM64Reg Rd, ARM64Reg Rn);
	void UCVTF(ARM64Reg Rd, ARM64Reg Rn);

	// Scalar fixed point to float. scale is the number of fractional bits.
	void SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);
	void UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale);

	// Float comparison
	void FCMP(ARM64Reg Rn, ARM64Reg Rm);
	void FCMP(ARM64Reg Rn);
	void FCMPE(ARM64Reg Rn, ARM64Reg Rm);
	void FCMPE(ARM64Reg Rn);
	void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn);
	void FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn);

	// Conditional select
	void FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond);

	// Conditional compare
	void FCCMP(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond);
	void FCCMPE(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond);

	// Permute
	void UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	void ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);
	// Related to permute, extract vector from pair (always by byte arrangement.)
	void EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, int index);

	// Shift by immediate
	void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	// Shift == src_size for these.
	void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
	void SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);
	void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);
	void
SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);991void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);992void SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);993void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);994void UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn);995996void SHL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);997void USHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);998void SSHR(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift);9991000// vector x indexed element1001void FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);1002void FMLA(u8 esize, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index);10031004void MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);1005void MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch = INVALID_REG, bool negate = false);10061007// ABI related1008void ABI_PushRegisters(uint32_t gpr_registers, uint32_t fp_registers);1009void ABI_PopRegisters(uint32_t gpr_registers, uint32_t fp_registers);10101011private:1012ARM64XEmitter* m_emit;1013inline void Write32(u32 value) { m_emit->Write32(value); }10141015// Emitting functions1016void EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm);1017void EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);1018void EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);1019void EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn);1020void EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);1021void Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);1022void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn);1023void EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);1024void Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg 
Rn);1025void EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);1026void EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn);1027void EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm);1028void EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);1029void EmitCondCompare(bool M, bool S, CCFlags cond, int op, u8 nzcv, ARM64Reg Rn, ARM64Reg Rm);1030void EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);1031void EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8);1032void EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);1033void EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);1034void EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn);1035void EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm);1036void EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn);1037void EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm);1038void EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm);1039void EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign);1040void EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode);1041void EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm);1042void EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm);1043void EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh);10441045void SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);1046void USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, 
bool upper);1047void SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);1048void SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper);1049void SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);1050void UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper);1051};10521053class ARM64CodeBlock : public CodeBlock<ARM64XEmitter>1054{1055private:1056void PoisonMemory(int offset) override;1057};1058}105910601061