CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/Arm64Emitter.cpp
Views: 1401
// Copyright 2013 Dolphin Emulator Project1// Licensed under GPLv22// Refer to the license.txt file included.34#include "ppsspp_config.h"56#include <limits>7#include <vector>8#include <cmath>9#include <cinttypes>1011#include <cstdlib>12#include <cstring>1314#include "Common/Arm64Emitter.h"15#include "Common/Math/math_util.h"16#include "Common/CommonTypes.h"17#include "Common/CommonWindows.h"18#include "Common/CPUDetect.h"19#include "Common/Log.h"2021#if PPSSPP_PLATFORM(IOS) || PPSSPP_PLATFORM(MAC)22#include <libkern/OSCacheControl.h>23#endif2425namespace Arm64Gen26{2728const int kWRegSizeInBits = 32;29const int kXRegSizeInBits = 64;3031// The below few functions are taken from V8.32int CountLeadingZeros(uint64_t value, int width) {33// TODO(jbramley): Optimize this for ARM64 hosts.34int count = 0;35uint64_t bit_test = 1ULL << (width - 1);36while ((count < width) && ((bit_test & value) == 0)) {37count++;38bit_test >>= 1;39}40return count;41}4243uint64_t LargestPowerOf2Divisor(uint64_t value) {44return value & -(int64_t)value;45}4647bool IsPowerOfTwo(uint64_t x) {48return (x != 0) && ((x & (x - 1)) == 0);49}5051#define V8_UINT64_C(x) ((uint64_t)(x))5253bool IsImmArithmetic(uint64_t input, u32 *val, bool *shift) {54if (input < 4096) {55if (val) *val = (uint32_t)input;56if (shift) *shift = false;57return true;58} else if ((input & 0xFFF000) == input) {59if (val) *val = (uint32_t)(input >> 12);60if (shift) *shift = true;61return true;62}63return false;64}6566bool IsImmLogical(uint64_t value, unsigned int width, unsigned int *n, unsigned int *imm_s, unsigned int *imm_r) {67//DCHECK((n != NULL) && (imm_s != NULL) && (imm_r != NULL));68// DCHECK((width == kWRegSizeInBits) || (width == kXRegSizeInBits));6970bool negate = false;7172// Logical immediates are encoded using parameters n, imm_s and imm_r using73// the following table:74//75// N imms immr size S R76// 1 ssssss rrrrrr 64 UInt(ssssss) UInt(rrrrrr)77// 0 0sssss xrrrrr 32 UInt(sssss) UInt(rrrrr)78// 0 10ssss xxrrrr 16 UInt(ssss) UInt(rrrr)79// 0 110sss xxxrrr 8 UInt(sss) UInt(rrr)80// 0 1110ss xxxxrr 4 UInt(ss) UInt(rr)81// 0 11110s xxxxxr 2 UInt(s) UInt(r)82// (s bits must not be all set)83//84// A pattern is constructed of size bits, where the least significant S+1 bits85// are set. The pattern is rotated right by R, and repeated across a 32 or86// 64-bit value, depending on destination register width.87//88// Put another way: the basic format of a logical immediate is a single89// contiguous stretch of 1 bits, repeated across the whole word at intervals90// given by a power of 2. To identify them quickly, we first locate the91// lowest stretch of 1 bits, then the next 1 bit above that; that combination92// is different for every logical immediate, so it gives us all the93// information we need to identify the only logical immediate that our input94// could be, and then we simply check if that's the value we actually have.95//96// (The rotation parameter does give the possibility of the stretch of 1 bits97// going 'round the end' of the word. To deal with that, we observe that in98// any situation where that happens the bitwise NOT of the value is also a99// valid logical immediate. So we simply invert the input whenever its low bit100// is set, and then we know that the rotated case can't arise.)101102if (value & 1) {103// If the low bit is 1, negate the value, and set a flag to remember that we104// did (so that we can adjust the return values appropriately).105negate = true;106value = ~value;107}108109if (width == kWRegSizeInBits) {110// To handle 32-bit logical immediates, the very easiest thing is to repeat111// the input value twice to make a 64-bit word. The correct encoding of that112// as a logical immediate will also be the correct encoding of the 32-bit113// value.114115// The most-significant 32 bits may not be zero (ie. negate is true) so116// shift the value left before duplicating it.117value <<= kWRegSizeInBits;118value |= value >> kWRegSizeInBits;119}120121// The basic analysis idea: imagine our input word looks like this.122//123// 0011111000111110001111100011111000111110001111100011111000111110124// c b a125// |<--d-->|126//127// We find the lowest set bit (as an actual power-of-2 value, not its index)128// and call it a. Then we add a to our original number, which wipes out the129// bottommost stretch of set bits and replaces it with a 1 carried into the130// next zero bit. Then we look for the new lowest set bit, which is in131// position b, and subtract it, so now our number is just like the original132// but with the lowest stretch of set bits completely gone. Now we find the133// lowest set bit again, which is position c in the diagram above. Then we'll134// measure the distance d between bit positions a and c (using CLZ), and that135// tells us that the only valid logical immediate that could possibly be equal136// to this number is the one in which a stretch of bits running from a to just137// below b is replicated every d bits.138uint64_t a = LargestPowerOf2Divisor(value);139uint64_t value_plus_a = value + a;140uint64_t b = LargestPowerOf2Divisor(value_plus_a);141uint64_t value_plus_a_minus_b = value_plus_a - b;142uint64_t c = LargestPowerOf2Divisor(value_plus_a_minus_b);143144int d, clz_a, out_n;145uint64_t mask;146147if (c != 0) {148// The general case, in which there is more than one stretch of set bits.149// Compute the repeat distance d, and set up a bitmask covering the basic150// unit of repetition (i.e. a word with the bottom d bits set). Also, in all151// of these cases the N bit of the output will be zero.152clz_a = CountLeadingZeros(a, kXRegSizeInBits);153int clz_c = CountLeadingZeros(c, kXRegSizeInBits);154d = clz_a - clz_c;155mask = ((V8_UINT64_C(1) << d) - 1);156out_n = 0;157} else {158// Handle degenerate cases.159//160// If any of those 'find lowest set bit' operations didn't find a set bit at161// all, then the word will have been zero thereafter, so in particular the162// last lowest_set_bit operation will have returned zero. So we can test for163// all the special case conditions in one go by seeing if c is zero.164if (a == 0) {165// The input was zero (or all 1 bits, which will come to here too after we166// inverted it at the start of the function), for which we just return167// false.168return false;169} else {170// Otherwise, if c was zero but a was not, then there's just one stretch171// of set bits in our word, meaning that we have the trivial case of172// d == 64 and only one 'repetition'. Set up all the same variables as in173// the general case above, and set the N bit in the output.174clz_a = CountLeadingZeros(a, kXRegSizeInBits);175d = 64;176mask = ~V8_UINT64_C(0);177out_n = 1;178}179}180181// If the repeat period d is not a power of two, it can't be encoded.182if (!IsPowerOfTwo(d)) {183return false;184}185186if (((b - a) & ~mask) != 0) {187// If the bit stretch (b - a) does not fit within the mask derived from the188// repeat period, then fail.189return false;190}191192// The only possible option is b - a repeated every d bits. Now we're going to193// actually construct the valid logical immediate derived from that194// specification, and see if it equals our original input.195//196// To repeat a value every d bits, we multiply it by a number of the form197// (1 + 2^d + 2^(2d) + ...), i.e. 0x0001000100010001 or similar. These can198// be derived using a table lookup on CLZ(d).199static const uint64_t multipliers[] = {2000x0000000000000001UL,2010x0000000100000001UL,2020x0001000100010001UL,2030x0101010101010101UL,2040x1111111111111111UL,2050x5555555555555555UL,206};207int multiplier_idx = CountLeadingZeros(d, kXRegSizeInBits) - 57;208// Ensure that the index to the multipliers array is within bounds.209_dbg_assert_((multiplier_idx >= 0) &&210(static_cast<size_t>(multiplier_idx) < ARRAY_SIZE(multipliers)));211uint64_t multiplier = multipliers[multiplier_idx];212uint64_t candidate = (b - a) * multiplier;213214if (value != candidate) {215// The candidate pattern doesn't match our input value, so fail.216return false;217}218219// We have a match! This is a valid logical immediate, so now we have to220// construct the bits and pieces of the instruction encoding that generates221// it.222223// Count the set bits in our basic stretch. The special case of clz(0) == -1224// makes the answer come out right for stretches that reach the very top of225// the word (e.g. numbers like 0xffffc00000000000).226int clz_b = (b == 0) ? -1 : CountLeadingZeros(b, kXRegSizeInBits);227int s = clz_a - clz_b;228229// Decide how many bits to rotate right by, to put the low bit of that basic230// stretch in position a.231int r;232if (negate) {233// If we inverted the input right at the start of this function, here's234// where we compensate: the number of set bits becomes the number of clear235// bits, and the rotation count is based on position b rather than position236// a (since b is the location of the 'lowest' 1 bit after inversion).237s = d - s;238r = (clz_b + 1) & (d - 1);239} else {240r = (clz_a + 1) & (d - 1);241}242243// Now we're done, except for having to encode the S output in such a way that244// it gives both the number of set bits and the length of the repeated245// segment. The s field is encoded like this:246//247// imms size S248// ssssss 64 UInt(ssssss)249// 0sssss 32 UInt(sssss)250// 10ssss 16 UInt(ssss)251// 110sss 8 UInt(sss)252// 1110ss 4 UInt(ss)253// 11110s 2 UInt(s)254//255// So we 'or' (-d << 1) with our computed s to form imms.256*n = out_n;257*imm_s = ((-d << 1) | (s - 1)) & 0x3f;258*imm_r = r;259260return true;261}262263static int EncodeSize(int size) {264switch (size) {265case 8: return 0;266case 16: return 1;267case 32: return 2;268case 64: return 3;269default: return 0;270}271}272273ARM64XEmitter::ARM64XEmitter(const u8 *ptr, u8 *writePtr) {274SetCodePointer(ptr, writePtr);275}276277void ARM64XEmitter::SetCodePointer(const u8 *ptr, u8 *writePtr)278{279m_code = ptr;280m_writable = writePtr;281m_lastCacheFlushEnd = ptr;282}283284const u8* ARM64XEmitter::GetCodePointer() const285{286return m_code;287}288289u8* ARM64XEmitter::GetWritableCodePtr()290{291return m_writable;292}293294void ARM64XEmitter::ReserveCodeSpace(u32 bytes)295{296for (u32 i = 0; i < bytes/4; i++)297BRK(0);298}299300const u8* ARM64XEmitter::AlignCode16()301{302int c = int((u64)m_code & 15);303if (c)304ReserveCodeSpace(16 - c);305return m_code;306}307308const u8* ARM64XEmitter::AlignCodePage()309{310int page_size = GetMemoryProtectPageSize();311int c = int((u64)m_code & (page_size - 1));312if (c)313ReserveCodeSpace(page_size - c);314return m_code;315}316317const u8 *ARM64XEmitter::NopAlignCode16() {318int bytes = ((-(intptr_t)m_code) & 15);319for (int i = 0; i < bytes / 4; i++) {320Write32(0xD503201F); // official nop instruction321}322return m_code;323}324325void ARM64XEmitter::FlushIcache()326{327FlushIcacheSection(m_lastCacheFlushEnd, m_code);328m_lastCacheFlushEnd = m_code;329}330331void ARM64XEmitter::FlushIcacheSection(const u8 *start, const u8 *end)332{333#if PPSSPP_PLATFORM(IOS) || PPSSPP_PLATFORM(MAC)334// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);335sys_cache_control(kCacheFunctionPrepareForExecution, (void *)start, end - start);336#elif PPSSPP_PLATFORM(WINDOWS)337FlushInstructionCache(GetCurrentProcess(), start, end - start);338#elif PPSSPP_ARCH(ARM64)339// Code from Dolphin, contributed by the Mono project.340341size_t isize, dsize;342if (cpu_info.sQuirks.bExynos8890DifferingCachelineSizes) {343// Don't rely on GCC's __clear_cache implementation, as it caches344// icache/dcache cache line sizes, that can vary between cores on345// very buggy big.LITTLE architectures like Exynos8890D.346// Enforce the minimum cache line size to be completely safe on these CPUs.347isize = 64;348dsize = 64;349} else {350u64 ctr_el0;351static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;352__asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));353isize = 4 << ((ctr_el0 >> 0) & 0xf);354dsize = 4 << ((ctr_el0 >> 16) & 0xf);355356// use the global minimum cache line size357icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;358dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;359}360361u64 addr = (u64)start & ~(u64)(dsize - 1);362for (; addr < (u64)end; addr += dsize)363// use "civac" instead of "cvau", as this is the suggested workaround for364// Cortex-A53 errata 819472, 826319, 827319 and 824069.365__asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");366__asm__ volatile("dsb ish" : : : "memory");367368addr = (u64)start & ~(u64)(isize - 1);369for (; addr < (u64)end; addr += isize)370__asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");371372__asm__ volatile("dsb ish" : : : "memory");373__asm__ volatile("isb" : : : "memory");374#endif375}376377// Exception generation378static const u32 ExcEnc[][3] = {379{0, 0, 1}, // SVC380{0, 0, 2}, // HVC381{0, 0, 3}, // SMC382{1, 0, 0}, // BRK383{2, 0, 0}, // HLT384{5, 0, 1}, // DCPS1385{5, 0, 2}, // DCPS2386{5, 0, 3}, // DCPS3387};388389// Arithmetic generation390static const u32 ArithEnc[] = {3910x058, // ADD3920x258, // SUB393};394395// Conditional Select396static const u32 CondSelectEnc[][2] = {397{0, 0}, // CSEL398{0, 1}, // CSINC399{1, 0}, // CSINV400{1, 1}, // CSNEG401};402403// Data-Processing (1 source)404static const u32 Data1SrcEnc[][2] = {405{0, 0}, // RBIT406{0, 1}, // REV16407{0, 2}, // REV32408{0, 3}, // REV64409{0, 4}, // CLZ410{0, 5}, // CLS411};412413// Data-Processing (2 source)414static const u32 Data2SrcEnc[] = {4150x02, // UDIV4160x03, // SDIV4170x08, // LSLV4180x09, // LSRV4190x0A, // ASRV4200x0B, // RORV4210x10, // CRC32B4220x11, // CRC32H4230x12, // CRC32W4240x14, // CRC32CB4250x15, // CRC32CH4260x16, // CRC32CW4270x13, // CRC32X (64bit Only)4280x17, // XRC32CX (64bit Only)429};430431// Data-Processing (3 source)432static const u32 Data3SrcEnc[][2] = {433{0, 0}, // MADD434{0, 1}, // MSUB435{1, 0}, // SMADDL (64Bit Only)436{1, 1}, // SMSUBL (64Bit Only)437{2, 0}, // SMULH (64Bit Only)438{5, 0}, // UMADDL (64Bit Only)439{5, 1}, // UMSUBL (64Bit Only)440{6, 0}, // UMULH (64Bit Only)441};442443// Logical (shifted register)444static const u32 LogicalEnc[][2] = {445{0, 0}, // AND446{0, 1}, // BIC447{1, 0}, // OOR448{1, 1}, // ORN449{2, 0}, // EOR450{2, 1}, // EON451{3, 0}, // ANDS452{3, 1}, // BICS453};454455// Load/Store Exclusive456static const u32 LoadStoreExcEnc[][5] = {457{0, 0, 0, 0, 0}, // STXRB458{0, 0, 0, 0, 1}, // STLXRB459{0, 0, 1, 0, 0}, // LDXRB460{0, 0, 1, 0, 1}, // LDAXRB461{0, 1, 0, 0, 1}, // STLRB462{0, 1, 1, 0, 1}, // LDARB463{1, 0, 0, 0, 0}, // STXRH464{1, 0, 0, 0, 1}, // STLXRH465{1, 0, 1, 0, 0}, // LDXRH466{1, 0, 1, 0, 1}, // LDAXRH467{1, 1, 0, 0, 1}, // STLRH468{1, 1, 1, 0, 1}, // LDARH469{2, 0, 0, 0, 0}, // STXR470{3, 0, 0, 0, 0}, // (64bit) STXR471{2, 0, 0, 0, 1}, // STLXR472{3, 0, 0, 0, 1}, // (64bit) STLXR473{2, 0, 0, 1, 0}, // STXP474{3, 0, 0, 1, 0}, // (64bit) STXP475{2, 0, 0, 1, 1}, // STLXP476{3, 0, 0, 1, 1}, // (64bit) STLXP477{2, 0, 1, 0, 0}, // LDXR478{3, 0, 1, 0, 0}, // (64bit) LDXR479{2, 0, 1, 0, 1}, // LDAXR480{3, 0, 1, 0, 1}, // (64bit) LDAXR481{2, 0, 1, 1, 0}, // LDXP482{3, 0, 1, 1, 0}, // (64bit) LDXP483{2, 0, 1, 1, 1}, // LDAXP484{3, 0, 1, 1, 1}, // (64bit) LDAXP485{2, 1, 0, 0, 1}, // STLR486{3, 1, 0, 0, 1}, // (64bit) STLR487{2, 1, 1, 0, 1}, // LDAR488{3, 1, 1, 0, 1}, // (64bit) LDAR489};490491void ARM64XEmitter::EncodeCompareBranchInst(u32 op, ARM64Reg Rt, const void* ptr)492{493bool b64Bit = Is64Bit(Rt);494s64 distance = (s64)ptr - (s64)m_code;495496_assert_msg_(!(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);497498distance >>= 2;499500_assert_msg_(distance >= -0x40000 && distance <= 0x3FFFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);501502Rt = DecodeReg(Rt);503Write32((b64Bit << 31) | (0x34 << 24) | (op << 24) | \504(((u32)distance << 5) & 0xFFFFE0) | Rt);505}506507void ARM64XEmitter::EncodeTestBranchInst(u32 op, ARM64Reg Rt, u8 bits, const void* ptr)508{509bool b64Bit = Is64Bit(Rt);510s64 distance = (s64)ptr - (s64)m_code;511512_assert_msg_(!(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);513514distance >>= 2;515516_assert_msg_(distance >= -0x2000 && distance <= 0x1FFF, "%s: Received too large distance: %llx", __FUNCTION__, distance);517518Rt = DecodeReg(Rt);519Write32((b64Bit << 31) | (0x36 << 24) | (op << 24) | \520(bits << 19) | (((u32)distance << 5) & 0x7FFE0) | Rt);521}522523void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 op, const void* ptr)524{525s64 distance = (s64)ptr - s64(m_code);526527_assert_msg_(!(distance & 0x3), "%s: distance must be a multiple of 4: %llx", __FUNCTION__, distance);528529distance >>= 2;530531_assert_msg_(distance >= -0x2000000LL && distance <= 0x1FFFFFFLL, "%s: Received too large distance: %llx", __FUNCTION__, distance);532533Write32((op << 31) | (0x5 << 26) | (distance & 0x3FFFFFF));534}535536void ARM64XEmitter::EncodeUnconditionalBranchInst(u32 opc, u32 op2, u32 op3, u32 op4, ARM64Reg Rn)537{538Rn = DecodeReg(Rn);539Write32((0x6B << 25) | (opc << 21) | (op2 << 16) | (op3 << 10) | (Rn << 5) | op4);540}541542void ARM64XEmitter::EncodeExceptionInst(u32 instenc, u32 imm)543{544_assert_msg_(!(imm & ~0xFFFF), "%s: Exception instruction too large immediate: %d", __FUNCTION__, imm);545546Write32((0xD4 << 24) | (ExcEnc[instenc][0] << 21) | (imm << 5) | (ExcEnc[instenc][1] << 2) | ExcEnc[instenc][2]);547}548549void ARM64XEmitter::EncodeSystemInst(u32 op0, u32 op1, u32 CRn, u32 CRm, u32 op2, ARM64Reg Rt)550{551Write32((0x354 << 22) | (op0 << 19) | (op1 << 16) | (CRn << 12) | (CRm << 8) | (op2 << 5) | Rt);552}553554void ARM64XEmitter::EncodeArithmeticInst(u32 instenc, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)555{556bool b64Bit = Is64Bit(Rd);557558Rd = DecodeReg(Rd);559Rn = DecodeReg(Rn);560Rm = DecodeReg(Rm);561Write32((b64Bit << 31) | (flags << 29) | (ArithEnc[instenc] << 21) | \562(Option.GetType() == ArithOption::TYPE_EXTENDEDREG ? (1 << 21) : 0) | (Rm << 16) | Option.GetData() | (Rn << 5) | Rd);563}564565void ARM64XEmitter::EncodeArithmeticCarryInst(u32 op, bool flags, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)566{567bool b64Bit = Is64Bit(Rd);568569Rd = DecodeReg(Rd);570Rm = DecodeReg(Rm);571Rn = DecodeReg(Rn);572Write32((b64Bit << 31) | (op << 30) | (flags << 29) | \573(0xD0 << 21) | (Rm << 16) | (Rn << 5) | Rd);574}575576void ARM64XEmitter::EncodeCondCompareImmInst(u32 op, ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)577{578bool b64Bit = Is64Bit(Rn);579580_assert_msg_(!(imm & ~0x1F), "%s: too large immediate: %d", __FUNCTION__, imm)581_assert_msg_(!(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv)582583Rn = DecodeReg(Rn);584Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \585(imm << 16) | (cond << 12) | (1 << 11) | (Rn << 5) | nzcv);586}587588void ARM64XEmitter::EncodeCondCompareRegInst(u32 op, ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)589{590bool b64Bit = Is64Bit(Rm);591592_assert_msg_(!(nzcv & ~0xF), "%s: Flags out of range: %d", __FUNCTION__, nzcv)593594Rm = DecodeReg(Rm);595Rn = DecodeReg(Rn);596Write32((b64Bit << 31) | (op << 30) | (1 << 29) | (0xD2 << 21) | \597(Rm << 16) | (cond << 12) | (Rn << 5) | nzcv);598}599600void ARM64XEmitter::EncodeCondSelectInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)601{602bool b64Bit = Is64Bit(Rd);603604Rd = DecodeReg(Rd);605Rm = DecodeReg(Rm);606Rn = DecodeReg(Rn);607Write32((b64Bit << 31) | (CondSelectEnc[instenc][0] << 30) | \608(0xD4 << 21) | (Rm << 16) | (cond << 12) | (CondSelectEnc[instenc][1] << 10) | \609(Rn << 5) | Rd);610}611612void ARM64XEmitter::EncodeData1SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn)613{614bool b64Bit = Is64Bit(Rd);615616Rd = DecodeReg(Rd);617Rn = DecodeReg(Rn);618Write32((b64Bit << 31) | (0x2D6 << 21) | \619(Data1SrcEnc[instenc][0] << 16) | (Data1SrcEnc[instenc][1] << 10) | \620(Rn << 5) | Rd);621}622623void ARM64XEmitter::EncodeData2SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)624{625bool b64Bit = Is64Bit(Rd);626627Rd = DecodeReg(Rd);628Rm = DecodeReg(Rm);629Rn = DecodeReg(Rn);630Write32((b64Bit << 31) | (0x0D6 << 21) | \631(Rm << 16) | (Data2SrcEnc[instenc] << 10) | \632(Rn << 5) | Rd);633}634635void ARM64XEmitter::EncodeData3SrcInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)636{637bool b64Bit = Is64Bit(Rd);638639Rd = DecodeReg(Rd);640Rm = DecodeReg(Rm);641Rn = DecodeReg(Rn);642Ra = DecodeReg(Ra);643Write32((b64Bit << 31) | (0xD8 << 21) | (Data3SrcEnc[instenc][0] << 21) | \644(Rm << 16) | (Data3SrcEnc[instenc][1] << 15) | \645(Ra << 10) | (Rn << 5) | Rd);646}647648void ARM64XEmitter::EncodeLogicalInst(u32 instenc, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)649{650bool b64Bit = Is64Bit(Rd);651652Rd = DecodeReg(Rd);653Rm = DecodeReg(Rm);654Rn = DecodeReg(Rn);655Write32((b64Bit << 31) | (LogicalEnc[instenc][0] << 29) | (0x5 << 25) | (LogicalEnc[instenc][1] << 21) | \656Shift.GetData() | (Rm << 16) | (Rn << 5) | Rd);657}658659void ARM64XEmitter::EncodeLoadRegisterInst(u32 bitop, ARM64Reg Rt, u32 imm)660{661bool b64Bit = Is64Bit(Rt);662bool bVec = IsVector(Rt);663664_assert_msg_(!(imm & 0xFFFFF), "%s: offset too large %d", __FUNCTION__, imm);665666Rt = DecodeReg(Rt);667if (b64Bit && bitop != 0x2) // LDRSW(0x2) uses 64bit reg, doesn't have 64bit bit set668bitop |= 0x1;669Write32((bitop << 30) | (bVec << 26) | (0x18 << 24) | (imm << 5) | Rt);670}671672void ARM64XEmitter::EncodeLoadStoreExcInst(u32 instenc,673ARM64Reg Rs, ARM64Reg Rt2, ARM64Reg Rn, ARM64Reg Rt)674{675Rs = DecodeReg(Rs);676Rt2 = DecodeReg(Rt2);677Rn = DecodeReg(Rn);678Rt = DecodeReg(Rt);679Write32((LoadStoreExcEnc[instenc][0] << 30) | (0x8 << 24) | (LoadStoreExcEnc[instenc][1] << 23) | \680(LoadStoreExcEnc[instenc][2] << 22) | (LoadStoreExcEnc[instenc][3] << 21) | (Rs << 16) | \681(LoadStoreExcEnc[instenc][4] << 15) | (Rt2 << 10) | (Rn << 5) | Rt);682}683684void ARM64XEmitter::EncodeLoadStorePairedInst(u32 op, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)685{686bool b64Bit = Is64Bit(Rt);687bool b128Bit = IsQuad(Rt);688bool bVec = IsVector(Rt);689690if (b128Bit)691imm >>= 4;692else if (b64Bit)693imm >>= 3;694else695imm >>= 2;696697_assert_msg_(!(imm & ~0xF), "%s: offset too large %d", __FUNCTION__, imm);698699u32 opc = 0;700if (b128Bit)701opc = 2;702else if (b64Bit && bVec)703opc = 1;704else if (b64Bit && !bVec)705opc = 2;706707Rt = DecodeReg(Rt);708Rt2 = DecodeReg(Rt2);709Rn = DecodeReg(Rn);710Write32((opc << 30) | (bVec << 26) | (op << 22) | (imm << 15) | (Rt2 << 10) | (Rn << 5) | Rt);711}712713void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, u32 op2, ARM64Reg Rt, ARM64Reg Rn, s32 imm)714{715bool b64Bit = Is64Bit(Rt);716bool bVec = IsVector(Rt);717718u32 offset = imm & 0x1FF;719720_assert_msg_(!(imm < -256 || imm > 255), "%s: offset too large %d", __FUNCTION__, imm);721722Rt = DecodeReg(Rt);723Rn = DecodeReg(Rn);724Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (offset << 12) | (op2 << 10) | (Rn << 5) | Rt);725}726727void ARM64XEmitter::EncodeLoadStoreIndexedInst(u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm, u8 size)728{729bool b64Bit = Is64Bit(Rt);730bool bVec = IsVector(Rt);731732u8 shift = 0;733if (size == 64)734shift = 3;735else if (size == 32)736shift = 2;737else if (size == 16)738shift = 1;739740if (shift) {741_assert_msg_(((imm >> shift) << shift) == imm, "%s(INDEX_UNSIGNED): offset must be aligned %d", __FUNCTION__, imm);742imm >>= shift;743}744745_assert_msg_(imm >= 0, "%s(INDEX_UNSIGNED): offset must be positive %d", __FUNCTION__, imm);746_assert_msg_(!(imm & ~0xFFF), "%s(INDEX_UNSIGNED): offset too large %d", __FUNCTION__, imm);747748Rt = DecodeReg(Rt);749Rn = DecodeReg(Rn);750Write32((b64Bit << 30) | (op << 22) | (bVec << 26) | (imm << 10) | (Rn << 5) | Rt);751}752753void ARM64XEmitter::EncodeMOVWideInst(u32 op, ARM64Reg Rd, u32 imm, ShiftAmount pos)754{755bool b64Bit = Is64Bit(Rd);756757_assert_msg_(!(imm & ~0xFFFF), "%s: immediate out of range: %d", __FUNCTION__, imm);758759Rd = DecodeReg(Rd);760Write32((b64Bit << 31) | (op << 29) | (0x25 << 23) | (pos << 21) | (imm << 5) | Rd);761}762763void ARM64XEmitter::EncodeBitfieldMOVInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)764{765bool b64Bit = Is64Bit(Rd);766767Rd = DecodeReg(Rd);768Rn = DecodeReg(Rn);769Write32((b64Bit << 31) | (op << 29) | (0x26 << 23) | (b64Bit << 22) | \770(immr << 16) | (imms << 10) | (Rn << 5) | Rd);771}772773void ARM64XEmitter::EncodeLoadStoreRegisterOffset(u32 size, u32 opc, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)774{775Rt = DecodeReg(Rt);776Rn = DecodeReg(Rn);777ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());778779Write32((size << 30) | (opc << 22) | (0x1C1 << 21) | (decoded_Rm << 16) | \780Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);781}782783void ARM64XEmitter::EncodeAddSubImmInst(u32 op, bool flags, u32 shift, u32 imm, ARM64Reg Rn, ARM64Reg Rd)784{785bool b64Bit = Is64Bit(Rd);786787_assert_msg_(!(imm & ~0xFFF), "%s: immediate too large: %x", __FUNCTION__, imm);788789Rd = DecodeReg(Rd);790Rn = DecodeReg(Rn);791Write32((b64Bit << 31) | (op << 30) | (flags << 29) | (0x11 << 24) | (shift << 22) | \792(imm << 10) | (Rn << 5) | Rd);793}794795void ARM64XEmitter::EncodeLogicalImmInst(u32 op, ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, int n)796{797// Sometimes Rd is fixed to SP, but can still be 32bit or 64bit.798// Use Rn to determine bitness here.799bool b64Bit = Is64Bit(Rn);800801Rd = DecodeReg(Rd);802Rn = DecodeReg(Rn);803804Write32((b64Bit << 31) | (op << 29) | (0x24 << 23) | (n << 22) | \805(immr << 16) | (imms << 10) | (Rn << 5) | Rd);806}807808void ARM64XEmitter::EncodeLoadStorePair(u32 op, u32 load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)809{810bool b64Bit = Is64Bit(Rt);811u32 type_encode = 0;812813switch (type) {814case INDEX_SIGNED:815type_encode = 2;816break;817case INDEX_POST:818type_encode = 1;819break;820case INDEX_PRE:821type_encode = 3;822break;823case INDEX_UNSIGNED:824_assert_msg_(false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);825break;826}827828if (b64Bit) {829op |= 2;830imm >>= 3;831}832else833{834imm >>= 2;835}836837_assert_msg_(imm >= -64 && imm <= 63, "%s recieved too large imm: %d", __FUNCTION__, imm);838839Rt = DecodeReg(Rt);840Rt2 = DecodeReg(Rt2);841Rn = DecodeReg(Rn);842843Write32((op << 30) | (5 << 27) | (type_encode << 23) | (load << 22) | \844(((uint32_t)imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);845}846void ARM64XEmitter::EncodeAddressInst(u32 op, ARM64Reg Rd, s32 imm)847{848Rd = DecodeReg(Rd);849850Write32((op << 31) | ((imm & 0x3) << 29) | (0x10 << 24) | \851((imm & 0x1FFFFC) << 3) | Rd);852}853854void ARM64XEmitter::EncodeLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)855{856_assert_msg_(!(imm < -256 || imm > 255), "%s received too large offset: %d", __FUNCTION__, imm);857Rt = DecodeReg(Rt);858Rn = DecodeReg(Rn);859860Write32((size << 30) | (7 << 27) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);861}862863static inline bool IsInRangeImm19(s64 distance) {864return (distance >= -0x40000 && distance <= 0x3FFFF);865}866867static inline bool IsInRangeImm14(s64 distance) {868return (distance >= -0x2000 && distance <= 0x1FFF);869}870871static inline bool IsInRangeImm26(s64 distance) {872return (distance >= -0x2000000 && distance <= 0x1FFFFFF);873}874875static inline u32 MaskImm19(s64 distance) {876return distance & 0x7FFFF;877}878879static inline u32 MaskImm14(s64 distance) {880return distance & 0x3FFF;881}882883static inline u32 MaskImm26(s64 distance) {884return distance & 0x3FFFFFF;885}886887// FixupBranch branching888void ARM64XEmitter::SetJumpTarget(FixupBranch const& branch)889{890bool Not = false;891u32 inst = 0;892s64 distance = (s64)(m_code - branch.ptr);893distance >>= 2;894895switch (branch.type)896{897case 1: // CBNZ898Not = true;899case 0: // CBZ900{901_assert_msg_(IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);902bool b64Bit = Is64Bit(branch.reg);903ARM64Reg reg = DecodeReg(branch.reg);904inst = (b64Bit << 31) | (0x1A << 25) | (Not << 24) | (MaskImm19(distance) << 5) | reg;905}906break;907case 2: // B (conditional)908_assert_msg_(IsInRangeImm19(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);909inst = (0x2A << 25) | (MaskImm19(distance) << 5) | branch.cond;910break;911case 4: // TBNZ912Not = true;913case 3: // TBZ914{915_assert_msg_(IsInRangeImm14(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);916ARM64Reg reg = DecodeReg(branch.reg);917inst = ((branch.bit & 0x20) << 26) | (0x1B << 25) | (Not << 24) | ((branch.bit & 0x1F) << 19) | (MaskImm14(distance) << 5) | reg;918}919break;920case 5: // B (unconditional)921_assert_msg_(IsInRangeImm26(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);922inst = (0x5 << 26) | MaskImm26(distance);923break;924case 6: // BL (unconditional)925_assert_msg_(IsInRangeImm26(distance), "%s(%d): Received too large distance: %llx", __FUNCTION__, branch.type, distance);926inst = (0x25 << 26) | MaskImm26(distance);927break;928}929930ptrdiff_t writable = m_writable - m_code;931*(u32 *)(branch.ptr + writable) = inst;932}933934FixupBranch ARM64XEmitter::CBZ(ARM64Reg Rt)935{936FixupBranch branch;937branch.ptr = m_code;938branch.type = 0;939branch.reg = Rt;940HINT(HINT_NOP);941return branch;942}943FixupBranch ARM64XEmitter::CBNZ(ARM64Reg Rt)944{945FixupBranch branch;946branch.ptr = m_code;947branch.type = 1;948branch.reg = Rt;949HINT(HINT_NOP);950return branch;951}952FixupBranch ARM64XEmitter::B(CCFlags cond)953{954FixupBranch branch;955branch.ptr = m_code;956branch.type = 2;957branch.cond = cond;958HINT(HINT_NOP);959return branch;960}961FixupBranch ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bit)962{963FixupBranch branch;964branch.ptr = m_code;965branch.type = 3;966branch.reg = Rt;967branch.bit = bit;968HINT(HINT_NOP);969return branch;970}971FixupBranch ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bit)972{973FixupBranch branch;974branch.ptr = m_code;975branch.type = 4;976branch.reg = Rt;977branch.bit = bit;978HINT(HINT_NOP);979return branch;980}981FixupBranch ARM64XEmitter::B()982{983FixupBranch branch;984branch.ptr = m_code;985branch.type = 5;986HINT(HINT_NOP);987return branch;988}989FixupBranch ARM64XEmitter::BL()990{991FixupBranch branch;992branch.ptr = m_code;993branch.type = 6;994HINT(HINT_NOP);995return branch;996}997998// Compare and Branch999void ARM64XEmitter::CBZ(ARM64Reg Rt, const void* ptr)1000{1001EncodeCompareBranchInst(0, Rt, ptr);1002}1003void ARM64XEmitter::CBNZ(ARM64Reg Rt, const void* ptr)1004{1005EncodeCompareBranchInst(1, Rt, ptr);1006}10071008// Conditional Branch1009void ARM64XEmitter::B(CCFlags cond, const void* ptr)1010{1011s64 distance = (s64)ptr - (s64)m_code;10121013distance >>= 2;10141015_assert_msg_(IsInRangeImm19(distance), "%s: Received too large distance: %p->%p %lld %llx", __FUNCTION__, m_code, ptr, distance, distance);1016Write32((0x54 << 24) | (MaskImm19(distance) << 5) | cond);1017}10181019// Test and Branch1020void ARM64XEmitter::TBZ(ARM64Reg Rt, u8 bits, const void* ptr)1021{1022EncodeTestBranchInst(0, Rt, bits, ptr);1023}1024void ARM64XEmitter::TBNZ(ARM64Reg Rt, u8 bits, const void* ptr)1025{1026EncodeTestBranchInst(1, Rt, bits, ptr);1027}10281029// Unconditional Branch1030void ARM64XEmitter::B(const void* ptr)1031{1032EncodeUnconditionalBranchInst(0, ptr);1033}1034void ARM64XEmitter::BL(const void* ptr)1035{1036EncodeUnconditionalBranchInst(1, ptr);1037}10381039void ARM64XEmitter::QuickCallFunction(ARM64Reg scratchreg, const void *func) {1040s64 distance = (s64)func - (s64)m_code;1041distance >>= 2; // Can only branch to opcode-aligned (4) addresses1042if (!IsInRangeImm26(distance)) {1043// WARN_LOG(Log::JIT, "Distance too far in function call (%p to %p)! Using scratch.", m_code, func);1044MOVI2R(scratchreg, (uintptr_t)func);1045BLR(scratchreg);1046} else {1047BL(func);1048}1049}10501051// Unconditional Branch (register)1052void ARM64XEmitter::BR(ARM64Reg Rn)1053{1054EncodeUnconditionalBranchInst(0, 0x1F, 0, 0, Rn);1055}1056void ARM64XEmitter::BLR(ARM64Reg Rn)1057{1058EncodeUnconditionalBranchInst(1, 0x1F, 0, 0, Rn);1059}1060void ARM64XEmitter::RET(ARM64Reg Rn)1061{1062EncodeUnconditionalBranchInst(2, 0x1F, 0, 0, Rn);1063}1064void ARM64XEmitter::ERET()1065{1066EncodeUnconditionalBranchInst(4, 0x1F, 0, 0, SP);1067}1068void ARM64XEmitter::DRPS()1069{1070EncodeUnconditionalBranchInst(5, 0x1F, 0, 0, SP);1071}10721073// Exception generation1074void ARM64XEmitter::SVC(u32 imm)1075{1076EncodeExceptionInst(0, imm);1077}10781079void ARM64XEmitter::HVC(u32 imm)1080{1081EncodeExceptionInst(1, imm);1082}10831084void ARM64XEmitter::SMC(u32 imm)1085{1086EncodeExceptionInst(2, imm);1087}10881089void ARM64XEmitter::BRK(u32 imm)1090{1091EncodeExceptionInst(3, imm);1092}10931094void ARM64XEmitter::HLT(u32 imm)1095{1096EncodeExceptionInst(4, imm);1097}10981099void ARM64XEmitter::DCPS1(u32 imm)1100{1101EncodeExceptionInst(5, imm);1102}11031104void ARM64XEmitter::DCPS2(u32 imm)1105{1106EncodeExceptionInst(6, imm);1107}11081109void ARM64XEmitter::DCPS3(u32 imm)1110{1111EncodeExceptionInst(7, imm);1112}11131114// System1115void ARM64XEmitter::_MSR(PStateField field, u8 imm)1116{1117u32 op1 = 0, op2 = 0;1118switch (field)1119{1120case FIELD_SPSel: op1 = 0; op2 = 5; break;1121case FIELD_DAIFSet: op1 = 3; op2 = 6; break;1122case FIELD_DAIFClr: op1 = 3; op2 = 7; break;1123default:1124_assert_msg_(false, "Invalid PStateField to do a imm move to");1125break;1126}1127EncodeSystemInst(0, op1, 4, imm, op2, WSP);1128}11291130static void GetSystemReg(PStateField field, int &o0, int &op1, int &CRn, int &CRm, int &op2) {1131switch (field) {1132case FIELD_NZCV:1133o0 = 3; op1 = 3; CRn = 4; CRm = 2; op2 = 0;1134break;1135case FIELD_FPCR:1136o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 0;1137break;1138case FIELD_FPSR:1139o0 = 3; op1 = 3; CRn = 4; CRm = 4; op2 = 1;1140break;1141default:1142_assert_msg_(false, "Invalid PStateField to do a register move from/to");1143break;1144}1145}11461147void ARM64XEmitter::_MSR(PStateField field, ARM64Reg Rt) {1148int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;1149_assert_msg_(Is64Bit(Rt), "MSR: Rt must be 64-bit");1150GetSystemReg(field, o0, op1, CRn, CRm, op2);1151EncodeSystemInst(o0, op1, CRn, CRm, op2, DecodeReg(Rt));1152}11531154void ARM64XEmitter::MRS(ARM64Reg Rt, PStateField field) {1155int o0 = 0, op1 = 0, CRn = 0, CRm = 0, op2 = 0;1156_assert_msg_(Is64Bit(Rt), "MRS: Rt must be 64-bit");1157GetSystemReg(field, o0, op1, CRn, CRm, op2);1158EncodeSystemInst(o0 | 4, op1, CRn, CRm, op2, DecodeReg(Rt));1159}11601161void ARM64XEmitter::HINT(SystemHint op)1162{1163EncodeSystemInst(0, 3, 2, 0, op, WSP);1164}1165void ARM64XEmitter::CLREX()1166{1167EncodeSystemInst(0, 3, 3, 0, 2, WSP);1168}1169void ARM64XEmitter::DSB(BarrierType type)1170{1171EncodeSystemInst(0, 3, 3, type, 4, WSP);1172}1173void ARM64XEmitter::DMB(BarrierType type)1174{1175EncodeSystemInst(0, 3, 3, type, 5, WSP);1176}1177void ARM64XEmitter::ISB(BarrierType type)1178{1179EncodeSystemInst(0, 3, 3, type, 6, WSP);1180}11811182// Add/Subtract (extended register)1183void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1184{1185ADD(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));1186}11871188void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)1189{1190EncodeArithmeticInst(0, false, Rd, Rn, Rm, Option);1191}11921193void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1194{1195EncodeArithmeticInst(0, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));1196}11971198void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)1199{1200EncodeArithmeticInst(0, true, Rd, Rn, Rm, Option);1201}12021203void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1204{1205SUB(Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));1206}12071208void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)1209{1210EncodeArithmeticInst(1, false, Rd, Rn, Rm, Option);1211}12121213void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1214{1215EncodeArithmeticInst(1, true, Rd, Rn, Rm, ArithOption(Rd, ST_LSL, 0));1216}12171218void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)1219{1220EncodeArithmeticInst(1, true, Rd, Rn, Rm, Option);1221}12221223void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm)1224{1225CMN(Rn, Rm, ArithOption(Rn, ST_LSL, 0));1226}12271228void ARM64XEmitter::CMN(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)1229{1230EncodeArithmeticInst(0, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);1231}12321233void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm)1234{1235CMP(Rn, Rm, ArithOption(Rn, ST_LSL, 0));1236}12371238void ARM64XEmitter::CMP(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Option)1239{1240EncodeArithmeticInst(1, true, Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Option);1241}12421243// Add/Subtract (with carry)1244void ARM64XEmitter::ADC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1245{1246EncodeArithmeticCarryInst(0, false, Rd, Rn, Rm);1247}1248void ARM64XEmitter::ADCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1249{1250EncodeArithmeticCarryInst(0, true, Rd, Rn, Rm);1251}1252void ARM64XEmitter::SBC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1253{1254EncodeArithmeticCarryInst(1, false, Rd, Rn, Rm);1255}1256void ARM64XEmitter::SBCS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1257{1258EncodeArithmeticCarryInst(1, true, Rd, Rn, Rm);1259}12601261// Conditional Compare (immediate)1262void ARM64XEmitter::CCMN(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)1263{1264EncodeCondCompareImmInst(0, Rn, imm, nzcv, cond);1265}1266void ARM64XEmitter::CCMP(ARM64Reg Rn, u32 imm, u32 nzcv, CCFlags cond)1267{1268EncodeCondCompareImmInst(1, Rn, imm, nzcv, cond);1269}12701271// Conditiona Compare (register)1272void ARM64XEmitter::CCMN(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)1273{1274EncodeCondCompareRegInst(0, Rn, Rm, nzcv, cond);1275}1276void ARM64XEmitter::CCMP(ARM64Reg Rn, ARM64Reg Rm, u32 nzcv, CCFlags cond)1277{1278EncodeCondCompareRegInst(1, Rn, Rm, nzcv, cond);1279}12801281// Conditional Select1282void ARM64XEmitter::CSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)1283{1284EncodeCondSelectInst(0, Rd, Rn, Rm, cond);1285}1286void ARM64XEmitter::CSINC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)1287{1288EncodeCondSelectInst(1, Rd, Rn, Rm, cond);1289}1290void ARM64XEmitter::CSINV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)1291{1292EncodeCondSelectInst(2, Rd, Rn, Rm, cond);1293}1294void ARM64XEmitter::CSNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)1295{1296EncodeCondSelectInst(3, Rd, Rn, Rm, cond);1297}12981299// Data-Processing 1 source1300void ARM64XEmitter::RBIT(ARM64Reg Rd, ARM64Reg Rn)1301{1302EncodeData1SrcInst(0, Rd, Rn);1303}1304void ARM64XEmitter::REV16(ARM64Reg Rd, ARM64Reg Rn)1305{1306EncodeData1SrcInst(1, Rd, Rn);1307}1308void ARM64XEmitter::REV32(ARM64Reg Rd, ARM64Reg Rn)1309{1310EncodeData1SrcInst(2, Rd, Rn);1311}1312void ARM64XEmitter::REV64(ARM64Reg Rd, ARM64Reg Rn)1313{1314EncodeData1SrcInst(3, Rd, Rn);1315}1316void ARM64XEmitter::CLZ(ARM64Reg Rd, ARM64Reg Rn)1317{1318EncodeData1SrcInst(4, Rd, Rn);1319}1320void ARM64XEmitter::CLS(ARM64Reg Rd, ARM64Reg Rn)1321{1322EncodeData1SrcInst(5, Rd, Rn);1323}13241325// Data-Processing 2 source1326void ARM64XEmitter::UDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1327{1328EncodeData2SrcInst(0, Rd, Rn, Rm);1329}1330void ARM64XEmitter::SDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1331{1332EncodeData2SrcInst(1, Rd, Rn, Rm);1333}1334void ARM64XEmitter::LSLV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1335{1336EncodeData2SrcInst(2, Rd, Rn, Rm);1337}1338void ARM64XEmitter::LSRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1339{1340EncodeData2SrcInst(3, Rd, Rn, Rm);1341}1342void ARM64XEmitter::ASRV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1343{1344EncodeData2SrcInst(4, Rd, Rn, Rm);1345}1346void ARM64XEmitter::RORV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1347{1348EncodeData2SrcInst(5, Rd, Rn, Rm);1349}1350void ARM64XEmitter::CRC32B(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1351{1352EncodeData2SrcInst(6, Rd, Rn, Rm);1353}1354void ARM64XEmitter::CRC32H(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1355{1356EncodeData2SrcInst(7, Rd, Rn, Rm);1357}1358void ARM64XEmitter::CRC32W(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1359{1360EncodeData2SrcInst(8, Rd, Rn, Rm);1361}1362void ARM64XEmitter::CRC32CB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1363{1364EncodeData2SrcInst(9, Rd, Rn, Rm);1365}1366void ARM64XEmitter::CRC32CH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1367{1368EncodeData2SrcInst(10, Rd, Rn, Rm);1369}1370void ARM64XEmitter::CRC32CW(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1371{1372EncodeData2SrcInst(11, Rd, Rn, Rm);1373}1374void ARM64XEmitter::CRC32X(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1375{1376EncodeData2SrcInst(12, Rd, Rn, Rm);1377}1378void ARM64XEmitter::CRC32CX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1379{1380EncodeData2SrcInst(13, Rd, Rn, Rm);1381}13821383// Data-Processing 3 source1384void ARM64XEmitter::MADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)1385{1386EncodeData3SrcInst(0, Rd, Rn, Rm, Ra);1387}1388void ARM64XEmitter::MSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)1389{1390EncodeData3SrcInst(1, Rd, Rn, Rm, Ra);1391}1392void ARM64XEmitter::SMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)1393{1394EncodeData3SrcInst(2, Rd, Rn, Rm, Ra);1395}1396void ARM64XEmitter::SMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1397{1398SMADDL(Rd, Rn, Rm, SP);1399}1400void ARM64XEmitter::SMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)1401{1402EncodeData3SrcInst(3, Rd, Rn, Rm, Ra);1403}1404void ARM64XEmitter::SMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1405{1406EncodeData3SrcInst(4, Rd, Rn, Rm, SP);1407}1408void ARM64XEmitter::UMADDL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)1409{1410EncodeData3SrcInst(5, Rd, Rn, Rm, Ra);1411}1412void ARM64XEmitter::UMULL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1413{1414UMADDL(Rd, Rn, Rm, SP);1415}1416void ARM64XEmitter::UMSUBL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra)1417{1418EncodeData3SrcInst(6, Rd, Rn, Rm, Ra);1419}1420void ARM64XEmitter::UMULH(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1421{1422EncodeData3SrcInst(7, Rd, Rn, Rm, SP);1423}1424void ARM64XEmitter::MUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1425{1426EncodeData3SrcInst(0, Rd, Rn, Rm, SP);1427}1428void ARM64XEmitter::MNEG(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)1429{1430EncodeData3SrcInst(1, Rd, Rn, Rm, SP);1431}14321433// Logical (shifted register)1434void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1435{1436EncodeLogicalInst(0, Rd, Rn, Rm, Shift);1437}1438void ARM64XEmitter::BIC(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1439{1440EncodeLogicalInst(1, Rd, Rn, Rm, Shift);1441}1442void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1443{1444EncodeLogicalInst(2, Rd, Rn, Rm, Shift);1445}1446void ARM64XEmitter::ORN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1447{1448EncodeLogicalInst(3, Rd, Rn, Rm, Shift);1449}1450void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1451{1452EncodeLogicalInst(4, Rd, Rn, Rm, Shift);1453}1454void ARM64XEmitter::EON(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1455{1456EncodeLogicalInst(5, Rd, Rn, Rm, Shift);1457}1458void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1459{1460EncodeLogicalInst(6, Rd, Rn, Rm, Shift);1461}1462void ARM64XEmitter::BICS(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1463{1464EncodeLogicalInst(7, Rd, Rn, Rm, Shift);1465}1466void ARM64XEmitter::TST(ARM64Reg Rn, ARM64Reg Rm, const ArithOption &Shift)1467{1468ANDS(Is64Bit(Rn) ? ZR : WZR, Rn, Rm, Shift);1469}14701471void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm, const ArithOption &Shift) {1472ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, Shift);1473}14741475void ARM64XEmitter::MOV(ARM64Reg Rd, ARM64Reg Rm)1476{1477if (IsGPR(Rd) && IsGPR(Rm)) {1478ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, 0));1479} else {1480_assert_msg_(false, "Non-GPRs not supported in MOV");1481}1482}14831484void ARM64XEmitter::MOVfromSP(ARM64Reg Rd) {1485ADD(Rd, ARM64Reg::SP, 0, false);1486}14871488void ARM64XEmitter::MOVtoSP(ARM64Reg Rn) {1489ADD(ARM64Reg::SP, Rn, 0, false);1490}14911492void ARM64XEmitter::MVN(ARM64Reg Rd, ARM64Reg Rm)1493{1494ORN(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, 0));1495}1496void ARM64XEmitter::LSL(ARM64Reg Rd, ARM64Reg Rm, int shift)1497{1498ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSL, shift));1499}1500void ARM64XEmitter::LSR(ARM64Reg Rd, ARM64Reg Rm, int shift)1501{1502ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_LSR, shift));1503}1504void ARM64XEmitter::ASR(ARM64Reg Rd, ARM64Reg Rm, int shift)1505{1506ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ASR, shift));1507}1508void ARM64XEmitter::ROR(ARM64Reg Rd, ARM64Reg Rm, int shift)1509{1510ORR(Rd, Is64Bit(Rd) ? ZR : WZR, Rm, ArithOption(Rm, ST_ROR, shift));1511}15121513// Logical (immediate)1514void ARM64XEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)1515{1516EncodeLogicalImmInst(0, Rd, Rn, immr, imms, invert);1517}1518void ARM64XEmitter::ANDS(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)1519{1520EncodeLogicalImmInst(3, Rd, Rn, immr, imms, invert);1521}1522void ARM64XEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)1523{1524EncodeLogicalImmInst(2, Rd, Rn, immr, imms, invert);1525}1526void ARM64XEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms, bool invert)1527{1528EncodeLogicalImmInst(1, Rd, Rn, immr, imms, invert);1529}1530void ARM64XEmitter::TST(ARM64Reg Rn, u32 immr, u32 imms, bool invert)1531{1532EncodeLogicalImmInst(3, Is64Bit(Rn) ? ZR : WZR, Rn, immr, imms, invert);1533}15341535// Add/subtract (immediate)1536void ARM64XEmitter::ADD(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)1537{1538EncodeAddSubImmInst(0, false, shift, imm, Rn, Rd);1539}1540void ARM64XEmitter::ADDS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)1541{1542EncodeAddSubImmInst(0, true, shift, imm, Rn, Rd);1543}1544void ARM64XEmitter::SUB(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)1545{1546EncodeAddSubImmInst(1, false, shift, imm, Rn, Rd);1547}1548void ARM64XEmitter::SUBS(ARM64Reg Rd, ARM64Reg Rn, u32 imm, bool shift)1549{1550EncodeAddSubImmInst(1, true, shift, imm, Rn, Rd);1551}1552void ARM64XEmitter::CMP(ARM64Reg Rn, u32 imm, bool shift)1553{1554EncodeAddSubImmInst(1, true, shift, imm, Rn, Is64Bit(Rn) ? SP : WSP);1555}1556void ARM64XEmitter::CMN(ARM64Reg Rn, u32 imm, bool shift)1557{1558EncodeAddSubImmInst(0, true, shift, imm, Rn, Is64Bit(Rn) ? SP : WSP);1559}15601561// Data Processing (Immediate)1562void ARM64XEmitter::MOVZ(ARM64Reg Rd, u32 imm, ShiftAmount pos)1563{1564EncodeMOVWideInst(2, Rd, imm, pos);1565}1566void ARM64XEmitter::MOVN(ARM64Reg Rd, u32 imm, ShiftAmount pos)1567{1568EncodeMOVWideInst(0, Rd, imm, pos);1569}1570void ARM64XEmitter::MOVK(ARM64Reg Rd, u32 imm, ShiftAmount pos)1571{1572EncodeMOVWideInst(3, Rd, imm, pos);1573}15741575// Bitfield move1576void ARM64XEmitter::BFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)1577{1578EncodeBitfieldMOVInst(1, Rd, Rn, immr, imms);1579}1580void ARM64XEmitter::SBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)1581{1582EncodeBitfieldMOVInst(0, Rd, Rn, immr, imms);1583}1584void ARM64XEmitter::UBFM(ARM64Reg Rd, ARM64Reg Rn, u32 immr, u32 imms)1585{1586EncodeBitfieldMOVInst(2, Rd, Rn, immr, imms);1587}15881589void ARM64XEmitter::BFI(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)1590{1591u32 size = Is64Bit(Rn) ? 64 : 32;1592_assert_msg_((lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",1593__FUNCTION__, lsb, width);1594EncodeBitfieldMOVInst(1, Rd, Rn, (size - lsb) % size, width - 1);1595}1596void ARM64XEmitter::UBFIZ(ARM64Reg Rd, ARM64Reg Rn, u32 lsb, u32 width)1597{1598u32 size = Is64Bit(Rn) ? 64 : 32;1599_assert_msg_((lsb + width) <= size, "%s passed lsb %d and width %d which is greater than the register size!",1600__FUNCTION__, lsb, width);1601EncodeBitfieldMOVInst(2, Rd, Rn, (size - lsb) % size, width - 1);1602}1603void ARM64XEmitter::EXTR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u32 shift) {1604bool sf = Is64Bit(Rd);1605bool N = sf;1606Rd = DecodeReg(Rd);1607Rn = DecodeReg(Rn);1608Rm = DecodeReg(Rm);1609Write32((sf << 31) | (0x27 << 23) | (N << 22) | (Rm << 16) | (shift << 10) | (Rm << 5) | Rd);1610}1611void ARM64XEmitter::SXTB(ARM64Reg Rd, ARM64Reg Rn)1612{1613SBFM(Rd, Rn, 0, 7);1614}1615void ARM64XEmitter::SXTH(ARM64Reg Rd, ARM64Reg Rn)1616{1617SBFM(Rd, Rn, 0, 15);1618}1619void ARM64XEmitter::SXTW(ARM64Reg Rd, ARM64Reg Rn)1620{1621_assert_msg_(Is64Bit(Rd), "%s requires 64bit register as destination", __FUNCTION__);1622SBFM(Rd, Rn, 0, 31);1623}1624void ARM64XEmitter::UXTB(ARM64Reg Rd, ARM64Reg Rn)1625{1626UBFM(Rd, Rn, 0, 7);1627}1628void ARM64XEmitter::UXTH(ARM64Reg Rd, ARM64Reg Rn)1629{1630UBFM(Rd, Rn, 0, 15);1631}16321633// Load Register (Literal)1634void ARM64XEmitter::LDR(ARM64Reg Rt, u32 imm)1635{1636EncodeLoadRegisterInst(0, Rt, imm);1637}1638void ARM64XEmitter::LDRSW(ARM64Reg Rt, u32 imm)1639{1640EncodeLoadRegisterInst(2, Rt, imm);1641}1642void ARM64XEmitter::PRFM(ARM64Reg Rt, u32 imm)1643{1644EncodeLoadRegisterInst(3, Rt, imm);1645}16461647// Load/Store pair1648void ARM64XEmitter::LDP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)1649{1650EncodeLoadStorePair(0, 1, type, Rt, Rt2, Rn, imm);1651}1652void ARM64XEmitter::LDPSW(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)1653{1654EncodeLoadStorePair(1, 1, type, Rt, Rt2, Rn, imm);1655}1656void ARM64XEmitter::STP(IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)1657{1658EncodeLoadStorePair(0, 0, type, Rt, Rt2, Rn, imm);1659}16601661// Load/Store Exclusive1662void ARM64XEmitter::STXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)1663{1664EncodeLoadStoreExcInst(0, Rs, SP, Rt, Rn);1665}1666void ARM64XEmitter::STLXRB(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)1667{1668EncodeLoadStoreExcInst(1, Rs, SP, Rt, Rn);1669}1670void ARM64XEmitter::LDXRB(ARM64Reg Rt, ARM64Reg Rn)1671{1672EncodeLoadStoreExcInst(2, SP, SP, Rt, Rn);1673}1674void ARM64XEmitter::LDAXRB(ARM64Reg Rt, ARM64Reg Rn)1675{1676EncodeLoadStoreExcInst(3, SP, SP, Rt, Rn);1677}1678void ARM64XEmitter::STLRB(ARM64Reg Rt, ARM64Reg Rn)1679{1680EncodeLoadStoreExcInst(4, SP, SP, Rt, Rn);1681}1682void ARM64XEmitter::LDARB(ARM64Reg Rt, ARM64Reg Rn)1683{1684EncodeLoadStoreExcInst(5, SP, SP, Rt, Rn);1685}1686void ARM64XEmitter::STXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)1687{1688EncodeLoadStoreExcInst(6, Rs, SP, Rt, Rn);1689}1690void ARM64XEmitter::STLXRH(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)1691{1692EncodeLoadStoreExcInst(7, Rs, SP, Rt, Rn);1693}1694void ARM64XEmitter::LDXRH(ARM64Reg Rt, ARM64Reg Rn)1695{1696EncodeLoadStoreExcInst(8, SP, SP, Rt, Rn);1697}1698void ARM64XEmitter::LDAXRH(ARM64Reg Rt, ARM64Reg Rn)1699{1700EncodeLoadStoreExcInst(9, SP, SP, Rt, Rn);1701}1702void ARM64XEmitter::STLRH(ARM64Reg Rt, ARM64Reg Rn)1703{1704EncodeLoadStoreExcInst(10, SP, SP, Rt, Rn);1705}1706void ARM64XEmitter::LDARH(ARM64Reg Rt, ARM64Reg Rn)1707{1708EncodeLoadStoreExcInst(11, SP, SP, Rt, Rn);1709}1710void ARM64XEmitter::STXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)1711{1712EncodeLoadStoreExcInst(12 + Is64Bit(Rt), Rs, SP, Rt, Rn);1713}1714void ARM64XEmitter::STLXR(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rn)1715{1716EncodeLoadStoreExcInst(14 + Is64Bit(Rt), Rs, SP, Rt, Rn);1717}1718void ARM64XEmitter::STXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)1719{1720EncodeLoadStoreExcInst(16 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);1721}1722void ARM64XEmitter::STLXP(ARM64Reg Rs, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)1723{1724EncodeLoadStoreExcInst(18 + Is64Bit(Rt), Rs, Rt2, Rt, Rn);1725}1726void ARM64XEmitter::LDXR(ARM64Reg Rt, ARM64Reg Rn)1727{1728EncodeLoadStoreExcInst(20 + Is64Bit(Rt), SP, SP, Rt, Rn);1729}1730void ARM64XEmitter::LDAXR(ARM64Reg Rt, ARM64Reg Rn)1731{1732EncodeLoadStoreExcInst(22 + Is64Bit(Rt), SP, SP, Rt, Rn);1733}1734void ARM64XEmitter::LDXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)1735{1736EncodeLoadStoreExcInst(24 + Is64Bit(Rt), SP, Rt2, Rt, Rn);1737}1738void ARM64XEmitter::LDAXP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn)1739{1740EncodeLoadStoreExcInst(26 + Is64Bit(Rt), SP, Rt2, Rt, Rn);1741}1742void ARM64XEmitter::STLR(ARM64Reg Rt, ARM64Reg Rn)1743{1744EncodeLoadStoreExcInst(28 + Is64Bit(Rt), SP, SP, Rt, Rn);1745}1746void ARM64XEmitter::LDAR(ARM64Reg Rt, ARM64Reg Rn)1747{1748EncodeLoadStoreExcInst(30 + Is64Bit(Rt), SP, SP, Rt, Rn);1749}17501751// Load/Store no-allocate pair (offset)1752void ARM64XEmitter::STNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)1753{1754EncodeLoadStorePairedInst(0xA0, Rt, Rt2, Rn, imm);1755}1756void ARM64XEmitter::LDNP(ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, u32 imm)1757{1758EncodeLoadStorePairedInst(0xA1, Rt, Rt2, Rn, imm);1759}17601761// Load/Store register (immediate post-indexed)1762// XXX: Most of these support vectors1763void ARM64XEmitter::STRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1764{1765if (type == INDEX_UNSIGNED)1766EncodeLoadStoreIndexedInst(0x0E4, Rt, Rn, imm, 8);1767else1768EncodeLoadStoreIndexedInst(0x0E0,1769type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1770}1771void ARM64XEmitter::LDRB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1772{1773if (type == INDEX_UNSIGNED)1774EncodeLoadStoreIndexedInst(0x0E5, Rt, Rn, imm, 8);1775else1776EncodeLoadStoreIndexedInst(0x0E1,1777type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1778}1779void ARM64XEmitter::LDRSB(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1780{1781if (type == INDEX_UNSIGNED)1782EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E6 : 0x0E7, Rt, Rn, imm, 8);1783else1784EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x0E2 : 0x0E3,1785type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1786}1787void ARM64XEmitter::STRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1788{1789if (type == INDEX_UNSIGNED)1790EncodeLoadStoreIndexedInst(0x1E4, Rt, Rn, imm, 16);1791else1792EncodeLoadStoreIndexedInst(0x1E0,1793type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1794}1795void ARM64XEmitter::LDRH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1796{1797if (type == INDEX_UNSIGNED)1798EncodeLoadStoreIndexedInst(0x1E5, Rt, Rn, imm, 16);1799else1800EncodeLoadStoreIndexedInst(0x1E1,1801type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1802}1803void ARM64XEmitter::LDRSH(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1804{1805if (type == INDEX_UNSIGNED)1806EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E6 : 0x1E7, Rt, Rn, imm, 16);1807else1808EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x1E2 : 0x1E3,1809type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1810}1811void ARM64XEmitter::STR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1812{1813if (type == INDEX_UNSIGNED)1814EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E4 : 0x2E4, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);1815else1816EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E0 : 0x2E0,1817type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1818}1819void ARM64XEmitter::LDR(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1820{1821if (type == INDEX_UNSIGNED)1822EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E5 : 0x2E5, Rt, Rn, imm, Is64Bit(Rt) ? 64 : 32);1823else1824EncodeLoadStoreIndexedInst(Is64Bit(Rt) ? 0x3E1 : 0x2E1,1825type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1826}1827void ARM64XEmitter::LDRSW(IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)1828{1829if (type == INDEX_UNSIGNED)1830EncodeLoadStoreIndexedInst(0x2E6, Rt, Rn, imm, 32);1831else1832EncodeLoadStoreIndexedInst(0x2E2,1833type == INDEX_POST ? 1 : 3, Rt, Rn, imm);1834}18351836// Load/Store register (register offset)1837void ARM64XEmitter::STRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1838{1839EncodeLoadStoreRegisterOffset(0, 0, Rt, Rn, Rm);1840}1841void ARM64XEmitter::LDRB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1842{1843EncodeLoadStoreRegisterOffset(0, 1, Rt, Rn, Rm);1844}1845void ARM64XEmitter::LDRSB(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1846{1847bool b64Bit = Is64Bit(Rt);1848EncodeLoadStoreRegisterOffset(0, 3 - b64Bit, Rt, Rn, Rm);1849}1850void ARM64XEmitter::STRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1851{1852EncodeLoadStoreRegisterOffset(1, 0, Rt, Rn, Rm);1853}1854void ARM64XEmitter::LDRH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1855{1856EncodeLoadStoreRegisterOffset(1, 1, Rt, Rn, Rm);1857}1858void ARM64XEmitter::LDRSH(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1859{1860bool b64Bit = Is64Bit(Rt);1861EncodeLoadStoreRegisterOffset(1, 3 - b64Bit, Rt, Rn, Rm);1862}1863void ARM64XEmitter::STR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1864{1865bool b64Bit = Is64Bit(Rt);1866EncodeLoadStoreRegisterOffset(2 + b64Bit, 0, Rt, Rn, Rm);1867}1868void ARM64XEmitter::LDR(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1869{1870bool b64Bit = Is64Bit(Rt);1871EncodeLoadStoreRegisterOffset(2 + b64Bit, 1, Rt, Rn, Rm);1872}1873void ARM64XEmitter::LDRSW(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1874{1875EncodeLoadStoreRegisterOffset(2, 2, Rt, Rn, Rm);1876}1877void ARM64XEmitter::PRFM(ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)1878{1879EncodeLoadStoreRegisterOffset(3, 2, Rt, Rn, Rm);1880}18811882// Load/Store register (unscaled offset)1883void ARM64XEmitter::STURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1884{1885EncodeLoadStoreUnscaled(0, 0, Rt, Rn, imm);1886}1887void ARM64XEmitter::LDURB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1888{1889EncodeLoadStoreUnscaled(0, 1, Rt, Rn, imm);1890}1891void ARM64XEmitter::LDURSB(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1892{1893EncodeLoadStoreUnscaled(0, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);1894}1895void ARM64XEmitter::STURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1896{1897EncodeLoadStoreUnscaled(1, 0, Rt, Rn, imm);1898}1899void ARM64XEmitter::LDURH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1900{1901EncodeLoadStoreUnscaled(1, 1, Rt, Rn, imm);1902}1903void ARM64XEmitter::LDURSH(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1904{1905EncodeLoadStoreUnscaled(1, Is64Bit(Rt) ? 2 : 3, Rt, Rn, imm);1906}1907void ARM64XEmitter::STUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1908{1909EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 0, Rt, Rn, imm);1910}1911void ARM64XEmitter::LDUR(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1912{1913EncodeLoadStoreUnscaled(Is64Bit(Rt) ? 3 : 2, 1, Rt, Rn, imm);1914}1915void ARM64XEmitter::LDURSW(ARM64Reg Rt, ARM64Reg Rn, s32 imm)1916{1917_assert_msg_(!Is64Bit(Rt), "%s must have a 64bit destination register!", __FUNCTION__);1918EncodeLoadStoreUnscaled(2, 2, Rt, Rn, imm);1919}19201921// Address of label/page PC-relative1922void ARM64XEmitter::ADR(ARM64Reg Rd, s32 imm)1923{1924EncodeAddressInst(0, Rd, imm);1925}1926void ARM64XEmitter::ADRP(ARM64Reg Rd, s32 imm)1927{1928EncodeAddressInst(1, Rd, imm >> 12);1929}19301931// LLVM is unhappy about the regular abs function, so here we go.1932inline int64_t abs64(int64_t x) {1933return x >= 0 ? x : -x;1934}19351936static int Count(const bool part[4]) {1937int cnt = 0;1938for (int i = 0; i < 4; i++) {1939if (part[i])1940cnt++;1941}1942return cnt;1943}19441945// Wrapper around MOVZ+MOVK (and later MOVN)1946void ARM64XEmitter::MOVI2R(ARM64Reg Rd, u64 imm, bool optimize)1947{1948unsigned int parts = Is64Bit(Rd) ? 4 : 2;1949bool upload_part[4];19501951// Always start with a movz! Kills the dependency on the register.1952bool use_movz = true;19531954if (!imm) {1955// Zero immediate, just clear the register. EOR is pointless when we have MOVZ, which looks clearer in disasm too.1956MOVZ(Rd, 0, SHIFT_0);1957return;1958}19591960if ((Is64Bit(Rd) && imm == std::numeric_limits<u64>::max()) ||1961(!Is64Bit(Rd) && imm == std::numeric_limits<u32>::max()))1962{1963// Max unsigned value (or if signed, -1)1964// Set to ~ZR1965ARM64Reg ZR = Is64Bit(Rd) ? SP : WSP;1966ORN(Rd, ZR, ZR, ArithOption(ZR, ST_LSL, 0));1967return;1968}19691970// TODO: Make some more systemic use of MOVN, but this will take care of most cases.1971// Small negative integer. Use MOVN1972if (!Is64Bit(Rd) && (imm | 0xFFFF0000) == imm) {1973MOVN(Rd, (u32)(~imm), SHIFT_0);1974return;1975}197619771978// XXX: Use MOVN when possible.1979// XXX: Optimize more1980// XXX: Support rotating immediates to save instructions1981if (optimize)1982{1983for (unsigned int i = 0; i < parts; ++i)1984{1985if ((imm >> (i * 16)) & 0xFFFF)1986upload_part[i] = 1;1987}1988}19891990u64 aligned_pc = (u64)GetCodePointer() & ~0xFFF;1991s64 aligned_offset = (s64)imm - (s64)aligned_pc;1992if (Count(upload_part) > 1 && abs64(aligned_offset) < 0x7FFFFFFFLL)1993{1994// Immediate we are loading is within 4GB of our aligned range1995// Most likely a address that we can load in one or two instructions1996if (!(abs64(aligned_offset) & 0xFFF))1997{1998// Aligned ADR1999ADRP(Rd, (s32)aligned_offset);2000return;2001}2002else2003{2004// If the address is within 1MB of PC we can load it in a single instruction still2005s64 offset = (s64)imm - (s64)GetCodePointer();2006if (offset >= -0xFFFFF && offset <= 0xFFFFF)2007{2008ADR(Rd, (s32)offset);2009return;2010}2011else2012{2013ADRP(Rd, (s32)(aligned_offset & ~0xFFF));2014ADD(Rd, Rd, imm & 0xFFF);2015return;2016}2017}2018}20192020for (unsigned i = 0; i < parts; ++i)2021{2022if (use_movz && upload_part[i])2023{2024MOVZ(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);2025use_movz = false;2026}2027else2028{2029if (upload_part[i] || !optimize)2030MOVK(Rd, (imm >> (i * 16)) & 0xFFFF, (ShiftAmount)i);2031}2032}2033}20342035void ARM64XEmitter::PUSH(ARM64Reg Rd) {2036STR(INDEX_PRE, Rd, SP, -16);2037}20382039void ARM64XEmitter::POP(ARM64Reg Rd) {2040LDR(INDEX_POST, Rd, SP, 16);2041}20422043void ARM64XEmitter::PUSH2(ARM64Reg Rd, ARM64Reg Rn) {2044STP(INDEX_PRE, Rd, Rn, SP, -16);2045}20462047void ARM64XEmitter::POP2(ARM64Reg Rd, ARM64Reg Rn) {2048LDP(INDEX_POST, Rd, Rn, SP, 16);2049}20502051// Float Emitter2052void ARM64FloatEmitter::EmitLoadStoreImmediate(u8 size, u32 opc, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)2053{2054Rt = DecodeReg(Rt);2055Rn = DecodeReg(Rn);2056u32 encoded_size = 0;2057u32 encoded_imm = 0;20582059if (size == 8)2060encoded_size = 0;2061else if (size == 16)2062encoded_size = 1;2063else if (size == 32)2064encoded_size = 2;2065else if (size == 64)2066encoded_size = 3;2067else if (size == 128)2068encoded_size = 0;20692070if (type == INDEX_UNSIGNED)2071{2072_assert_msg_(!(imm & ((size - 1) >> 3)), "%s(INDEX_UNSIGNED) immediate offset must be aligned to size! (%d) (%p)", __FUNCTION__, imm, m_emit->GetCodePointer());2073_assert_msg_(imm >= 0, "%s(INDEX_UNSIGNED) immediate offset must be positive!", __FUNCTION__);2074if (size == 16)2075imm >>= 1;2076else if (size == 32)2077imm >>= 2;2078else if (size == 64)2079imm >>= 3;2080else if (size == 128)2081imm >>= 4;2082encoded_imm = (imm & 0xFFF);2083}2084else2085{2086_assert_msg_(!(imm < -256 || imm > 255), "%s immediate offset must be within range of -256 to 255!", __FUNCTION__);2087encoded_imm = (imm & 0x1FF) << 2;2088if (type == INDEX_POST)2089encoded_imm |= 1;2090else2091encoded_imm |= 3;2092}20932094Write32((encoded_size << 30) | (0xF << 26) | (type == INDEX_UNSIGNED ? (1 << 24) : 0) | \2095(size == 128 ? (1 << 23) : 0) | (opc << 22) | (encoded_imm << 10) | (Rn << 5) | Rt);2096}20972098void ARM64FloatEmitter::EmitScalar2Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2099{2100_assert_msg_(!IsQuad(Rd), "%s only supports double and single registers!", __FUNCTION__);2101Rd = DecodeReg(Rd);2102Rn = DecodeReg(Rn);2103Rm = DecodeReg(Rm);21042105Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (Rm << 16) | \2106(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);2107}21082109void ARM64FloatEmitter::EmitThreeSame(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2110{2111_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);2112bool quad = IsQuad(Rd);2113Rd = DecodeReg(Rd);2114Rn = DecodeReg(Rn);2115Rm = DecodeReg(Rm);21162117Write32((quad << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \2118(Rm << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);2119}21202121void ARM64FloatEmitter::EmitCopy(bool Q, u32 op, u32 imm5, u32 imm4, ARM64Reg Rd, ARM64Reg Rn)2122{2123Rd = DecodeReg(Rd);2124Rn = DecodeReg(Rn);21252126Write32((Q << 30) | (op << 29) | (0x7 << 25) | (imm5 << 16) | (imm4 << 11) | \2127(1 << 10) | (Rn << 5) | Rd);2128}21292130void ARM64FloatEmitter::EmitScalarPairwise(bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {2131Rd = DecodeReg(Rd);2132Rn = DecodeReg(Rn);21332134Write32((1 << 30) | (U << 29) | (0b111100011 << 20) | (size << 22) | (opcode << 12) | (1 << 11) | (Rn << 5) | Rd);2135}21362137void ARM64FloatEmitter::Emit2RegMisc(bool Q, bool U, u32 size, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)2138{2139_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);2140Rd = DecodeReg(Rd);2141Rn = DecodeReg(Rn);21422143Write32((Q << 30) | (U << 29) | (0x71 << 21) | (size << 22) | \2144(opcode << 12) | (1 << 11) | (Rn << 5) | Rd);2145}21462147void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn)2148{2149_assert_msg_(!IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);2150bool quad = IsQuad(Rt);2151Rt = DecodeReg(Rt);2152Rn = DecodeReg(Rn);21532154Write32((quad << 30) | (0xD << 24) | (L << 22) | (R << 21) | (opcode << 13) | \2155(S << 12) | (size << 10) | (Rn << 5) | Rt);2156}21572158void ARM64FloatEmitter::EmitLoadStoreSingleStructure(bool L, bool R, u32 opcode, bool S, u32 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)2159{2160_assert_msg_(!IsSingle(Rt), "%s doesn't support singles!", __FUNCTION__);2161bool quad = IsQuad(Rt);2162Rt = DecodeReg(Rt);2163Rn = DecodeReg(Rn);2164Rm = DecodeReg(Rm);21652166Write32((quad << 30) | (0x1B << 23) | (L << 22) | (R << 21) | (Rm << 16) | \2167(opcode << 13) | (S << 12) | (size << 10) | (Rn << 5) | Rt);2168}21692170void ARM64FloatEmitter::Emit1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)2171{2172_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);2173Rd = DecodeReg(Rd);2174Rn = DecodeReg(Rn);21752176Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (opcode << 15) | \2177(1 << 14) | (Rn << 5) | Rd);2178}21792180void ARM64FloatEmitter::EmitConversion(bool sf, bool S, u32 type, u32 rmode, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)2181{2182_assert_msg_(Rn <= SP, "%s only supports GPR as source!", __FUNCTION__);2183Rd = DecodeReg(Rd);2184Rn = DecodeReg(Rn);21852186Write32((sf << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | (rmode << 19) | \2187(opcode << 16) | (Rn << 5) | Rd);2188}21892190void ARM64FloatEmitter::EmitConvertScalarToInt(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round, bool sign)2191{2192_dbg_assert_msg_(IsScalar(Rn), "fcvts: Rn must be floating point");2193if (IsGPR(Rd)) {2194// Use the encoding that transfers the result to a GPR.2195bool sf = Is64Bit(Rd);2196int type = IsDouble(Rn) ? 1 : 0;2197Rd = DecodeReg(Rd);2198Rn = DecodeReg(Rn);2199int opcode = (sign ? 1 : 0);2200int rmode = 0;2201switch (round) {2202case ROUND_A: rmode = 0; opcode |= 4; break;2203case ROUND_P: rmode = 1; break;2204case ROUND_M: rmode = 2; break;2205case ROUND_Z: rmode = 3; break;2206case ROUND_N: rmode = 0; break;2207}2208EmitConversion2(sf, 0, true, type, rmode, opcode, 0, Rd, Rn);2209}2210else2211{2212// Use the encoding (vector, single) that keeps the result in the fp register.2213int sz = IsDouble(Rn);2214Rd = DecodeReg(Rd);2215Rn = DecodeReg(Rn);2216int opcode = 0;2217switch (round) {2218case ROUND_A: opcode = 0x1C; break;2219case ROUND_N: opcode = 0x1A; break;2220case ROUND_M: opcode = 0x1B; break;2221case ROUND_P: opcode = 0x1A; sz |= 2; break;2222case ROUND_Z: opcode = 0x1B; sz |= 2; break;2223}2224Write32((0x5E << 24) | (sign << 29) | (sz << 22) | (1 << 21) | (opcode << 12) | (2 << 10) | (Rn << 5) | Rd);2225}2226}22272228void ARM64FloatEmitter::FCVTS(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {2229EmitConvertScalarToInt(Rd, Rn, round, false);2230}22312232void ARM64FloatEmitter::FCVTU(ARM64Reg Rd, ARM64Reg Rn, RoundingMode round) {2233EmitConvertScalarToInt(Rd, Rn, round, true);2234}22352236void ARM64FloatEmitter::FCVTZS(ARM64Reg Rd, ARM64Reg Rn, int scale) {2237if (IsScalar(Rd)) {2238int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;2239Rd = DecodeReg(Rd);2240Rn = DecodeReg(Rn);22412242Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);2243} else {2244bool sf = Is64Bit(Rd);2245u32 type = 0;2246if (IsDouble(Rd))2247type = 1;2248int rmode = 3;2249int opcode = 0;22502251Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);22522253}2254}22552256void ARM64FloatEmitter::FCVTZU(ARM64Reg Rd, ARM64Reg Rn, int scale) {2257if (IsScalar(Rd)) {2258int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;2259Rd = DecodeReg(Rd);2260Rn = DecodeReg(Rn);22612262Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1F << 11) | (1 << 10) | (Rn << 5) | Rd);2263} else {2264bool sf = Is64Bit(Rd);2265u32 type = 0;2266if (IsDouble(Rd))2267type = 1;2268int rmode = 3;2269int opcode = 1;22702271Write32((sf << 31) | (0 << 29) | (0x1E << 24) | (type << 22) | (rmode << 19) | (opcode << 16) | (scale << 10) | (Rn << 5) | Rd);2272}2273}22742275void ARM64FloatEmitter::EmitConversion2(bool sf, bool S, bool direction, u32 type, u32 rmode, u32 opcode, int scale, ARM64Reg Rd, ARM64Reg Rn)2276{2277Rd = DecodeReg(Rd);2278Rn = DecodeReg(Rn);22792280Write32((sf << 31) | (S << 29) | (0xF0 << 21) | (direction << 21) | (type << 22) | (rmode << 19) | \2281(opcode << 16) | (scale << 10) | (Rn << 5) | Rd);2282}22832284void ARM64FloatEmitter::EmitCompare(bool M, bool S, u32 op, u32 opcode2, ARM64Reg Rn, ARM64Reg Rm)2285{2286_assert_msg_(!IsQuad(Rn), "%s doesn't support vector!", __FUNCTION__);2287bool is_double = IsDouble(Rn);22882289Rn = DecodeReg(Rn);2290Rm = DecodeReg(Rm);22912292Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \2293(op << 14) | (1 << 13) | (Rn << 5) | opcode2);2294}22952296void ARM64FloatEmitter::EmitCondSelect(bool M, bool S, CCFlags cond, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2297{2298_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);2299bool is_double = IsDouble(Rd);23002301Rd = DecodeReg(Rd);2302Rn = DecodeReg(Rn);2303Rm = DecodeReg(Rm);23042305Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \2306(cond << 12) | (3 << 10) | (Rn << 5) | Rd);2307}23082309void ARM64FloatEmitter::EmitCondCompare(bool M, bool S, CCFlags cond, int op, u8 nzcv, ARM64Reg Rn, ARM64Reg Rm) {2310_assert_msg_(!IsQuad(Rn), "%s doesn't support vector!", __FUNCTION__);2311bool is_double = IsDouble(Rn);23122313Rn = DecodeReg(Rn);2314Rm = DecodeReg(Rm);23152316Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (Rm << 16) | \2317(cond << 12) | (1 << 10) | (Rn << 5) | (op << 4) | nzcv);2318}23192320void ARM64FloatEmitter::EmitPermute(u32 size, u32 op, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2321{2322_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);23232324bool quad = IsQuad(Rd);23252326u32 encoded_size = 0;2327if (size == 16)2328encoded_size = 1;2329else if (size == 32)2330encoded_size = 2;2331else if (size == 64)2332encoded_size = 3;23332334Rd = DecodeReg(Rd);2335Rn = DecodeReg(Rn);2336Rm = DecodeReg(Rm);23372338Write32((quad << 30) | (7 << 25) | (encoded_size << 22) | (Rm << 16) | (op << 12) | \2339(1 << 11) | (Rn << 5) | Rd);2340}23412342void ARM64FloatEmitter::EmitScalarImm(bool M, bool S, u32 type, u32 imm5, ARM64Reg Rd, u32 imm8)2343{2344_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);23452346bool is_double = !IsSingle(Rd);23472348Rd = DecodeReg(Rd);23492350Write32((M << 31) | (S << 29) | (0xF1 << 21) | (is_double << 22) | (type << 22) | \2351(imm8 << 13) | (1 << 12) | (imm5 << 5) | Rd);2352}23532354void ARM64FloatEmitter::EmitShiftImm(bool Q, bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)2355{2356_assert_msg_(immh, "%s bad encoding! Can't have zero immh", __FUNCTION__);23572358Rd = DecodeReg(Rd);2359Rn = DecodeReg(Rn);23602361Write32((Q << 30) | (U << 29) | (0xF << 24) | (immh << 19) | (immb << 16) | \2362(opcode << 11) | (1 << 10) | (Rn << 5) | Rd);2363}23642365void ARM64FloatEmitter::EmitScalarShiftImm(bool U, u32 immh, u32 immb, u32 opcode, ARM64Reg Rd, ARM64Reg Rn) {2366Rd = DecodeReg(Rd);2367Rn = DecodeReg(Rn);23682369Write32((2 << 30) | (U << 29) | (0x3E << 23) | (immh << 19) | (immb << 16) | (opcode << 11) | (1 << 10) | (Rn << 5) | Rd);2370}23712372void ARM64FloatEmitter::EmitLoadStoreMultipleStructure(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn)2373{2374bool quad = IsQuad(Rt);2375u32 encoded_size = 0;23762377if (size == 16)2378encoded_size = 1;2379else if (size == 32)2380encoded_size = 2;2381else if (size == 64)2382encoded_size = 3;23832384Rt = DecodeReg(Rt);2385Rn = DecodeReg(Rn);23862387Write32((quad << 30) | (3 << 26) | (L << 22) | (opcode << 12) | \2388(encoded_size << 10) | (Rn << 5) | Rt);2389}23902391void ARM64FloatEmitter::EmitLoadStoreMultipleStructurePost(u32 size, bool L, u32 opcode, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)2392{2393bool quad = IsQuad(Rt);2394u32 encoded_size = 0;23952396if (size == 16)2397encoded_size = 1;2398else if (size == 32)2399encoded_size = 2;2400else if (size == 64)2401encoded_size = 3;24022403Rt = DecodeReg(Rt);2404Rn = DecodeReg(Rn);2405Rm = DecodeReg(Rm);24062407Write32((quad << 30) | (0x19 << 23) | (L << 22) | (Rm << 16) | (opcode << 12) | \2408(encoded_size << 10) | (Rn << 5) | Rt);24092410}24112412void ARM64FloatEmitter::EmitScalar1Source(bool M, bool S, u32 type, u32 opcode, ARM64Reg Rd, ARM64Reg Rn)2413{2414_assert_msg_(!IsQuad(Rd), "%s doesn't support vector!", __FUNCTION__);24152416Rd = DecodeReg(Rd);2417Rn = DecodeReg(Rn);24182419Write32((M << 31) | (S << 29) | (0xF1 << 21) | (type << 22) | \2420(opcode << 15) | (1 << 14) | (Rn << 5) | Rd);2421}24222423void ARM64FloatEmitter::EmitVectorxElement(bool U, u32 size, bool L, u32 opcode, bool H, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2424{2425bool quad = IsQuad(Rd);24262427Rd = DecodeReg(Rd);2428Rn = DecodeReg(Rn);2429Rm = DecodeReg(Rm);24302431Write32((quad << 30) | (U << 29) | (0xF << 24) | (size << 22) | (L << 21) | \2432(Rm << 16) | (opcode << 12) | (H << 11) | (Rn << 5) | Rd);2433}24342435void ARM64FloatEmitter::EmitLoadStoreUnscaled(u32 size, u32 op, ARM64Reg Rt, ARM64Reg Rn, s32 imm)2436{2437_assert_msg_(!(imm < -256 || imm > 255), "%s received too large offset: %d", __FUNCTION__, imm);2438Rt = DecodeReg(Rt);2439Rn = DecodeReg(Rn);24402441Write32((size << 30) | (0xF << 26) | (op << 22) | ((imm & 0x1FF) << 12) | (Rn << 5) | Rt);2442}24432444void ARM64FloatEmitter::EncodeLoadStorePair(u32 size, bool load, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)2445{2446u32 type_encode = 0;2447u32 opc = 0;24482449switch (type)2450{2451case INDEX_SIGNED:2452type_encode = 2;2453break;2454case INDEX_POST:2455type_encode = 1;2456break;2457case INDEX_PRE:2458type_encode = 3;2459break;2460case INDEX_UNSIGNED:2461_assert_msg_(false, "%s doesn't support INDEX_UNSIGNED!", __FUNCTION__);2462break;2463}24642465if (size == 128)2466{2467_assert_msg_(!(imm & 0xF), "%s received invalid offset 0x%x!", __FUNCTION__, imm);2468opc = 2;2469imm >>= 4;2470}2471else if (size == 64)2472{2473_assert_msg_(!(imm & 0x7), "%s received invalid offset 0x%x!", __FUNCTION__, imm);2474opc = 1;2475imm >>= 3;2476}2477else if (size == 32)2478{2479_assert_msg_(!(imm & 0x3), "%s received invalid offset 0x%x!", __FUNCTION__, imm);2480opc = 0;2481imm >>= 2;2482}24832484Rt = DecodeReg(Rt);2485Rt2 = DecodeReg(Rt2);2486Rn = DecodeReg(Rn);24872488Write32((opc << 30) | (0xB << 26) | (type_encode << 23) | (load << 22) | \2489((imm & 0x7F) << 15) | (Rt2 << 10) | (Rn << 5) | Rt);24902491}24922493void ARM64FloatEmitter::EncodeLoadStoreRegisterOffset(u32 size, bool load, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)2494{2495_assert_msg_(Rm.GetType() == ArithOption::TYPE_EXTENDEDREG, "%s must contain an extended reg as Rm!", __FUNCTION__);24962497u32 encoded_size = 0;2498u32 encoded_op = 0;24992500if (size == 8)2501{2502encoded_size = 0;2503encoded_op = 0;2504}2505else if (size == 16)2506{2507encoded_size = 1;2508encoded_op = 0;2509}2510else if (size == 32)2511{2512encoded_size = 2;2513encoded_op = 0;2514}2515else if (size == 64)2516{2517encoded_size = 3;2518encoded_op = 0;2519}2520else if (size == 128)2521{2522encoded_size = 0;2523encoded_op = 2;2524}25252526if (load)2527encoded_op |= 1;25282529Rt = DecodeReg(Rt);2530Rn = DecodeReg(Rn);2531ARM64Reg decoded_Rm = DecodeReg(Rm.GetReg());25322533Write32((encoded_size << 30) | (encoded_op << 22) | (0x1E1 << 21) | (decoded_Rm << 16) | \2534Rm.GetData() | (1 << 11) | (Rn << 5) | Rt);2535}25362537void ARM64FloatEmitter::LDR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)2538{2539EmitLoadStoreImmediate(size, 1, type, Rt, Rn, imm);2540}2541void ARM64FloatEmitter::STR(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rn, s32 imm)2542{2543EmitLoadStoreImmediate(size, 0, type, Rt, Rn, imm);2544}25452546// Loadstore unscaled2547void ARM64FloatEmitter::LDUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)2548{2549u32 encoded_size = 0;2550u32 encoded_op = 0;25512552if (size == 8)2553{2554encoded_size = 0;2555encoded_op = 1;2556}2557else if (size == 16)2558{2559encoded_size = 1;2560encoded_op = 1;2561}2562else if (size == 32)2563{2564encoded_size = 2;2565encoded_op = 1;2566}2567else if (size == 64)2568{2569encoded_size = 3;2570encoded_op = 1;2571}2572else if (size == 128)2573{2574encoded_size = 0;2575encoded_op = 3;2576}25772578EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);2579}2580void ARM64FloatEmitter::STUR(u8 size, ARM64Reg Rt, ARM64Reg Rn, s32 imm)2581{2582u32 encoded_size = 0;2583u32 encoded_op = 0;25842585if (size == 8)2586{2587encoded_size = 0;2588encoded_op = 0;2589}2590else if (size == 16)2591{2592encoded_size = 1;2593encoded_op = 0;2594}2595else if (size == 32)2596{2597encoded_size = 2;2598encoded_op = 0;2599}2600else if (size == 64)2601{2602encoded_size = 3;2603encoded_op = 0;2604}2605else if (size == 128)2606{2607encoded_size = 0;2608encoded_op = 2;2609}26102611EmitLoadStoreUnscaled(encoded_size, encoded_op, Rt, Rn, imm);26122613}26142615// Loadstore single structure2616void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)2617{2618bool S = 0;2619u32 opcode = 0;2620u32 encoded_size = 0;2621ARM64Reg encoded_reg = INVALID_REG;26222623if (size == 8)2624{2625S = (index & 4) != 0;2626opcode = 0;2627encoded_size = index & 3;2628if (index & 8)2629encoded_reg = EncodeRegToQuad(Rt);2630else2631encoded_reg = EncodeRegToDouble(Rt);26322633}2634else if (size == 16)2635{2636S = (index & 2) != 0;2637opcode = 2;2638encoded_size = (index & 1) << 1;2639if (index & 4)2640encoded_reg = EncodeRegToQuad(Rt);2641else2642encoded_reg = EncodeRegToDouble(Rt);26432644}2645else if (size == 32)2646{2647S = (index & 1) != 0;2648opcode = 4;2649encoded_size = 0;2650if (index & 2)2651encoded_reg = EncodeRegToQuad(Rt);2652else2653encoded_reg = EncodeRegToDouble(Rt);2654}2655else if (size == 64)2656{2657S = 0;2658opcode = 4;2659encoded_size = 1;2660if (index == 1)2661encoded_reg = EncodeRegToQuad(Rt);2662else2663encoded_reg = EncodeRegToDouble(Rt);2664}26652666EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn);2667}26682669void ARM64FloatEmitter::LD1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)2670{2671bool S = 0;2672u32 opcode = 0;2673u32 encoded_size = 0;2674ARM64Reg encoded_reg = INVALID_REG;26752676if (size == 8)2677{2678S = (index & 4) != 0;2679opcode = 0;2680encoded_size = index & 3;2681if (index & 8)2682encoded_reg = EncodeRegToQuad(Rt);2683else2684encoded_reg = EncodeRegToDouble(Rt);26852686}2687else if (size == 16)2688{2689S = (index & 2) != 0;2690opcode = 2;2691encoded_size = (index & 1) << 1;2692if (index & 4)2693encoded_reg = EncodeRegToQuad(Rt);2694else2695encoded_reg = EncodeRegToDouble(Rt);26962697}2698else if (size == 32)2699{2700S = (index & 1) != 0;2701opcode = 4;2702encoded_size = 0;2703if (index & 2)2704encoded_reg = EncodeRegToQuad(Rt);2705else2706encoded_reg = EncodeRegToDouble(Rt);2707}2708else if (size == 64)2709{2710S = 0;2711opcode = 4;2712encoded_size = 1;2713if (index == 1)2714encoded_reg = EncodeRegToQuad(Rt);2715else2716encoded_reg = EncodeRegToDouble(Rt);2717}27182719EmitLoadStoreSingleStructure(1, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);2720}27212722void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn)2723{2724EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn);2725}2726void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn)2727{2728EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn);2729}2730void ARM64FloatEmitter::LD1R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)2731{2732EmitLoadStoreSingleStructure(1, 0, 6, 0, size >> 4, Rt, Rn, Rm);2733}2734void ARM64FloatEmitter::LD2R(u8 size, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)2735{2736EmitLoadStoreSingleStructure(1, 1, 6, 0, size >> 4, Rt, Rn, Rm);2737}27382739void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn)2740{2741bool S = 0;2742u32 opcode = 0;2743u32 encoded_size = 0;2744ARM64Reg encoded_reg = INVALID_REG;27452746if (size == 8)2747{2748S = (index & 4) != 0;2749opcode = 0;2750encoded_size = index & 3;2751if (index & 8)2752encoded_reg = EncodeRegToQuad(Rt);2753else2754encoded_reg = EncodeRegToDouble(Rt);27552756}2757else if (size == 16)2758{2759S = (index & 2) != 0;2760opcode = 2;2761encoded_size = (index & 1) << 1;2762if (index & 4)2763encoded_reg = EncodeRegToQuad(Rt);2764else2765encoded_reg = EncodeRegToDouble(Rt);27662767}2768else if (size == 32)2769{2770S = (index & 1) != 0;2771opcode = 4;2772encoded_size = 0;2773if (index & 2)2774encoded_reg = EncodeRegToQuad(Rt);2775else2776encoded_reg = EncodeRegToDouble(Rt);2777}2778else if (size == 64)2779{2780S = 0;2781opcode = 4;2782encoded_size = 1;2783if (index == 1)2784encoded_reg = EncodeRegToQuad(Rt);2785else2786encoded_reg = EncodeRegToDouble(Rt);2787}27882789EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn);2790}27912792void ARM64FloatEmitter::ST1(u8 size, ARM64Reg Rt, u8 index, ARM64Reg Rn, ARM64Reg Rm)2793{2794bool S = 0;2795u32 opcode = 0;2796u32 encoded_size = 0;2797ARM64Reg encoded_reg = INVALID_REG;27982799if (size == 8)2800{2801S = (index & 4) != 0;2802opcode = 0;2803encoded_size = index & 3;2804if (index & 8)2805encoded_reg = EncodeRegToQuad(Rt);2806else2807encoded_reg = EncodeRegToDouble(Rt);28082809}2810else if (size == 16)2811{2812S = (index & 2) != 0;2813opcode = 2;2814encoded_size = (index & 1) << 1;2815if (index & 4)2816encoded_reg = EncodeRegToQuad(Rt);2817else2818encoded_reg = EncodeRegToDouble(Rt);28192820}2821else if (size == 32)2822{2823S = (index & 1) != 0;2824opcode = 4;2825encoded_size = 0;2826if (index & 2)2827encoded_reg = EncodeRegToQuad(Rt);2828else2829encoded_reg = EncodeRegToDouble(Rt);2830}2831else if (size == 64)2832{2833S = 0;2834opcode = 4;2835encoded_size = 1;2836if (index == 1)2837encoded_reg = EncodeRegToQuad(Rt);2838else2839encoded_reg = EncodeRegToDouble(Rt);2840}28412842EmitLoadStoreSingleStructure(0, 0, opcode, S, encoded_size, encoded_reg, Rn, Rm);2843}28442845// Loadstore multiple structure2846void ARM64FloatEmitter::LD1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)2847{2848_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);2849u32 opcode = 0;2850if (count == 1)2851opcode = 7;2852else if (count == 2)2853opcode = 0xA;2854else if (count == 3)2855opcode = 6;2856else if (count == 4)2857opcode = 2;2858EmitLoadStoreMultipleStructure(size, 1, opcode, Rt, Rn);2859}2860void ARM64FloatEmitter::LD1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)2861{2862_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);2863_assert_msg_(type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);28642865u32 opcode = 0;2866if (count == 1)2867opcode = 7;2868else if (count == 2)2869opcode = 0xA;2870else if (count == 3)2871opcode = 6;2872else if (count == 4)2873opcode = 2;2874EmitLoadStoreMultipleStructurePost(size, 1, opcode, Rt, Rn, Rm);2875}2876void ARM64FloatEmitter::ST1(u8 size, u8 count, ARM64Reg Rt, ARM64Reg Rn)2877{2878_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);2879u32 opcode = 0;2880if (count == 1)2881opcode = 7;2882else if (count == 2)2883opcode = 0xA;2884else if (count == 3)2885opcode = 6;2886else if (count == 4)2887opcode = 2;2888EmitLoadStoreMultipleStructure(size, 0, opcode, Rt, Rn);2889}2890void ARM64FloatEmitter::ST1(u8 size, u8 count, IndexType type, ARM64Reg Rt, ARM64Reg Rn, ARM64Reg Rm)2891{2892_assert_msg_(!(count == 0 || count > 4), "%s must have a count of 1 to 4 registers!", __FUNCTION__);2893_assert_msg_(type == INDEX_POST, "%s only supports post indexing!", __FUNCTION__);28942895u32 opcode = 0;2896if (count == 1)2897opcode = 7;2898else if (count == 2)2899opcode = 0xA;2900else if (count == 3)2901opcode = 6;2902else if (count == 4)2903opcode = 2;2904EmitLoadStoreMultipleStructurePost(size, 0, opcode, Rt, Rn, Rm);2905}29062907// Scalar - 1 Source2908void ARM64FloatEmitter::FMOV(ARM64Reg Rd, ARM64Reg Rn, bool top)2909{2910if (IsScalar(Rd) && IsScalar(Rn)) {2911EmitScalar1Source(0, 0, IsDouble(Rd), 0, Rd, Rn);2912} else {2913_assert_msg_(!IsQuad(Rd) && !IsQuad(Rn), "FMOV can't move to/from quads");2914int rmode = 0;2915int opcode = 6;2916int sf = 0;2917if (IsSingle(Rd) && !Is64Bit(Rn) && !top) {2918// GPR to scalar single2919opcode |= 1;2920} else if (!Is64Bit(Rd) && IsSingle(Rn) && !top) {2921// Scalar single to GPR - defaults are correct2922} else {2923// TODO2924_assert_msg_(false, "FMOV: Unhandled case");2925}2926Rd = DecodeReg(Rd);2927Rn = DecodeReg(Rn);2928Write32((sf << 31) | (0x1e2 << 20) | (rmode << 19) | (opcode << 16) | (Rn << 5) | Rd);2929}2930}29312932// Loadstore paired2933void ARM64FloatEmitter::LDP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)2934{2935EncodeLoadStorePair(size, true, type, Rt, Rt2, Rn, imm);2936}2937void ARM64FloatEmitter::STP(u8 size, IndexType type, ARM64Reg Rt, ARM64Reg Rt2, ARM64Reg Rn, s32 imm)2938{2939EncodeLoadStorePair(size, false, type, Rt, Rt2, Rn, imm);2940}29412942// Loadstore register offset2943void ARM64FloatEmitter::STR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)2944{2945EncodeLoadStoreRegisterOffset(size, false, Rt, Rn, Rm);2946}2947void ARM64FloatEmitter::LDR(u8 size, ARM64Reg Rt, ARM64Reg Rn, const ArithOption &Rm)2948{2949EncodeLoadStoreRegisterOffset(size, true, Rt, Rn, Rm);2950}29512952void ARM64FloatEmitter::FABS(ARM64Reg Rd, ARM64Reg Rn)2953{2954EmitScalar1Source(0, 0, IsDouble(Rd), 1, Rd, Rn);2955}2956void ARM64FloatEmitter::FNEG(ARM64Reg Rd, ARM64Reg Rn)2957{2958EmitScalar1Source(0, 0, IsDouble(Rd), 2, Rd, Rn);2959}2960void ARM64FloatEmitter::FSQRT(ARM64Reg Rd, ARM64Reg Rn)2961{2962EmitScalar1Source(0, 0, IsDouble(Rd), 3, Rd, Rn);2963}29642965// Scalar - pairwise2966void ARM64FloatEmitter::FADDP(ARM64Reg Rd, ARM64Reg Rn) {2967EmitScalarPairwise(1, IsDouble(Rd), 0b01101, Rd, Rn);2968}2969void ARM64FloatEmitter::FMAXP(ARM64Reg Rd, ARM64Reg Rn) {2970EmitScalarPairwise(1, IsDouble(Rd), 0b01111, Rd, Rn);2971}2972void ARM64FloatEmitter::FMINP(ARM64Reg Rd, ARM64Reg Rn) {2973EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01111, Rd, Rn);2974}2975void ARM64FloatEmitter::FMAXNMP(ARM64Reg Rd, ARM64Reg Rn) {2976EmitScalarPairwise(1, IsDouble(Rd), 0b01100, Rd, Rn);2977}2978void ARM64FloatEmitter::FMINNMP(ARM64Reg Rd, ARM64Reg Rn) {2979EmitScalarPairwise(1, IsDouble(Rd) ? 3 : 2, 0b01100, Rd, Rn);2980}29812982// Scalar - 2 Source2983void ARM64FloatEmitter::FADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2984{2985EmitScalar2Source(0, 0, IsDouble(Rd), 2, Rd, Rn, Rm);2986}2987void ARM64FloatEmitter::FMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2988{2989EmitScalar2Source(0, 0, IsDouble(Rd), 0, Rd, Rn, Rm);2990}2991void ARM64FloatEmitter::FSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2992{2993EmitScalar2Source(0, 0, IsDouble(Rd), 3, Rd, Rn, Rm);2994}2995void ARM64FloatEmitter::FDIV(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)2996{2997EmitScalar2Source(0, 0, IsDouble(Rd), 1, Rd, Rn, Rm);2998}2999void ARM64FloatEmitter::FMAX(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3000{3001EmitScalar2Source(0, 0, IsDouble(Rd), 4, Rd, Rn, Rm);3002}3003void ARM64FloatEmitter::FMIN(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3004{3005EmitScalar2Source(0, 0, IsDouble(Rd), 5, Rd, Rn, Rm);3006}3007void ARM64FloatEmitter::FMAXNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3008{3009EmitScalar2Source(0, 0, IsDouble(Rd), 6, Rd, Rn, Rm);3010}3011void ARM64FloatEmitter::FMINNM(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3012{3013EmitScalar2Source(0, 0, IsDouble(Rd), 7, Rd, Rn, Rm);3014}3015void ARM64FloatEmitter::FNMUL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3016{3017EmitScalar2Source(0, 0, IsDouble(Rd), 8, Rd, Rn, Rm);3018}30193020void ARM64FloatEmitter::FMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {3021EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 0);3022}3023void ARM64FloatEmitter::FMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {3024EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 1);3025}3026void ARM64FloatEmitter::FNMADD(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {3027EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 2);3028}3029void ARM64FloatEmitter::FNMSUB(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra) {3030EmitScalar3Source(IsDouble(Rd), Rd, Rn, Rm, Ra, 3);3031}30323033void ARM64FloatEmitter::EmitScalar3Source(bool isDouble, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, ARM64Reg Ra, int opcode) {3034int type = isDouble ? 1 : 0;3035Rd = DecodeReg(Rd);3036Rn = DecodeReg(Rn);3037Rm = DecodeReg(Rm);3038Ra = DecodeReg(Ra);3039int o1 = opcode >> 1;3040int o0 = opcode & 1;3041m_emit->Write32((0x1F << 24) | (type << 22) | (o1 << 21) | (Rm << 16) | (o0 << 15) | (Ra << 10) | (Rn << 5) | Rd);3042}30433044// Scalar floating point immediate3045void ARM64FloatEmitter::FMOV(ARM64Reg Rd, uint8_t imm8)3046{3047EmitScalarImm(0, 0, 0, 0, Rd, imm8);3048}30493050// Vector3051void ARM64FloatEmitter::AND(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3052{3053EmitThreeSame(0, 0, 3, Rd, Rn, Rm);3054}3055void ARM64FloatEmitter::EOR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3056{3057EmitThreeSame(1, 0, 3, Rd, Rn, Rm);3058}3059void ARM64FloatEmitter::BSL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3060{3061EmitThreeSame(1, 1, 3, Rd, Rn, Rm);3062}3063void ARM64FloatEmitter::BIT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3064EmitThreeSame(1, 2, 3, Rd, Rn, Rm);3065}3066void ARM64FloatEmitter::BIF(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3067EmitThreeSame(1, 3, 3, Rd, Rn, Rm);3068}3069void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)3070{3071u32 imm5 = 0;30723073if (size == 8)3074{3075imm5 = 1;3076imm5 |= index << 1;3077}3078else if (size == 16)3079{3080imm5 = 2;3081imm5 |= index << 2;3082}3083else if (size == 32)3084{3085imm5 = 4;3086imm5 |= index << 3;3087}3088else if (size == 64)3089{3090imm5 = 8;3091imm5 |= index << 4;3092}30933094EmitCopy(IsQuad(Rd), 0, imm5, 0, Rd, Rn);3095}3096void ARM64FloatEmitter::FABS(u8 size, ARM64Reg Rd, ARM64Reg Rn)3097{3098Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xF, Rd, Rn);3099}3100void ARM64FloatEmitter::FADD(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3101{3102EmitThreeSame(0, size >> 6, 0x1A, Rd, Rn, Rm);3103}3104void ARM64FloatEmitter::FADDP(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3105EmitThreeSame(1, size >> 6, 0x1A, Rd, Rn, Rm);3106}3107void ARM64FloatEmitter::FMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3108{3109EmitThreeSame(0, size >> 6, 0x1E, Rd, Rn, Rm);3110}3111void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3112{3113EmitThreeSame(0, size >> 6, 0x19, Rd, Rn, Rm);3114}3115void ARM64FloatEmitter::FMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3116{3117EmitThreeSame(0, 2 | size >> 6, 0x1E, Rd, Rn, Rm);3118}3119void ARM64FloatEmitter::FCVTL(u8 size, ARM64Reg Rd, ARM64Reg Rn)3120{3121Emit2RegMisc(false, 0, size >> 6, 0x17, Rd, Rn);3122}3123void ARM64FloatEmitter::FCVTL2(u8 size, ARM64Reg Rd, ARM64Reg Rn)3124{3125Emit2RegMisc(true, 0, size >> 6, 0x17, Rd, Rn);3126}3127void ARM64FloatEmitter::FCVTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)3128{3129Emit2RegMisc(IsQuad(Rd), 0, dest_size >> 5, 0x16, Rd, Rn);3130}3131void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn)3132{3133Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x1B, Rd, Rn);3134}3135void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn)3136{3137Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1B, Rd, Rn);3138}3139void ARM64FloatEmitter::FCVTZS(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {3140int imm = size * 2 - scale;3141EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0x1F, Rd, Rn);3142}3143void ARM64FloatEmitter::FCVTZU(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale) {3144int imm = size * 2 - scale;3145EmitShiftImm(IsQuad(Rd), true, imm >> 3, imm & 7, 0x1F, Rd, Rn);3146}3147void ARM64FloatEmitter::FDIV(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3148{3149EmitThreeSame(1, size >> 6, 0x1F, Rd, Rn, Rm);3150}3151void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3152{3153EmitThreeSame(1, size >> 6, 0x1B, Rd, Rn, Rm);3154}3155void ARM64FloatEmitter::UMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3156{3157EmitThreeSame(1, EncodeSize(size), 0xD, Rd, Rn, Rm);3158}3159void ARM64FloatEmitter::UMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3160{3161EmitThreeSame(1, EncodeSize(size), 0xC, Rd, Rn, Rm);3162}3163void ARM64FloatEmitter::SMIN(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3164{3165EmitThreeSame(0, EncodeSize(size), 0xD, Rd, Rn, Rm);3166}3167void ARM64FloatEmitter::SMAX(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3168{3169EmitThreeSame(0, EncodeSize(size), 0xC, Rd, Rn, Rm);3170}3171void ARM64FloatEmitter::FNEG(u8 size, ARM64Reg Rd, ARM64Reg Rn)3172{3173Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xF, Rd, Rn);3174}3175void ARM64FloatEmitter::FRSQRTE(u8 size, ARM64Reg Rd, ARM64Reg Rn)3176{3177Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0x1D, Rd, Rn);3178}3179void ARM64FloatEmitter::FSUB(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3180{3181EmitThreeSame(0, 2 | (size >> 6), 0x1A, Rd, Rn, Rm);3182}3183void ARM64FloatEmitter::FMLS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3184{3185EmitThreeSame(0, 2 | (size >> 6), 0x19, Rd, Rn, Rm);3186}3187void ARM64FloatEmitter::NOT(ARM64Reg Rd, ARM64Reg Rn)3188{3189Emit2RegMisc(IsQuad(Rd), 1, 0, 5, Rd, Rn);3190}3191void ARM64FloatEmitter::ORR(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3192{3193EmitThreeSame(0, 2, 3, Rd, Rn, Rm);3194}3195void ARM64FloatEmitter::REV16(u8 size, ARM64Reg Rd, ARM64Reg Rn)3196{3197Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 1, Rd, Rn);3198}3199void ARM64FloatEmitter::REV32(u8 size, ARM64Reg Rd, ARM64Reg Rn)3200{3201Emit2RegMisc(IsQuad(Rd), 1, size >> 4, 0, Rd, Rn);3202}3203void ARM64FloatEmitter::REV64(u8 size, ARM64Reg Rd, ARM64Reg Rn)3204{3205Emit2RegMisc(IsQuad(Rd), 0, size >> 4, 0, Rd, Rn);3206}3207void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)3208{3209Emit2RegMisc(IsQuad(Rd), 0, size >> 6, 0x1D, Rd, Rn);3210}3211void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn)3212{3213Emit2RegMisc(IsQuad(Rd), 1, size >> 6, 0x1D, Rd, Rn);3214}3215void ARM64FloatEmitter::SCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)3216{3217int imm = size * 2 - scale;3218EmitShiftImm(IsQuad(Rd), 0, imm >> 3, imm & 7, 0x1C, Rd, Rn);3219}3220void ARM64FloatEmitter::UCVTF(u8 size, ARM64Reg Rd, ARM64Reg Rn, int scale)3221{3222int imm = size * 2 - scale;3223EmitShiftImm(IsQuad(Rd), 1, imm >> 3, imm & 7, 0x1C, Rd, Rn);3224}3225void ARM64FloatEmitter::SQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)3226{3227Emit2RegMisc(false, 0, dest_size >> 4, 0x14, Rd, Rn);3228}3229void ARM64FloatEmitter::SQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)3230{3231Emit2RegMisc(true, 0, dest_size >> 4, 0x14, Rd, Rn);3232}3233void ARM64FloatEmitter::UQXTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)3234{3235Emit2RegMisc(false, 1, dest_size >> 4, 0x14, Rd, Rn);3236}3237void ARM64FloatEmitter::UQXTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)3238{3239Emit2RegMisc(true, 1, dest_size >> 4, 0x14, Rd, Rn);3240}3241void ARM64FloatEmitter::XTN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)3242{3243Emit2RegMisc(false, 0, dest_size >> 4, 0x12, Rd, Rn);3244}3245void ARM64FloatEmitter::XTN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn)3246{3247Emit2RegMisc(true, 0, dest_size >> 4, 0x12, Rd, Rn);3248}32493250void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3251_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3252EmitThreeSame(true, size >> 4, 0b10001, Rd, Rn, Rm);3253}32543255void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3256_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3257EmitThreeSame(false, size >> 4, 0b00111, Rd, Rn, Rm);3258}32593260void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3261_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3262EmitThreeSame(false, size >> 4, 0b00110, Rd, Rn, Rm);3263}32643265void ARM64FloatEmitter::CMHI(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3266_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3267EmitThreeSame(true, size >> 4, 0b00110, Rd, Rn, Rm);3268}32693270void ARM64FloatEmitter::CMHS(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3271_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3272EmitThreeSame(true, size >> 4, 0b00111, Rd, Rn, Rm);3273}32743275void ARM64FloatEmitter::CMTST(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm) {3276_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3277EmitThreeSame(false, size >> 4, 0b10001, Rd, Rn, Rm);3278}32793280void ARM64FloatEmitter::CMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn) {3281_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3282Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01001, Rd, Rn);3283}32843285void ARM64FloatEmitter::CMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {3286_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3287Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01000, Rd, Rn);3288}32893290void ARM64FloatEmitter::CMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {3291_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3292Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01000, Rd, Rn);3293}32943295void ARM64FloatEmitter::CMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn) {3296_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3297Emit2RegMisc(IsQuad(Rd), true, size >> 4, 0b01001, Rd, Rn);3298}32993300void ARM64FloatEmitter::CMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn) {3301_assert_msg_(!IsQuad(Rd) || size != 64, "%s cannot be used for scalar double", __FUNCTION__);3302Emit2RegMisc(IsQuad(Rd), false, size >> 4, 0b01010, Rd, Rn);3303}33043305// Move3306void ARM64FloatEmitter::DUP(u8 size, ARM64Reg Rd, ARM64Reg Rn)3307{3308u32 imm5 = 0;33093310if (size == 8)3311imm5 = 1;3312else if (size == 16)3313imm5 = 2;3314else if (size == 32)3315imm5 = 4;3316else if (size == 64)3317imm5 = 8;33183319EmitCopy(IsQuad(Rd), 0, imm5, 1, Rd, Rn);33203321}3322void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index, ARM64Reg Rn)3323{3324u32 imm5 = 0;33253326if (size == 8)3327{3328imm5 = 1;3329imm5 |= index << 1;3330}3331else if (size == 16)3332{3333imm5 = 2;3334imm5 |= index << 2;3335}3336else if (size == 32)3337{3338imm5 = 4;3339imm5 |= index << 3;3340}3341else if (size == 64)3342{3343imm5 = 8;3344imm5 |= index << 4;3345}33463347EmitCopy(1, 0, imm5, 3, Rd, Rn);3348}3349void ARM64FloatEmitter::INS(u8 size, ARM64Reg Rd, u8 index1, ARM64Reg Rn, u8 index2)3350{3351u32 imm5 = 0, imm4 = 0;33523353if (size == 8)3354{3355imm5 = 1;3356imm5 |= index1 << 1;3357imm4 = index2;3358}3359else if (size == 16)3360{3361imm5 = 2;3362imm5 |= index1 << 2;3363imm4 = index2 << 1;3364}3365else if (size == 32)3366{3367imm5 = 4;3368imm5 |= index1 << 3;3369imm4 = index2 << 2;3370}3371else if (size == 64)3372{3373imm5 = 8;3374imm5 |= index1 << 4;3375imm4 = index2 << 3;3376}33773378EmitCopy(1, 1, imm5, imm4, Rd, Rn);3379}33803381void ARM64FloatEmitter::UMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)3382{3383bool b64Bit = Is64Bit(Rd);3384_assert_msg_(Rd < SP, "%s destination must be a GPR!", __FUNCTION__);3385_assert_msg_(!(b64Bit && size != 64), "%s must have a size of 64 when destination is 64bit!", __FUNCTION__);3386u32 imm5 = 0;33873388if (size == 8)3389{3390imm5 = 1;3391imm5 |= index << 1;3392}3393else if (size == 16)3394{3395imm5 = 2;3396imm5 |= index << 2;3397}3398else if (size == 32)3399{3400imm5 = 4;3401imm5 |= index << 3;3402}3403else if (size == 64)3404{3405imm5 = 8;3406imm5 |= index << 4;3407}34083409EmitCopy(b64Bit, 0, imm5, 7, Rd, Rn);3410}3411void ARM64FloatEmitter::SMOV(u8 size, ARM64Reg Rd, ARM64Reg Rn, u8 index)3412{3413bool b64Bit = Is64Bit(Rd);3414_assert_msg_(Rd < SP, "%s destination must be a GPR!", __FUNCTION__);3415_assert_msg_(size != 64, "%s doesn't support 64bit destination. Use UMOV!", __FUNCTION__);3416u32 imm5 = 0;34173418if (size == 8)3419{3420imm5 = 1;3421imm5 |= index << 1;3422}3423else if (size == 16)3424{3425imm5 = 2;3426imm5 |= index << 2;3427}3428else if (size == 32)3429{3430imm5 = 4;3431imm5 |= index << 3;3432}34333434EmitCopy(b64Bit, 0, imm5, 5, Rd, Rn);3435}34363437void ARM64FloatEmitter::EncodeModImm(bool Q, u8 op, u8 cmode, u8 o2, ARM64Reg Rd, u8 abcdefgh) {3438Rd = DecodeReg(Rd);3439u8 abc = abcdefgh >> 5;3440u8 defgh = abcdefgh & 0x1F;3441Write32((Q << 30) | (op << 29) | (0xF << 24) | (abc << 16) | (cmode << 12) | (o2 << 11) | (1 << 10) | (defgh << 5) | Rd);3442}34433444void ARM64FloatEmitter::FMOV(u8 size, ARM64Reg Rd, u8 imm8) {3445_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);3446_assert_msg_(size == 32 || size == 64, "%s: unsupported size", __FUNCTION__);3447_assert_msg_(IsQuad(Rd) || size == 32, "Use non-SIMD FMOV to load one double imm8");3448EncodeModImm(IsQuad(Rd), size >> 6, 0b1111, 0, Rd, imm8);3449}34503451void ARM64FloatEmitter::MOVI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift, bool MSL) {3452_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);3453_assert_msg_(size == 8 || size == 16 || size == 32 || size == 64, "%s: unsupported size %d", __FUNCTION__, size);3454_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);3455_assert_msg_(!MSL || (size == 32 && shift > 0 && shift <= 16), "MOVI MSL shift requires size 32, shift must be 8 or 16");3456_assert_msg_(size != 64 || shift == 0, "MOVI 64-bit imm cannot be shifted");34573458u8 cmode = 0;3459if (size == 8)3460cmode = 0b1110;3461else if (size == 16)3462cmode = 0b1000 | (shift >> 2);3463else if (MSL)3464cmode = 0b1100 | (shift >> 3);3465else if (size == 32)3466cmode = (shift >> 2);3467else if (size == 64)3468cmode = 0b1110;3469else3470_assert_msg_(false, "%s: unhandled case", __FUNCTION__);34713472EncodeModImm(IsQuad(Rd), size >> 6, cmode, 0, Rd, imm8);3473}34743475void ARM64FloatEmitter::MVNI(u8 size, ARM64Reg Rd, u8 imm8, u8 shift, bool MSL) {3476_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);3477_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);3478_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);3479_assert_msg_(!MSL || (size == 32 && shift > 0 && shift <= 16), "MVNI MSL shift requires size 32, shift must be 8 or 16");34803481u8 cmode = 0;3482if (size == 16)3483cmode = 0b1000 | (shift >> 2);3484else if (MSL)3485cmode = 0b1100 | (shift >> 3);3486else if (size == 32)3487cmode = (shift >> 2);3488else3489_assert_msg_(false, "%s: unhandled case", __FUNCTION__);34903491EncodeModImm(IsQuad(Rd), 1, cmode, 0, Rd, imm8);3492}34933494void ARM64FloatEmitter::ORR(u8 size, ARM64Reg Rd, u8 imm8, u8 shift) {3495_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);3496_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);3497_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);34983499u8 cmode = 0;3500if (size == 16)3501cmode = 0b1001 | (shift >> 2);3502else if (size == 32)3503cmode = 0b0001 | (shift >> 2);3504else3505_assert_msg_(false, "%s: unhandled case", __FUNCTION__);35063507EncodeModImm(IsQuad(Rd), 0, cmode, 0, Rd, imm8);3508}35093510void ARM64FloatEmitter::BIC(u8 size, ARM64Reg Rd, u8 imm8, u8 shift) {3511_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);3512_assert_msg_(size == 16 || size == 32, "%s: unsupported size %d", __FUNCTION__, size);3513_assert_msg_((shift & 7) == 0 && shift < size, "%s: unsupported shift %d", __FUNCTION__, shift);35143515u8 cmode = 0;3516if (size == 16)3517cmode = 0b1001 | (shift >> 2);3518else if (size == 32)3519cmode = 0b0001 | (shift >> 2);3520else3521_assert_msg_(false, "%s: unhandled case", __FUNCTION__);35223523EncodeModImm(IsQuad(Rd), 1, cmode, 0, Rd, imm8);3524}35253526// One source3527void ARM64FloatEmitter::FCVT(u8 size_to, u8 size_from, ARM64Reg Rd, ARM64Reg Rn)3528{3529u32 dst_encoding = 0;3530u32 src_encoding = 0;35313532if (size_to == 16)3533dst_encoding = 3;3534else if (size_to == 32)3535dst_encoding = 0;3536else if (size_to == 64)3537dst_encoding = 1;35383539if (size_from == 16)3540src_encoding = 3;3541else if (size_from == 32)3542src_encoding = 0;3543else if (size_from == 64)3544src_encoding = 1;35453546Emit1Source(0, 0, src_encoding, 4 | dst_encoding, Rd, Rn);3547}35483549void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn)3550{3551if (IsScalar(Rn)) {3552// Source is in FP register (like destination!). We must use a vector encoding.3553bool sign = false;3554Rd = DecodeReg(Rd);3555Rn = DecodeReg(Rn);3556int sz = IsDouble(Rn);3557Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);3558} else {3559bool sf = Is64Bit(Rn);3560u32 type = 0;3561if (IsDouble(Rd))3562type = 1;3563EmitConversion(sf, 0, type, 0, 2, Rd, Rn);3564}3565}35663567void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn)3568{3569if (IsScalar(Rn)) {3570// Source is in FP register (like destination!). We must use a vector encoding.3571bool sign = true;3572Rd = DecodeReg(Rd);3573Rn = DecodeReg(Rn);3574int sz = IsDouble(Rn);3575Write32((0x5e << 24) | (sign << 29) | (sz << 22) | (0x876 << 10) | (Rn << 5) | Rd);3576} else {3577bool sf = Is64Bit(Rn);3578u32 type = 0;3579if (IsDouble(Rd))3580type = 1;35813582EmitConversion(sf, 0, type, 0, 3, Rd, Rn);3583}3584}35853586void ARM64FloatEmitter::SCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)3587{3588if (IsScalar(Rn)) {3589int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;3590Rd = DecodeReg(Rd);3591Rn = DecodeReg(Rn);35923593Write32((1 << 30) | (0 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);3594} else {3595bool sf = Is64Bit(Rn);3596u32 type = 0;3597if (IsDouble(Rd))3598type = 1;35993600EmitConversion2(sf, 0, false, type, 0, 2, 64 - scale, Rd, Rn);3601}3602}36033604void ARM64FloatEmitter::UCVTF(ARM64Reg Rd, ARM64Reg Rn, int scale)3605{3606if (IsScalar(Rn)) {3607int imm = (IsDouble(Rn) ? 64 : 32) * 2 - scale;3608Rd = DecodeReg(Rd);3609Rn = DecodeReg(Rn);36103611Write32((1 << 30) | (1 << 29) | (0x1F << 24) | (imm << 16) | (0x1C << 11) | (1 << 10) | (Rn << 5) | Rd);3612} else {3613bool sf = Is64Bit(Rn);3614u32 type = 0;3615if (IsDouble(Rd))3616type = 1;36173618EmitConversion2(sf, 0, false, type, 0, 3, 64 - scale, Rd, Rn);3619}3620}36213622void ARM64FloatEmitter::FCMP(ARM64Reg Rn, ARM64Reg Rm)3623{3624EmitCompare(0, 0, 0, 0, Rn, Rm);3625}3626void ARM64FloatEmitter::FCMP(ARM64Reg Rn)3627{3628EmitCompare(0, 0, 0, 8, Rn, (ARM64Reg)0);3629}3630void ARM64FloatEmitter::FCMPE(ARM64Reg Rn, ARM64Reg Rm)3631{3632EmitCompare(0, 0, 0, 0x10, Rn, Rm);3633}3634void ARM64FloatEmitter::FCMPE(ARM64Reg Rn)3635{3636EmitCompare(0, 0, 0, 0x18, Rn, (ARM64Reg)0);3637}3638void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3639{3640EmitThreeSame(0, size >> 6, 0x1C, Rd, Rn, Rm);3641}3642void ARM64FloatEmitter::FCMEQ(u8 size, ARM64Reg Rd, ARM64Reg Rn)3643{3644Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xD, Rd, Rn);3645}3646void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3647{3648EmitThreeSame(1, size >> 6, 0x1C, Rd, Rn, Rm);3649}3650void ARM64FloatEmitter::FCMGE(u8 size, ARM64Reg Rd, ARM64Reg Rn)3651{3652Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xC, Rd, Rn);3653}3654void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3655{3656EmitThreeSame(1, 2 | (size >> 6), 0x1C, Rd, Rn, Rm);3657}3658void ARM64FloatEmitter::FCMGT(u8 size, ARM64Reg Rd, ARM64Reg Rn)3659{3660Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0x0C, Rd, Rn);3661}3662void ARM64FloatEmitter::FCMLE(u8 size, ARM64Reg Rd, ARM64Reg Rn)3663{3664Emit2RegMisc(IsQuad(Rd), 1, 2 | (size >> 6), 0xD, Rd, Rn);3665}3666void ARM64FloatEmitter::FCMLT(u8 size, ARM64Reg Rd, ARM64Reg Rn)3667{3668Emit2RegMisc(IsQuad(Rd), 0, 2 | (size >> 6), 0xE, Rd, Rn);3669}36703671void ARM64FloatEmitter::FCSEL(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, CCFlags cond)3672{3673EmitCondSelect(0, 0, cond, Rd, Rn, Rm);3674}36753676void ARM64FloatEmitter::FCCMP(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond) {3677EmitCondCompare(0, 0, cond, 0, nzcv, Rn, Rm);3678}36793680void ARM64FloatEmitter::FCCMPE(ARM64Reg Rn, ARM64Reg Rm, u8 nzcv, CCFlags cond) {3681EmitCondCompare(0, 0, cond, 1, nzcv, Rn, Rm);3682}36833684// Permute3685void ARM64FloatEmitter::UZP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3686{3687EmitPermute(size, 1, Rd, Rn, Rm);3688}3689void ARM64FloatEmitter::TRN1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3690{3691EmitPermute(size, 2, Rd, Rn, Rm);3692}3693void ARM64FloatEmitter::ZIP1(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3694{3695EmitPermute(size, 3, Rd, Rn, Rm);3696}3697void ARM64FloatEmitter::UZP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3698{3699EmitPermute(size, 5, Rd, Rn, Rm);3700}3701void ARM64FloatEmitter::TRN2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3702{3703EmitPermute(size, 6, Rd, Rn, Rm);3704}3705void ARM64FloatEmitter::ZIP2(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm)3706{3707EmitPermute(size, 7, Rd, Rn, Rm);3708}37093710void ARM64FloatEmitter::EXT(ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, int index) {3711_assert_msg_(!IsSingle(Rd), "%s doesn't support singles!", __FUNCTION__);37123713bool quad = IsQuad(Rd);3714_assert_msg_(index >= 0 && index < 16 && (quad || index < 8), "%s start index out of bounds", __FUNCTION__);3715_assert_msg_(IsQuad(Rd) == IsQuad(Rn) && IsQuad(Rd) == IsQuad(Rm), "%s operands not same size", __FUNCTION__);37163717Rd = DecodeReg(Rd);3718Rn = DecodeReg(Rn);3719Rm = DecodeReg(Rm);37203721Write32((quad << 30) | (0x17 << 25) | (Rm << 16) | (index << 11) | (Rn << 5) | Rd);3722}37233724// Shift by immediate3725void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)3726{3727SSHLL(src_size, Rd, Rn, shift, false);3728}3729void ARM64FloatEmitter::SSHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)3730{3731SSHLL(src_size, Rd, Rn, shift, true);3732}3733void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)3734{3735SHRN(dest_size, Rd, Rn, shift, false);3736}3737void ARM64FloatEmitter::SHRN2(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)3738{3739SHRN(dest_size, Rd, Rn, shift, true);3740}3741void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)3742{3743USHLL(src_size, Rd, Rn, shift, false);3744}3745void ARM64FloatEmitter::USHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift)3746{3747USHLL(src_size, Rd, Rn, shift, true);3748}3749void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {3750SHLL(src_size, Rd, Rn, false);3751}3752void ARM64FloatEmitter::SHLL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn) {3753SHLL(src_size, Rd, Rn, true);3754}3755void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)3756{3757SXTL(src_size, Rd, Rn, false);3758}3759void ARM64FloatEmitter::SXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)3760{3761SXTL(src_size, Rd, Rn, true);3762}3763void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)3764{3765UXTL(src_size, Rd, Rn, false);3766}3767void ARM64FloatEmitter::UXTL2(u8 src_size, ARM64Reg Rd, ARM64Reg Rn)3768{3769UXTL(src_size, Rd, Rn, true);3770}37713772static u32 EncodeImmShiftLeft(u8 src_size, u32 shift) {3773return src_size + shift;3774}37753776static u32 EncodeImmShiftRight(u8 src_size, u32 shift) {3777return src_size * 2 - shift;3778}37793780void ARM64FloatEmitter::SSHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)3781{3782_assert_msg_(shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);3783u32 imm = EncodeImmShiftLeft(src_size, shift);3784EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x14, Rd, Rn);3785}37863787void ARM64FloatEmitter::USHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)3788{3789_assert_msg_(shift < src_size, "%s shift amount must less than the element size!", __FUNCTION__);3790u32 imm = EncodeImmShiftLeft(src_size, shift);3791EmitShiftImm(upper, 1, imm >> 3, imm & 7, 0x14, Rd, Rn);3792}37933794void ARM64FloatEmitter::SHLL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper) {3795_assert_msg_(src_size <= 32, "%s shift amount cannot be 64", __FUNCTION__);3796Emit2RegMisc(upper, 1, src_size >> 4, 0b10011, Rd, Rn);3797}37983799void ARM64FloatEmitter::SHRN(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift, bool upper)3800{3801_assert_msg_(shift > 0, "%s shift amount must be greater than zero!", __FUNCTION__);3802_assert_msg_(shift <= dest_size, "%s shift amount must less than or equal to the element size!", __FUNCTION__);3803u32 imm = EncodeImmShiftRight(dest_size, shift);3804EmitShiftImm(upper, 0, imm >> 3, imm & 7, 0x10, Rd, Rn);3805}38063807void ARM64FloatEmitter::SHL(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {3808_assert_msg_(shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);3809u32 imm = EncodeImmShiftLeft(dest_size, shift);3810EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0xA, Rd, Rn);3811}38123813void ARM64FloatEmitter::USHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {3814_assert_msg_(shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);3815u32 imm = EncodeImmShiftRight(dest_size, shift);3816EmitShiftImm(IsQuad(Rd), true, imm >> 3, imm & 7, 0x0, Rd, Rn);3817}38183819void ARM64FloatEmitter::SSHR(u8 dest_size, ARM64Reg Rd, ARM64Reg Rn, u32 shift) {3820_assert_msg_(shift < dest_size, "%s shift amount must less than the element size!", __FUNCTION__);3821u32 imm = EncodeImmShiftRight(dest_size, shift);3822EmitShiftImm(IsQuad(Rd), false, imm >> 3, imm & 7, 0x0, Rd, Rn);3823}38243825void ARM64FloatEmitter::SXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)3826{3827SSHLL(src_size, Rd, Rn, 0, upper);3828}38293830void ARM64FloatEmitter::UXTL(u8 src_size, ARM64Reg Rd, ARM64Reg Rn, bool upper)3831{3832USHLL(src_size, Rd, Rn, 0, upper);3833}38343835// vector x indexed element3836void ARM64FloatEmitter::FMUL(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)3837{3838_assert_msg_(size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);38393840bool L = false;3841bool H = false;3842if (size == 32) {3843L = index & 1;3844H = (index >> 1) & 1;3845} else if (size == 64) {3846H = index == 1;3847}38483849EmitVectorxElement(0, 2 | (size >> 6), L, 0x9, H, Rd, Rn, Rm);3850}38513852void ARM64FloatEmitter::FMLA(u8 size, ARM64Reg Rd, ARM64Reg Rn, ARM64Reg Rm, u8 index)3853{3854_assert_msg_(size == 32 || size == 64, "%s only supports 32bit or 64bit size!", __FUNCTION__);38553856bool L = false;3857bool H = false;3858if (size == 32) {3859L = index & 1;3860H = (index >> 1) & 1;3861} else if (size == 64) {3862H = index == 1;3863}38643865EmitVectorxElement(0, 2 | (size >> 6), L, 1, H, Rd, Rn, Rm);3866}38673868void ARM64FloatEmitter::ABI_PushRegisters(uint32_t registers, uint32_t fp_registers) {3869_assert_msg_((registers & 0x60000000) == 0, "ABI_PushRegisters: Do not include FP and LR, those are handled non-conditionally");38703871ARM64Reg gprs[32]{}, fprs[32]{};3872int num_gprs = 0, num_fprs = 0;3873for (int i = 0; i < 29; i++) {3874if (registers & (1U << i))3875gprs[num_gprs++] = (ARM64Reg)(X0 + i);3876}38773878for (int i = 0; i < 32; i++) {3879if (fp_registers & (1U << i))3880fprs[num_fprs++] = (ARM64Reg)(D0 + i);3881}38823883u32 stack_size = 16 + ROUND_UP(num_gprs * 8, 16) + ROUND_UP(num_fprs * 8, 16);38843885// Stack is required to be quad-word aligned.3886if (stack_size < 256) {3887m_emit->STP(INDEX_PRE, FP, LR, SP, -(s32)stack_size);3888} else {3889m_emit->SUB(SP, SP, stack_size);3890m_emit->STP(INDEX_UNSIGNED, FP, LR, SP, 0);3891}3892m_emit->MOVfromSP(X29); // Set new frame pointer3893int offset = 16;3894for (int i = 0; i < num_gprs / 2; i++) {3895m_emit->STP(INDEX_SIGNED, gprs[i*2], gprs[i*2+1], X29, offset);3896offset += 16;3897}3898if (num_gprs & 1) {3899m_emit->STR(INDEX_UNSIGNED, gprs[num_gprs - 1], X29, offset);3900offset += 16;3901}39023903for (int i = 0; i < num_fprs / 2; i++) {3904STP(64, INDEX_SIGNED, fprs[i * 2], fprs[i * 2 + 1], SP, offset);3905offset += 16;3906}3907if (num_fprs & 1) {3908STR(64, INDEX_UNSIGNED, fprs[num_fprs - 1], X29, offset);3909offset += 16;3910}3911// Now offset should be == stack_size.3912}39133914void ARM64FloatEmitter::ABI_PopRegisters(uint32_t registers, uint32_t fp_registers) {3915ARM64Reg gprs[32]{}, fprs[32]{};3916int num_gprs = 0, num_fprs = 0;3917for (int i = 0; i < 29; i++) {3918if (registers & (1U << i))3919gprs[num_gprs++] = (ARM64Reg)(X0 + i);3920}39213922for (int i = 0; i < 32; i++) {3923if (fp_registers & (1U << i))3924fprs[num_fprs++] = (ARM64Reg)(D0 + i);3925}39263927u32 stack_size = 16 + ROUND_UP(num_gprs * 8, 16) + ROUND_UP(num_fprs * 8, 16);39283929// SP points to the bottom. We're gonna walk it upwards.3930// Reload FP, LR.3931m_emit->LDP(INDEX_SIGNED, FP, LR, SP, 0);3932int offset = 16;3933for (int i = 0; i < num_gprs / 2; i++) {3934m_emit->LDP(INDEX_SIGNED, gprs[i*2], gprs[i*2+1], SP, offset);3935offset += 16;3936}3937// Do the straggler.3938if (num_gprs & 1) {3939m_emit->LDR(INDEX_UNSIGNED, gprs[num_gprs-1], SP, offset);3940offset += 16;3941}39423943// Time for the FP regs.3944for (int i = 0; i < num_fprs / 2; i++) {3945LDP(64, INDEX_SIGNED, fprs[i * 2], fprs[i * 2 + 1], SP, offset);3946offset += 16;3947}3948// Do the straggler.3949if (num_fprs & 1) {3950LDR(64, INDEX_UNSIGNED, fprs[num_fprs-1], SP, offset);3951offset += 16;3952}3953// Now offset should be == stack_size.39543955// Restore the stack pointer.3956m_emit->ADD(SP, SP, stack_size);3957}39583959void ARM64XEmitter::ANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {3960// It's probably okay to AND by extra bits.3961if (!Is64Bit(Rn))3962imm &= 0xFFFFFFFF;3963if (!TryANDI2R(Rd, Rn, imm)) {3964_assert_msg_(scratch != INVALID_REG, "ANDI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);3965MOVI2R(scratch, imm);3966AND(Rd, Rn, scratch);3967}3968}39693970void ARM64XEmitter::ORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {3971_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "ORRI2R - more bits in imm than Rn");3972if (!TryORRI2R(Rd, Rn, imm)) {3973_assert_msg_(scratch != INVALID_REG, "ORRI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);3974MOVI2R(scratch, imm);3975ORR(Rd, Rn, scratch);3976}3977}39783979void ARM64XEmitter::EORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {3980_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "EORI2R - more bits in imm than Rn");3981if (!TryEORI2R(Rd, Rn, imm)) {3982_assert_msg_(scratch != INVALID_REG, "EORI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);3983MOVI2R(scratch, imm);3984EOR(Rd, Rn, scratch);3985}3986}39873988void ARM64XEmitter::ANDSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {3989if (!Is64Bit(Rn))3990imm &= 0xFFFFFFFF;3991unsigned int n, imm_s, imm_r;3992if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {3993ANDS(Rd, Rn, imm_r, imm_s, n != 0);3994} else if (imm == 0) {3995ANDS(Rd, Rn, Is64Bit(Rn) ? ZR : WZR);3996} else {3997_assert_msg_(scratch != INVALID_REG, "ANDSI2R - failed to construct logical immediate value from %08x, need scratch", (u32)imm);3998MOVI2R(scratch, imm);3999ANDS(Rd, Rn, scratch);4000}4001}40024003void ARM64XEmitter::ADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {4004if (!TryADDI2R(Rd, Rn, imm)) {4005_assert_msg_(scratch != INVALID_REG, "ADDI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);4006MOVI2R(scratch, imm);4007ADD(Rd, Rn, scratch);4008}4009}40104011void ARM64XEmitter::SUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {4012if (!TrySUBI2R(Rd, Rn, imm)) {4013_assert_msg_(scratch != INVALID_REG, "SUBI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);4014MOVI2R(scratch, imm);4015SUB(Rd, Rn, scratch);4016}4017}40184019void ARM64XEmitter::CMPI2R(ARM64Reg Rn, u64 imm, ARM64Reg scratch) {4020if (!TryCMPI2R(Rn, imm)) {4021_assert_msg_(scratch != INVALID_REG, "CMPI2R - failed to construct arithmetic immediate value from %08x, need scratch", (u32)imm);4022MOVI2R(scratch, imm);4023CMP(Rn, scratch);4024}4025}40264027bool ARM64XEmitter::TryADDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {4028s64 negated = Is64Bit(Rn) ? -(s64)imm : -(s32)(u32)imm;4029u32 val;4030bool shift;4031if (imm == 0) {4032// Prefer MOV (ORR) instead of ADD for moves.4033MOV(Rd, Rn);4034return true;4035} else if (IsImmArithmetic(imm, &val, &shift)) {4036ADD(Rd, Rn, val, shift);4037return true;4038} else if (IsImmArithmetic((u64)negated, &val, &shift)) {4039SUB(Rd, Rn, val, shift);4040return true;4041} else {4042return false;4043}4044}40454046bool ARM64XEmitter::TrySUBI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {4047s64 negated = Is64Bit(Rn) ? -(s64)imm : -(s32)(u32)imm;4048u32 val;4049bool shift;4050if (imm == 0) {4051// Prefer MOV (ORR) instead of ADD for moves.4052MOV(Rd, Rn);4053return true;4054} else if (IsImmArithmetic(imm, &val, &shift)) {4055SUB(Rd, Rn, val, shift);4056return true;4057} else if (IsImmArithmetic((u64)negated, &val, &shift)) {4058ADD(Rd, Rn, val, shift);4059return true;4060} else {4061return false;4062}4063}40644065bool ARM64XEmitter::TryCMPI2R(ARM64Reg Rn, u64 imm) {4066s64 negated = Is64Bit(Rn) ? -(s64)imm : -(s32)(u32)imm;4067u32 val;4068bool shift;4069if (IsImmArithmetic(imm, &val, &shift)) {4070CMP(Rn, val, shift);4071return true;4072} else if (IsImmArithmetic((u64)negated, &val, &shift)) {4073CMN(Rn, val, shift);4074return true;4075} else {4076return false;4077}4078}40794080bool ARM64XEmitter::TryANDI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {4081if (!Is64Bit(Rn))4082imm &= 0xFFFFFFFF;4083u32 n, imm_r, imm_s;4084if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {4085AND(Rd, Rn, imm_r, imm_s, n != 0);4086return true;4087} else if (imm == 0) {4088MOVI2R(Rd, 0);4089return true;4090} else {4091return false;4092}4093}4094bool ARM64XEmitter::TryORRI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {4095_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "TryORRI2R - more bits in imm than Rn");4096u32 n, imm_r, imm_s;4097if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {4098ORR(Rd, Rn, imm_r, imm_s, n != 0);4099return true;4100} else if (imm == 0) {4101if (Rd != Rn) {4102MOV(Rd, Rn);4103}4104return true;4105} else {4106return false;4107}4108}4109bool ARM64XEmitter::TryEORI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm) {4110_assert_msg_(Is64Bit(Rn) || (imm & 0xFFFFFFFF00000000UL) == 0, "TryEORI2R - more bits in imm than Rn");4111u32 n, imm_r, imm_s;4112if (IsImmLogical(imm, Is64Bit(Rn) ? 64 : 32, &n, &imm_s, &imm_r)) {4113EOR(Rd, Rn, imm_r, imm_s, n != 0);4114return true;4115} else if (imm == 0) {4116if (Rd != Rn) {4117MOV(Rd, Rn);4118}4119return true;4120} else {4121return false;4122}4123}41244125float FPImm8ToFloat(uint8_t bits) {4126int sign = bits >> 7;4127uint32_t f = 0;4128f |= (sign << 31);4129int bit6 = (bits >> 6) & 1;4130uint32_t exp = ((!bit6) << 7) | (0x7C * bit6) | ((bits >> 4) & 3);4131uint32_t mantissa = (bits & 0xF) << 19;4132f |= exp << 23;4133f |= mantissa;4134float fl;4135memcpy(&fl, &f, sizeof(float));4136return fl;4137}41384139bool FPImm8FromFloat(float value, uint8_t *immOut) {4140uint32_t f;4141memcpy(&f, &value, sizeof(float));4142uint32_t mantissa4 = (f & 0x7FFFFF) >> 19;4143uint32_t exponent = (f >> 23) & 0xFF;4144uint32_t sign = f >> 31;4145if ((exponent >> 7) == ((exponent >> 6) & 1))4146return false;4147uint8_t imm8 = (sign << 7) | ((!(exponent >> 7)) << 6) | ((exponent & 3) << 4) | mantissa4;4148float newFloat = FPImm8ToFloat(imm8);4149if (newFloat == value) {4150*immOut = imm8;4151return true;4152} else {4153return false;4154}4155}41564157void ARM64FloatEmitter::MOVI2F(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {4158_assert_msg_(!IsDouble(Rd), "MOVI2F does not yet support double precision");4159uint8_t imm8;4160if (value == 0.0) {4161if (std::signbit(value)) {4162negate = !negate;4163}4164FMOV(Rd, IsDouble(Rd) ? ZR : WZR);4165if (negate) {4166FNEG(Rd, Rd);4167}4168// TODO: There are some other values we could generate with the float-imm instruction, like 1.0...4169} else if (negate && FPImm8FromFloat(-value, &imm8)) {4170FMOV(Rd, imm8);4171} else if (FPImm8FromFloat(value, &imm8)) {4172FMOV(Rd, imm8);4173if (negate) {4174FNEG(Rd, Rd);4175}4176} else {4177_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);4178u32 ival;4179if (negate) {4180value = -value;4181}4182memcpy(&ival, &value, sizeof(ival));4183m_emit->MOVI2R(scratch, ival);4184FMOV(Rd, scratch);4185}4186}41874188// TODO: Quite a few values could be generated easily using the MOVI instruction and friends.4189void ARM64FloatEmitter::MOVI2FDUP(ARM64Reg Rd, float value, ARM64Reg scratch, bool negate) {4190_assert_msg_(!IsSingle(Rd), "%s doesn't support singles", __FUNCTION__);4191int ival;4192memcpy(&ival, &value, 4);4193uint8_t imm8;4194if (ival == 0) { // Make sure to not catch negative zero here4195// Prefer MOVI 0, which may have no latency on some CPUs.4196MOVI(32, Rd, 0);4197if (negate)4198FNEG(32, Rd, Rd);4199} else if (negate && FPImm8FromFloat(-value, &imm8)) {4200FMOV(32, Rd, imm8);4201} else if (FPImm8FromFloat(value, &imm8)) {4202FMOV(32, Rd, imm8);4203if (negate) {4204FNEG(32, Rd, Rd);4205}4206} else if (TryAnyMOVI(32, Rd, ival)) {4207if (negate) {4208FNEG(32, Rd, Rd);4209}4210} else if (TryAnyMOVI(32, Rd, ival ^ 0x80000000)) {4211if (!negate) {4212FNEG(32, Rd, Rd);4213}4214} else {4215_assert_msg_(scratch != INVALID_REG, "Failed to find a way to generate FP immediate %f without scratch", value);4216if (negate) {4217ival ^= 0x80000000;4218}4219m_emit->MOVI2R(scratch, ival);4220DUP(32, Rd, scratch);4221}4222}42234224bool ARM64FloatEmitter::TryMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {4225if (size == 8) {4226// Can always do 8.4227MOVI(size, Rd, elementValue & 0xFF);4228return true;4229} else if (size == 16) {4230if ((elementValue & 0xFF00) == 0) {4231MOVI(size, Rd, elementValue & 0xFF, 0);4232return true;4233} else if ((elementValue & 0x00FF) == 0) {4234MOVI(size, Rd, (elementValue >> 8) & 0xFF, 8);4235return true;4236} else if ((elementValue & 0xFF00) == 0xFF00) {4237MVNI(size, Rd, ~elementValue & 0xFF, 0);4238return true;4239} else if ((elementValue & 0x00FF) == 0x00FF) {4240MVNI(size, Rd, (~elementValue >> 8) & 0xFF, 8);4241return true;4242}42434244return false;4245} else if (size == 32) {4246for (int shift = 0; shift < 32; shift += 8) {4247uint32_t mask = 0xFFFFFFFF &~ (0xFF << shift);4248if ((elementValue & mask) == 0) {4249MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift);4250return true;4251} else if ((elementValue & mask) == mask) {4252MVNI(size, Rd, (~elementValue >> shift) & 0xFF, shift);4253return true;4254}4255}42564257// Maybe an MSL shift will work?4258for (int shift = 8; shift <= 16; shift += 8) {4259uint32_t mask = 0xFFFFFFFF & ~(0xFF << shift);4260uint32_t ones = (1 << shift) - 1;4261uint32_t notOnes = 0xFFFFFF00 << shift;4262if ((elementValue & mask) == ones) {4263MOVI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);4264return true;4265} else if ((elementValue & mask) == notOnes) {4266MVNI(size, Rd, (elementValue >> shift) & 0xFF, shift, true);4267return true;4268}4269}42704271return false;4272} else if (size == 64) {4273uint8_t imm8 = 0;4274for (int i = 0; i < 8; ++i) {4275uint8_t byte = (elementValue >> (i * 8)) & 0xFF;4276if (byte != 0 && byte != 0xFF)4277return false;42784279if (byte == 0xFF)4280imm8 |= 1 << i;4281}42824283// Didn't run into any partial bytes, so size 64 is doable.4284MOVI(size, Rd, imm8);4285return true;4286}4287return false;4288}42894290bool ARM64FloatEmitter::TryAnyMOVI(u8 size, ARM64Reg Rd, uint64_t elementValue) {4291// Try the original size first in case that's more optimal.4292if (TryMOVI(size, Rd, elementValue))4293return true;42944295uint64_t value = elementValue;4296if (size != 64) {4297uint64_t masked = elementValue & ((1ULL << size) - 1ULL);4298for (int i = size; i < 64; ++i) {4299value |= masked << i;4300}4301}43024303for (int attempt = 8; attempt <= 64; attempt += attempt) {4304// Original size was already attempted above.4305if (attempt != size) {4306if (TryMOVI(attempt, Rd, value))4307return true;4308}4309}43104311return false;4312}43134314void ARM64XEmitter::SUBSI2R(ARM64Reg Rd, ARM64Reg Rn, u64 imm, ARM64Reg scratch) {4315u32 val;4316bool shift;4317if (IsImmArithmetic(imm, &val, &shift)) {4318SUBS(Rd, Rn, val, shift);4319} else {4320_assert_msg_(scratch != INVALID_REG, "SUBSI2R - failed to construct immediate value from %08x, need scratch", (u32)imm);4321MOVI2R(scratch, imm);4322SUBS(Rd, Rn, scratch);4323}4324}43254326void ARM64CodeBlock::PoisonMemory(int offset) {4327// So we can adjust region to writable space. Might be zero.4328ptrdiff_t writable = m_writable - m_code;43294330u32 *ptr = (u32 *)(region + offset + writable);4331u32 *maxptr = (u32 *)(region + region_size - offset + writable);4332// If our memory isn't a multiple of u32 then this won't write the last remaining bytes with anything4333// Less than optimal, but there would be nothing we could do but throw a runtime warning anyway.4334// AArch64: 0xD4200000 = BRK 04335while (ptr < maxptr)4336*ptr++ = 0xD4200000;4337}43384339} // namespace434043414342