CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Common/ArmEmitter.cpp
Views: 1401
// Copyright (C) 2003 Dolphin Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official SVN repository and contact information can be found at15// http://code.google.com/p/dolphin-emu/1617#include "ppsspp_config.h"1819#include <stdarg.h>20#include <stddef.h>21#include <stdio.h>22#include <stdlib.h>23#include <string.h>2425#if PPSSPP_PLATFORM(IOS)26#include <libkern/OSCacheControl.h>27#include <sys/mman.h>28#endif2930#include "Common/Log.h"31#include "Common/MemoryUtil.h"32#include "Common/ArmEmitter.h"33#include "Common/CPUDetect.h"3435#ifdef _WIN3236#include "CommonWindows.h"37#endif3839// Want it in release builds too40#ifdef __ANDROID__41#undef _dbg_assert_msg_42#define _dbg_assert_msg_ _assert_msg_43#endif4445namespace ArmGen46{4748inline u32 RotR(u32 a, int amount) {49if (!amount) return a;50return (a >> amount) | (a << (32 - amount));51}5253inline u32 RotL(u32 a, int amount) {54if (!amount) return a;55return (a << amount) | (a >> (32 - amount));56}5758bool TryMakeOperand2(u32 imm, Operand2 &op2) {59// Just brute force it.60for (int i = 0; i < 16; i++) {61int mask = RotR(0xFF, i * 2);62if ((imm & mask) == imm) {63op2 = Operand2((u8)(RotL(imm, i * 2)), (u8)i);64return true;65}66}67return false;68}6970bool TryMakeOperand2_AllowInverse(u32 imm, Operand2 &op2, bool *inverse)71{72if (!TryMakeOperand2(imm, op2)) {73*inverse = true;74return TryMakeOperand2(~imm, op2);75} else {76*inverse = false;77return true;78}79}8081bool TryMakeOperand2_AllowNegation(s32 imm, Operand2 &op2, bool *negated)82{83if (!TryMakeOperand2(imm, op2)) {84*negated = true;85return TryMakeOperand2(-imm, op2);86} else {87*negated = false;88return true;89}90}9192Operand2 AssumeMakeOperand2(u32 imm) {93Operand2 op2;94bool result = TryMakeOperand2(imm, op2);95_dbg_assert_msg_(result, "Could not make assumed Operand2.");96if (!result) {97// Make double sure that we get it logged.98ERROR_LOG(Log::JIT, "Could not make assumed Operand2.");99}100return op2;101}102103bool ARMXEmitter::TrySetValue_TwoOp(ARMReg reg, u32 val)104{105int ops = 0;106for (int i = 0; i < 16; i++)107{108if ((val >> (i*2)) & 0x3)109{110ops++;111i+=3;112}113}114if (ops > 2)115return false;116117bool first = true;118for (int i = 0; i < 16; i++, val >>=2) {119if (val & 0x3) {120first ? MOV(reg, Operand2((u8)val, (u8)((16-i) & 0xF)))121: ORR(reg, reg, Operand2((u8)val, (u8)((16-i) & 0xF)));122first = false;123i+=3;124val >>= 6;125}126}127return true;128}129130bool TryMakeFloatIMM8(u32 val, Operand2 &op2)131{132if ((val & 0x0007FFFF) == 0)133{134// VFP Encoding for Imms: <7> Not(<6>) Repeat(<6>,5) <5:0> Zeros(19)135bool bit6 = (val & 0x40000000) == 0x40000000;136bool canEncode = true;137for (u32 mask = 0x20000000; mask >= 0x02000000; mask >>= 1)138{139if (((val & mask) == mask) == bit6)140canEncode = false;141}142if (canEncode)143{144u32 imm8 = (val & 0x80000000) >> 24; // sign bit145imm8 |= (!bit6 << 6);146imm8 |= (val & 0x01F80000) >> 19;147op2 = IMM(imm8);148return true;149}150}151152return false;153}154155void ARMXEmitter::MOVI2FR(ARMReg dest, float val, bool negate)156{157union {float f; u32 u;} conv;158conv.f = negate ? -val : val;159MOVI2R(dest, conv.u);160}161162void ARMXEmitter::MOVI2F(ARMReg dest, float val, ARMReg tempReg, bool negate)163{164union {float f; u32 u;} conv;165conv.f = negate ? -val : val;166// Try moving directly first if mantisse is empty167Operand2 op2;168if (TryMakeFloatIMM8(conv.u, op2))169VMOV(dest, op2);170else171{172MOVI2R(tempReg, conv.u);173VMOV(dest, tempReg);174}175// Otherwise, possible to use a literal pool and VLDR directly (+- 1020)176}177178void ARMXEmitter::MOVI2F_neon(ARMReg dest, float val, ARMReg tempReg, bool negate)179{180union {float f; u32 u;} conv;181conv.f = negate ? -val : val;182// Try moving directly first if mantisse is empty183Operand2 op2;184if (TryMakeFloatIMM8(conv.u, op2))185VMOV_neon(F_32, dest, conv.u);186else187{188MOVI2R(tempReg, conv.u);189VDUP(F_32, dest, tempReg);190}191// Otherwise, possible to use a literal pool and VLD1 directly (+- 1020)192}193194void ARMXEmitter::ADDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)195{196if (!TryADDI2R(rd, rs, val)) {197MOVI2R(scratch, val);198ADD(rd, rs, scratch);199}200}201202bool ARMXEmitter::TryADDI2R(ARMReg rd, ARMReg rs, u32 val)203{204if (val == 0) {205if (rd != rs)206MOV(rd, rs);207return true;208}209Operand2 op2;210bool negated;211if (TryMakeOperand2_AllowNegation(val, op2, &negated)) {212if (!negated)213ADD(rd, rs, op2);214else215SUB(rd, rs, op2);216return true;217} else {218// Try 16-bit additions and subtractions - easy to test for.219// Should also try other rotations...220if ((val & 0xFFFF0000) == 0) {221// Decompose into two additions.222ADD(rd, rs, Operand2((u8)(val >> 8), 12)); // rotation right by 12*2 == rotation left by 8223ADD(rd, rd, Operand2((u8)(val), 0));224return true;225} else if ((((u32)-(s32)val) & 0xFFFF0000) == 0) {226val = (u32)-(s32)val;227SUB(rd, rs, Operand2((u8)(val >> 8), 12));228SUB(rd, rd, Operand2((u8)(val), 0));229return true;230} else {231return false;232}233}234}235236void ARMXEmitter::SUBI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)237{238if (!TrySUBI2R(rd, rs, val)) {239MOVI2R(scratch, val);240SUB(rd, rs, scratch);241}242}243244bool ARMXEmitter::TrySUBI2R(ARMReg rd, ARMReg rs, u32 val)245{246// Just add a negative.247return TryADDI2R(rd, rs, (u32)-(s32)val);248}249250void ARMXEmitter::ANDI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)251{252if (!TryANDI2R(rd, rs, val)) {253MOVI2R(scratch, val);254AND(rd, rs, scratch);255}256}257258bool ARMXEmitter::TryANDI2R(ARMReg rd, ARMReg rs, u32 val)259{260Operand2 op2;261bool inverse;262if (val == 0) {263// Avoid the ALU, may improve pipeline.264MOV(rd, 0);265return true;266} else if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) {267if (!inverse) {268AND(rd, rs, op2);269} else {270BIC(rd, rs, op2);271}272return true;273} else {274#if PPSSPP_ARCH(ARMV7)275// Check if we have a single pattern of sequential bits.276int seq = -1;277for (int i = 0; i < 32; ++i) {278if (((val >> i) & 1) == 0) {279if (seq == -1) {280// The width is all bits previous to this, set to 1.281seq = i;282}283} else if (seq != -1) {284// Uh oh, more than one sequence.285seq = -2;286}287}288289if (seq > 0) {290UBFX(rd, rs, 0, seq);291return true;292}293#endif294295int ops = 0;296for (int i = 0; i < 32; i += 2) {297u8 bits = RotR(val, i) & 0xFF;298// If either low bit is not set, we need to use a BIC for them.299if ((bits & 3) != 3) {300++ops;301i += 8 - 2;302}303}304305// The worst case is 4 (e.g. 0x55555555.)306#if PPSSPP_ARCH(ARMV7)307if (ops > 3) {308return false;309}310#endif311bool first = true;312for (int i = 0; i < 32; i += 2) {313u8 bits = RotR(val, i) & 0xFF;314if ((bits & 3) != 3) {315u8 rotation = i == 0 ? 0 : 16 - i / 2;316if (first) {317BIC(rd, rs, Operand2(~bits, rotation));318first = false;319} else {320BIC(rd, rd, Operand2(~bits, rotation));321}322// Well, we took care of these other bits while we were at it.323i += 8 - 2;324}325}326return true;327}328}329330void ARMXEmitter::CMPI2R(ARMReg rs, u32 val, ARMReg scratch)331{332if (!TryCMPI2R(rs, val)) {333MOVI2R(scratch, val);334CMP(rs, scratch);335}336}337338bool ARMXEmitter::TryCMPI2R(ARMReg rs, u32 val)339{340Operand2 op2;341bool negated;342if (TryMakeOperand2_AllowNegation(val, op2, &negated)) {343if (!negated)344CMP(rs, op2);345else346CMN(rs, op2);347return true;348} else {349return false;350}351}352353void ARMXEmitter::TSTI2R(ARMReg rs, u32 val, ARMReg scratch)354{355if (!TryTSTI2R(rs, val)) {356MOVI2R(scratch, val);357TST(rs, scratch);358}359}360361bool ARMXEmitter::TryTSTI2R(ARMReg rs, u32 val)362{363Operand2 op2;364if (TryMakeOperand2(val, op2)) {365TST(rs, op2);366return true;367} else {368return false;369}370}371372void ARMXEmitter::ORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)373{374if (!TryORI2R(rd, rs, val)) {375MOVI2R(scratch, val);376ORR(rd, rs, scratch);377}378}379380bool ARMXEmitter::TryORI2R(ARMReg rd, ARMReg rs, u32 val)381{382Operand2 op2;383if (val == 0) {384// Avoid the ALU, may improve pipeline.385if (rd != rs) {386MOV(rd, rs);387}388return true;389} else if (TryMakeOperand2(val, op2)) {390ORR(rd, rs, op2);391return true;392} else {393int ops = 0;394for (int i = 0; i < 32; i += 2) {395u8 bits = RotR(val, i) & 0xFF;396// If either low bit is set, we need to use a ORR for them.397if ((bits & 3) != 0) {398++ops;399i += 8 - 2;400}401}402403// The worst case is 4 (e.g. 0x55555555.) But MVN can make it 2. Not sure if better.404bool inversed;405if (TryMakeOperand2_AllowInverse(val, op2, &inversed) && ops >= 3) {406return false;407#if PPSSPP_ARCH(ARMV7)408} else if (ops > 3) {409return false;410#endif411}412413bool first = true;414for (int i = 0; i < 32; i += 2) {415u8 bits = RotR(val, i) & 0xFF;416if ((bits & 3) != 0) {417u8 rotation = i == 0 ? 0 : 16 - i / 2;418if (first) {419ORR(rd, rs, Operand2(bits, rotation));420first = false;421} else {422ORR(rd, rd, Operand2(bits, rotation));423}424// Well, we took care of these other bits while we were at it.425i += 8 - 2;426}427}428return true;429}430}431432void ARMXEmitter::EORI2R(ARMReg rd, ARMReg rs, u32 val, ARMReg scratch)433{434if (!TryEORI2R(rd, rs, val)) {435MOVI2R(scratch, val);436EOR(rd, rs, scratch);437}438}439440bool ARMXEmitter::TryEORI2R(ARMReg rd, ARMReg rs, u32 val)441{442Operand2 op2;443if (val == 0) {444if (rd != rs) {445MOV(rd, rs);446}447return true;448} else if (TryMakeOperand2(val, op2)) {449EOR(rd, rs, op2);450return true;451} else {452return false;453}454}455456void ARMXEmitter::FlushLitPool()457{458for (LiteralPool& pool : currentLitPool) {459// Search for duplicates460for (LiteralPool& old_pool : currentLitPool) {461if (old_pool.val == pool.val)462pool.loc = old_pool.loc;463}464465// Write the constant to Literal Pool466if (!pool.loc)467{468pool.loc = (intptr_t)code;469Write32(pool.val);470}471s32 offset = (s32)(pool.loc - (intptr_t)pool.ldr_address - 8);472473// Backpatch the LDR474*(u32*)pool.ldr_address |= (offset >= 0) << 23 | abs(offset);475}476// TODO: Save a copy of previous pools in case they are still in range.477currentLitPool.clear();478}479480void ARMXEmitter::AddNewLit(u32 val)481{482LiteralPool pool_item;483pool_item.loc = 0;484pool_item.val = val;485pool_item.ldr_address = code;486currentLitPool.push_back(pool_item);487}488489void ARMXEmitter::MOVI2R(ARMReg reg, u32 val, bool optimize)490{491Operand2 op2;492bool inverse;493494#if PPSSPP_ARCH(ARMV7)495// Unused496if (!optimize)497{498// For backpatching on ARMv7499MOVW(reg, val & 0xFFFF);500MOVT(reg, val, true);501return;502}503#endif504505if (TryMakeOperand2_AllowInverse(val, op2, &inverse)) {506inverse ? MVN(reg, op2) : MOV(reg, op2);507} else {508#if PPSSPP_ARCH(ARMV7)509// Use MOVW+MOVT for ARMv7+510MOVW(reg, val & 0xFFFF);511if(val & 0xFFFF0000)512MOVT(reg, val, true);513#else514if (!TrySetValue_TwoOp(reg,val)) {515bool first = true;516for (int i = 0; i < 32; i += 2) {517u8 bits = RotR(val, i) & 0xFF;518if ((bits & 3) != 0) {519u8 rotation = i == 0 ? 0 : 16 - i / 2;520if (first) {521MOV(reg, Operand2(bits, rotation));522first = false;523} else {524ORR(reg, reg, Operand2(bits, rotation));525}526// Well, we took care of these other bits while we were at it.527i += 8 - 2;528}529}530// Use literal pool for ARMv6.531// Disabled for now as it is crashfing since Vertex Decoder JIT532// AddNewLit(val);533// LDR(reg, R_PC); // To be backpatched later534}535#endif536}537}538539static const char *const armRegStrings[] = {540"r0","r1","r2","r3",541"r4","r5","r6","r7",542"r8","r9","r10","r11",543"r12","r13","r14","PC",544545"s0", "s1", "s2", "s3",546"s4", "s5", "s6", "s7",547"s8", "s9", "s10", "s11",548"s12", "s13", "s14", "s15",549550"s16", "s17", "s18", "s19",551"s20", "s21", "s22", "s23",552"s24", "s25", "s26", "s27",553"s28", "s29", "s30", "s31",554555"d0", "d1", "d2", "d3",556"d4", "d5", "d6", "d7",557"d8", "d9", "d10", "d11",558"d12", "d13", "d14", "d15",559560"d16", "d17", "d18", "d19",561"d20", "d21", "d22", "d23",562"d24", "d25", "d26", "d27",563"d28", "d29", "d30", "d31",564565"q0", "q1", "q2", "q3",566"q4", "q5", "q6", "q7",567"q8", "q9", "q10", "q11",568"q12", "q13", "q14", "q15",569};570571const char *ARMRegAsString(ARMReg reg) {572if ((unsigned int)reg >= sizeof(armRegStrings)/sizeof(armRegStrings[0]))573return "(bad)";574return armRegStrings[(int)reg];575}576577void ARMXEmitter::QuickCallFunction(ARMReg reg, const void *func) {578if (BLInRange(func)) {579BL(func);580} else {581MOVP2R(reg, func);582BL(reg);583}584}585586void ARMXEmitter::SetCodePointer(u8 *ptr, u8 *writePtr)587{588code = ptr;589startcode = code;590lastCacheFlushEnd = ptr;591}592593const u8 *ARMXEmitter::GetCodePointer() const594{595return code;596}597598u8 *ARMXEmitter::GetWritableCodePtr()599{600return code;601}602603void ARMXEmitter::ReserveCodeSpace(u32 bytes)604{605for (u32 i = 0; i < bytes/4; i++)606Write32(0xE1200070); //bkpt 0607}608609const u8 *ARMXEmitter::AlignCode16()610{611ReserveCodeSpace((-(intptr_t)code) & 15);612return code;613}614615const u8 *ARMXEmitter::NopAlignCode16() {616int bytes = ((-(intptr_t)code) & 15);617for (int i = 0; i < bytes / 4; i++) {618Write32(0xE320F000); // one of many possible nops619}620return code;621}622623const u8 *ARMXEmitter::AlignCodePage()624{625ReserveCodeSpace((-(intptr_t)code) & 4095);626return code;627}628629void ARMXEmitter::FlushIcache()630{631FlushIcacheSection(lastCacheFlushEnd, code);632lastCacheFlushEnd = code;633}634635void ARMXEmitter::FlushIcacheSection(u8 *start, u8 *end)636{637#if PPSSPP_PLATFORM(IOS)638// Header file says this is equivalent to: sys_icache_invalidate(start, end - start);639sys_cache_control(kCacheFunctionPrepareForExecution, start, end - start);640#elif PPSSPP_PLATFORM(WINDOWS)641FlushInstructionCache(GetCurrentProcess(), start, end - start);642#elif PPSSPP_ARCH(ARM)643644#if defined(__clang__) || defined(__ANDROID__)645__clear_cache(start, end);646#else647__builtin___clear_cache(start, end);648#endif649650#endif651}652653void ARMXEmitter::SetCC(CCFlags cond)654{655condition = cond << 28;656}657658void ARMXEmitter::NOP(int count)659{660for (int i = 0; i < count; i++) {661Write32(condition | 0x01A00000);662}663}664665void ARMXEmitter::SETEND(bool BE)666{667//SETEND is non-conditional668Write32(0xF1010000 | (BE << 9));669}670void ARMXEmitter::BKPT(u16 arg)671{672Write32(condition | 0x01200070 | (arg << 4 & 0x000FFF00) | (arg & 0x0000000F));673}674void ARMXEmitter::YIELD()675{676Write32(condition | 0x0320F001);677}678679FixupBranch ARMXEmitter::B()680{681FixupBranch branch;682branch.type = 0; // Zero for B683branch.ptr = code;684branch.condition = condition;685//We'll write NOP here for now.686Write32(condition | 0x01A00000);687return branch;688}689FixupBranch ARMXEmitter::BL()690{691FixupBranch branch;692branch.type = 1; // Zero for B693branch.ptr = code;694branch.condition = condition;695//We'll write NOP here for now.696Write32(condition | 0x01A00000);697return branch;698}699700FixupBranch ARMXEmitter::B_CC(CCFlags Cond)701{702FixupBranch branch;703branch.type = 0; // Zero for B704branch.ptr = code;705branch.condition = Cond << 28;706//We'll write NOP here for now.707Write32(condition | 0x01A00000);708return branch;709}710void ARMXEmitter::B_CC(CCFlags Cond, const void *fnptr)711{712ptrdiff_t distance = (intptr_t)fnptr - ((intptr_t)(code) + 8);713_assert_msg_(distance > -0x2000000 && distance < 0x2000000,714"B_CC out of range (%p calls %p)", code, fnptr);715716Write32((Cond << 28) | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));717}718FixupBranch ARMXEmitter::BL_CC(CCFlags Cond)719{720FixupBranch branch;721branch.type = 1; // Zero for B722branch.ptr = code;723branch.condition = Cond << 28;724//We'll write NOP here for now.725Write32(condition | 0x01A00000);726return branch;727}728void ARMXEmitter::SetJumpTarget(FixupBranch const &branch)729{730ptrdiff_t distance = ((intptr_t)(code) - 8) - (intptr_t)branch.ptr;731_assert_msg_(distance > -0x2000000 && distance < 0x2000000,732"SetJumpTarget out of range (%p calls %p)", code, branch.ptr);733u32 instr = (u32)(branch.condition | ((distance >> 2) & 0x00FFFFFF));734instr |= branch.type == 0 ? /* B */ 0x0A000000 : /* BL */ 0x0B000000;735*(u32*)branch.ptr = instr;736}737void ARMXEmitter::B(const void *fnptr)738{739ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);740_assert_msg_(distance > -0x2000000 && distance < 0x2000000,741"B out of range (%p calls %p)", code, fnptr);742743Write32(condition | 0x0A000000 | ((distance >> 2) & 0x00FFFFFF));744}745746void ARMXEmitter::B(ARMReg src)747{748Write32(condition | 0x012FFF10 | src);749}750751bool ARMXEmitter::BLInRange(const void *fnptr) const {752ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);753if (distance <= -0x2000000 || distance >= 0x2000000)754return false;755else756return true;757}758759void ARMXEmitter::BL(const void *fnptr)760{761ptrdiff_t distance = (intptr_t)fnptr - (intptr_t(code) + 8);762_assert_msg_(distance > -0x2000000 && distance < 0x2000000,763"BL out of range (%p calls %p)", code, fnptr);764Write32(condition | 0x0B000000 | ((distance >> 2) & 0x00FFFFFF));765}766void ARMXEmitter::BL(ARMReg src)767{768Write32(condition | 0x012FFF30 | src);769}770771void ARMXEmitter::PUSH(const int num, ...)772{773u16 RegList = 0;774u8 Reg;775int i;776va_list vl;777va_start(vl, num);778for (i = 0; i < num; i++) {779Reg = va_arg(vl, u32);780RegList |= (1 << Reg);781}782va_end(vl);783Write32(condition | (2349 << 16) | RegList);784}785786void ARMXEmitter::POP(const int num, ...)787{788u16 RegList = 0;789u8 Reg;790int i;791va_list vl;792va_start(vl, num);793for (i=0;i<num;i++)794{795Reg = va_arg(vl, u32);796RegList |= (1 << Reg);797}798va_end(vl);799Write32(condition | (2237 << 16) | RegList);800}801802void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, Operand2 op2)803{804Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | op2.Imm5() | (op << 4) | src);805}806void ARMXEmitter::WriteShiftedDataOp(u32 op, bool SetFlags, ARMReg dest, ARMReg src, ARMReg op2)807{808Write32(condition | (13 << 21) | (SetFlags << 20) | (dest << 12) | (op2 << 8) | (op << 4) | src);809}810811// IMM, REG, IMMSREG, RSR812// -1 for invalid if the instruction doesn't support that813const s32 InstOps[][4] = {{16, 0, 0, 0}, // AND(s)814{17, 1, 1, 1}, // EOR(s)815{18, 2, 2, 2}, // SUB(s)816{19, 3, 3, 3}, // RSB(s)817{20, 4, 4, 4}, // ADD(s)818{21, 5, 5, 5}, // ADC(s)819{22, 6, 6, 6}, // SBC(s)820{23, 7, 7, 7}, // RSC(s)821{24, 8, 8, 8}, // TST822{25, 9, 9, 9}, // TEQ823{26, 10, 10, 10}, // CMP824{27, 11, 11, 11}, // CMN825{28, 12, 12, 12}, // ORR(s)826{29, 13, 13, 13}, // MOV(s)827{30, 14, 14, 14}, // BIC(s)828{31, 15, 15, 15}, // MVN(s)829{24, -1, -1, -1}, // MOVW830{26, -1, -1, -1}, // MOVT831};832833const char *InstNames[] = { "AND",834"EOR",835"SUB",836"RSB",837"ADD",838"ADC",839"SBC",840"RSC",841"TST",842"TEQ",843"CMP",844"CMN",845"ORR",846"MOV",847"BIC",848"MVN",849"MOVW",850"MOVT",851};852853void ARMXEmitter::AND (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm); }854void ARMXEmitter::ANDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(0, Rd, Rn, Rm, true); }855void ARMXEmitter::EOR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm); }856void ARMXEmitter::EORS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(1, Rd, Rn, Rm, true); }857void ARMXEmitter::SUB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm); }858void ARMXEmitter::SUBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(2, Rd, Rn, Rm, true); }859void ARMXEmitter::RSB (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm); }860void ARMXEmitter::RSBS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(3, Rd, Rn, Rm, true); }861void ARMXEmitter::ADD (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm); }862void ARMXEmitter::ADDS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(4, Rd, Rn, Rm, true); }863void ARMXEmitter::ADC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm); }864void ARMXEmitter::ADCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(5, Rd, Rn, Rm, true); }865void ARMXEmitter::SBC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm); }866void ARMXEmitter::SBCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(6, Rd, Rn, Rm, true); }867void ARMXEmitter::RSC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm); }868void ARMXEmitter::RSCS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(7, Rd, Rn, Rm, true); }869void ARMXEmitter::TST ( ARMReg Rn, Operand2 Rm) { WriteInstruction(8, R0, Rn, Rm, true); }870void ARMXEmitter::TEQ ( ARMReg Rn, Operand2 Rm) { WriteInstruction(9, R0, Rn, Rm, true); }871void ARMXEmitter::CMP ( ARMReg Rn, Operand2 Rm) { WriteInstruction(10, R0, Rn, Rm, true); }872void ARMXEmitter::CMN ( ARMReg Rn, Operand2 Rm) { WriteInstruction(11, R0, Rn, Rm, true); }873void ARMXEmitter::ORR (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm); }874void ARMXEmitter::ORRS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(12, Rd, Rn, Rm, true); }875void ARMXEmitter::MOV (ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm); }876void ARMXEmitter::MOVS(ARMReg Rd, Operand2 Rm) { WriteInstruction(13, Rd, R0, Rm, true); }877void ARMXEmitter::BIC (ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm); }878void ARMXEmitter::BICS(ARMReg Rd, ARMReg Rn, Operand2 Rm) { WriteInstruction(14, Rd, Rn, Rm, true); }879void ARMXEmitter::MVN (ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm); }880void ARMXEmitter::MVNS(ARMReg Rd, Operand2 Rm) { WriteInstruction(15, Rd, R0, Rm, true); }881void ARMXEmitter::MOVW(ARMReg Rd, Operand2 Rm) { WriteInstruction(16, Rd, R0, Rm); }882void ARMXEmitter::MOVT(ARMReg Rd, Operand2 Rm, bool TopBits) { WriteInstruction(17, Rd, R0, TopBits ? Rm.Value >> 16 : Rm); }883884void ARMXEmitter::WriteInstruction (u32 Op, ARMReg Rd, ARMReg Rn, Operand2 Rm, bool SetFlags) // This can get renamed later885{886s32 op = InstOps[Op][Rm.GetType()]; // Type always decided by last operand887u32 Data = Rm.GetData();888if (Rm.GetType() == TYPE_IMM)889{890switch (Op)891{892// MOV cases that support IMM16893case 16:894case 17:895Data = Rm.Imm16();896break;897default:898break;899}900}901if (op == -1)902_assert_msg_(false, "%s not yet support %d", InstNames[Op], Rm.GetType());903Write32(condition | (op << 21) | (SetFlags ? (1 << 20) : 0) | Rn << 16 | Rd << 12 | Data);904}905906// Data Operations907void ARMXEmitter::WriteSignedMultiply(u32 Op, u32 Op2, u32 Op3, ARMReg dest, ARMReg r1, ARMReg r2)908{909Write32(condition | (0x7 << 24) | (Op << 20) | (dest << 16) | (Op2 << 12) | (r1 << 8) | (Op3 << 5) | (1 << 4) | r2);910}911void ARMXEmitter::UDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)912{913_assert_msg_(cpu_info.bIDIVa, "Trying to use integer divide on hardware that doesn't support it.");914WriteSignedMultiply(3, 0xF, 0, dest, divisor, dividend);915}916void ARMXEmitter::SDIV(ARMReg dest, ARMReg dividend, ARMReg divisor)917{918_assert_msg_(cpu_info.bIDIVa, "Trying to use integer divide on hardware that doesn't support it.");919WriteSignedMultiply(1, 0xF, 0, dest, divisor, dividend);920}921922void ARMXEmitter::LSL (ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, false, dest, src, op2);}923void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, Operand2 op2) { WriteShiftedDataOp(0, true, dest, src, op2);}924void ARMXEmitter::LSL (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, false, dest, src, op2);}925void ARMXEmitter::LSLS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(1, true, dest, src, op2);}926void ARMXEmitter::LSR (ARMReg dest, ARMReg src, Operand2 op2) {927_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "LSR must have a non-zero shift (use LSL.)");928WriteShiftedDataOp(2, false, dest, src, op2);929}930void ARMXEmitter::LSRS(ARMReg dest, ARMReg src, Operand2 op2) {931_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "LSRS must have a non-zero shift (use LSLS.)");932WriteShiftedDataOp(2, true, dest, src, op2);933}934void ARMXEmitter::LSR (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(3, false, dest, src, op2);}935void ARMXEmitter::LSRS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(3, true, dest, src, op2);}936void ARMXEmitter::ASR (ARMReg dest, ARMReg src, Operand2 op2) {937_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "ASR must have a non-zero shift (use LSL.)");938WriteShiftedDataOp(4, false, dest, src, op2);939}940void ARMXEmitter::ASRS(ARMReg dest, ARMReg src, Operand2 op2) {941_assert_msg_(op2.GetType() != TYPE_IMM || op2.Imm5() != 0, "ASRS must have a non-zero shift (use LSLS.)");942WriteShiftedDataOp(4, true, dest, src, op2);943}944void ARMXEmitter::ASR (ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(5, false, dest, src, op2);}945void ARMXEmitter::ASRS(ARMReg dest, ARMReg src, ARMReg op2) { WriteShiftedDataOp(5, true, dest, src, op2);}946947void ARMXEmitter::MUL (ARMReg dest, ARMReg src, ARMReg op2)948{949Write32(condition | (dest << 16) | (src << 8) | (9 << 4) | op2);950}951void ARMXEmitter::MULS(ARMReg dest, ARMReg src, ARMReg op2)952{953Write32(condition | (1 << 20) | (dest << 16) | (src << 8) | (9 << 4) | op2);954}955956void ARMXEmitter::Write4OpMultiply(u32 op, ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn) {957Write32(condition | (op << 20) | (destHi << 16) | (destLo << 12) | (rm << 8) | (9 << 4) | rn);958}959960void ARMXEmitter::UMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)961{962Write4OpMultiply(0x8, destLo, destHi, rn, rm);963}964965void ARMXEmitter::SMULL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)966{967Write4OpMultiply(0xC, destLo, destHi, rn, rm);968}969970void ARMXEmitter::UMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)971{972Write4OpMultiply(0xA, destLo, destHi, rn, rm);973}974975void ARMXEmitter::SMLAL(ARMReg destLo, ARMReg destHi, ARMReg rm, ARMReg rn)976{977Write4OpMultiply(0xE, destLo, destHi, rn, rm);978}979980void ARMXEmitter::UBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)981{982Write32(condition | (0x7E0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn);983}984985void ARMXEmitter::SBFX(ARMReg dest, ARMReg rn, u8 lsb, u8 width)986{987Write32(condition | (0x7A0 << 16) | ((width - 1) << 16) | (dest << 12) | (lsb << 7) | (5 << 4) | rn);988}989990void ARMXEmitter::CLZ(ARMReg rd, ARMReg rm)991{992Write32(condition | (0x16F << 16) | (rd << 12) | (0xF1 << 4) | rm);993}994995void ARMXEmitter::PLD(ARMReg rn, int offset, bool forWrite) {996_dbg_assert_msg_(offset < 0x3ff && offset > -0x3ff, "PLD: Max 12 bits of offset allowed");997998bool U = offset >= 0;999if (offset < 0) offset = -offset;1000bool R = !forWrite;1001// Conditions not allowed1002Write32((0xF5 << 24) | (U << 23) | (R << 22) | (1 << 20) | ((int)rn << 16) | (0xF << 12) | offset);1003}100410051006void ARMXEmitter::BFI(ARMReg rd, ARMReg rn, u8 lsb, u8 width)1007{1008u32 msb = (lsb + width - 1);1009if (msb > 31) msb = 31;1010Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | rn);1011}10121013void ARMXEmitter::BFC(ARMReg rd, u8 lsb, u8 width)1014{1015u32 msb = (lsb + width - 1);1016if (msb > 31) msb = 31;1017Write32(condition | (0x7C0 << 16) | (msb << 16) | (rd << 12) | (lsb << 7) | (1 << 4) | 15);1018}10191020void ARMXEmitter::SXTB (ARMReg dest, ARMReg op2)1021{1022Write32(condition | (0x6AF << 16) | (dest << 12) | (7 << 4) | op2);1023}10241025void ARMXEmitter::SXTH (ARMReg dest, ARMReg op2, u8 rotation)1026{1027SXTAH(dest, (ARMReg)15, op2, rotation);1028}1029void ARMXEmitter::SXTAH(ARMReg dest, ARMReg src, ARMReg op2, u8 rotation)1030{1031// bits ten and 11 are the rotation amount, see 8.8.232 for more1032// information1033Write32(condition | (0x6B << 20) | (src << 16) | (dest << 12) | (rotation << 10) | (7 << 4) | op2);1034}1035void ARMXEmitter::RBIT(ARMReg dest, ARMReg src)1036{1037Write32(condition | (0x6F << 20) | (0xF << 16) | (dest << 12) | (0xF3 << 4) | src);1038}1039void ARMXEmitter::REV (ARMReg dest, ARMReg src)1040{1041Write32(condition | (0x6BF << 16) | (dest << 12) | (0xF3 << 4) | src);1042}1043void ARMXEmitter::REV16(ARMReg dest, ARMReg src)1044{1045Write32(condition | (0x6BF << 16) | (dest << 12) | (0xFB << 4) | src);1046}10471048void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, Operand2 op2)1049{1050Write32(condition | (0x320F << 12) | (write_nzcvq << 19) | (write_g << 18) | op2.Imm12Mod());1051}1052void ARMXEmitter::_MSR (bool write_nzcvq, bool write_g, ARMReg src)1053{1054Write32(condition | (0x120F << 12) | (write_nzcvq << 19) | (write_g << 18) | src);1055}1056void ARMXEmitter::MRS (ARMReg dest)1057{1058Write32(condition | (16 << 20) | (15 << 16) | (dest << 12));1059}1060void ARMXEmitter::LDREX(ARMReg dest, ARMReg base)1061{1062Write32(condition | (25 << 20) | (base << 16) | (dest << 12) | 0xF9F);1063}1064void ARMXEmitter::STREX(ARMReg result, ARMReg base, ARMReg op)1065{1066_assert_msg_((result != base && result != op), "STREX dest can't be other two registers");1067Write32(condition | (24 << 20) | (base << 16) | (result << 12) | (0xF9 << 4) | op);1068}1069void ARMXEmitter::DMB ()1070{1071Write32(0xF57FF05E);1072}1073void ARMXEmitter::SVC(Operand2 op)1074{1075Write32(condition | (0x0F << 24) | op.Imm24());1076}10771078// IMM, REG, IMMSREG, RSR1079// -1 for invalid if the instruction doesn't support that1080const s32 LoadStoreOps[][4] = {1081{0x40, 0x60, 0x60, -1}, // STR1082{0x41, 0x61, 0x61, -1}, // LDR1083{0x44, 0x64, 0x64, -1}, // STRB1084{0x45, 0x65, 0x65, -1}, // LDRB1085// Special encodings1086{ 0x4, 0x0, -1, -1}, // STRH1087{ 0x5, 0x1, -1, -1}, // LDRH1088{ 0x5, 0x1, -1, -1}, // LDRSB1089{ 0x5, 0x1, -1, -1}, // LDRSH1090};1091const char *LoadStoreNames[] = {1092"STR",1093"LDR",1094"STRB",1095"LDRB",1096"STRH",1097"LDRH",1098"LDRSB",1099"LDRSH",1100};11011102void ARMXEmitter::WriteStoreOp(u32 Op, ARMReg Rt, ARMReg Rn, Operand2 Rm, bool RegAdd)1103{1104s32 op = LoadStoreOps[Op][Rm.GetType()]; // Type always decided by last operand1105u32 Data;11061107// Qualcomm chipsets get /really/ angry if you don't use index, even if the offset is zero.1108// Some of these encodings require Index at all times anyway. Doesn't really matter.1109// bool Index = op2 != 0 ? true : false;1110bool Index = true;1111bool Add = false;11121113// Special Encoding (misc addressing mode)1114bool SpecialOp = false;1115bool Half = false;1116bool SignedLoad = false;11171118if (op == -1)1119_assert_msg_(false, "%s does not support %d", LoadStoreNames[Op], Rm.GetType());11201121switch (Op)1122{1123case 4: // STRH1124SpecialOp = true;1125Half = true;1126SignedLoad = false;1127break;1128case 5: // LDRH1129SpecialOp = true;1130Half = true;1131SignedLoad = false;1132break;1133case 6: // LDRSB1134SpecialOp = true;1135Half = false;1136SignedLoad = true;1137break;1138case 7: // LDRSH1139SpecialOp = true;1140Half = true;1141SignedLoad = true;1142break;1143}1144switch (Rm.GetType())1145{1146case TYPE_IMM:1147{1148s32 Temp = (s32)Rm.Value;1149Data = abs(Temp);1150// The offset is encoded differently on this one.1151if (SpecialOp)1152Data = ((Data & 0xF0) << 4) | (Data & 0xF);1153if (Temp >= 0) Add = true;1154}1155break;1156case TYPE_REG:1157Data = Rm.GetData();1158Add = RegAdd;1159break;1160case TYPE_IMMSREG:1161if (!SpecialOp)1162{1163Data = Rm.GetData();1164Add = RegAdd;1165break;1166}1167// Intentional fallthrough: TYPE_IMMSREG not supported for misc addressing.1168default:1169// RSR not supported for any of these1170// We already have the warning above1171BKPT(0x2);1172return;1173break;1174}1175if (SpecialOp)1176{1177// Add SpecialOp things1178Data = (0x9 << 4) | (SignedLoad << 6) | (Half << 5) | Data;1179}1180Write32(condition | (op << 20) | (Index << 24) | (Add << 23) | (Rn << 16) | (Rt << 12) | Data);1181}11821183void ARMXEmitter::LDR (ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(1, dest, base, op2, RegAdd);}1184void ARMXEmitter::LDRB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(3, dest, base, op2, RegAdd);}1185void ARMXEmitter::LDRH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(5, dest, base, op2, RegAdd);}1186void ARMXEmitter::LDRSB(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(6, dest, base, op2, RegAdd);}1187void ARMXEmitter::LDRSH(ARMReg dest, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(7, dest, base, op2, RegAdd);}1188void ARMXEmitter::STR (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(0, result, base, op2, RegAdd);}1189void ARMXEmitter::STRH (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(4, result, base, op2, RegAdd);}1190void ARMXEmitter::STRB (ARMReg result, ARMReg base, Operand2 op2, bool RegAdd) { WriteStoreOp(2, result, base, op2, RegAdd);}11911192#define VA_TO_REGLIST(RegList, Regnum) \1193{ \1194u8 Reg; \1195va_list vl; \1196va_start(vl, Regnum); \1197for (int i = 0; i < Regnum; i++) \1198{ \1199Reg = va_arg(vl, u32); \1200RegList |= (1 << Reg); \1201} \1202va_end(vl); \1203}12041205void ARMXEmitter::WriteRegStoreOp(u32 op, ARMReg dest, bool WriteBack, u16 RegList)1206{1207Write32(condition | (op << 20) | (WriteBack << 21) | (dest << 16) | RegList);1208}1209void ARMXEmitter::WriteVRegStoreOp(u32 op, ARMReg Rn, bool Double, bool WriteBack, ARMReg Vd, u8 numregs)1210{1211_dbg_assert_msg_(!WriteBack || Rn != R_PC, "VLDM/VSTM cannot use WriteBack with PC (PC is deprecated anyway.)");1212Write32(condition | (op << 20) | (WriteBack << 21) | (Rn << 16) | EncodeVd(Vd) | ((0xA | (int)Double) << 8) | (numregs << (int)Double));1213}1214void ARMXEmitter::STMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)1215{1216u16 RegList = 0;1217VA_TO_REGLIST(RegList, Regnum);1218WriteRegStoreOp(0x80 | 0x10 | 0, dest, WriteBack, RegList);1219}1220void ARMXEmitter::LDMFD(ARMReg dest, bool WriteBack, const int Regnum, ...)1221{1222u16 RegList = 0;1223VA_TO_REGLIST(RegList, Regnum);1224WriteRegStoreOp(0x80 | 0x08 | 1, dest, WriteBack, RegList);1225}1226void ARMXEmitter::STMIA(ARMReg dest, bool WriteBack, const int Regnum, ...)1227{1228u16 RegList = 0;1229VA_TO_REGLIST(RegList, Regnum);1230WriteRegStoreOp(0x80 | 0x08 | 0, dest, WriteBack, RegList);1231}1232void ARMXEmitter::LDMIA(ARMReg dest, bool WriteBack, const int Regnum, ...)1233{1234u16 RegList = 0;1235VA_TO_REGLIST(RegList, Regnum);1236WriteRegStoreOp(0x80 | 0x08 | 1, dest, WriteBack, RegList);1237}1238void ARMXEmitter::STM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...)1239{1240u16 RegList = 0;1241VA_TO_REGLIST(RegList, Regnum);1242WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 0, dest, WriteBack, RegList);1243}1244void ARMXEmitter::LDM(ARMReg dest, bool Add, bool Before, bool WriteBack, const int Regnum, ...)1245{1246u16 RegList = 0;1247VA_TO_REGLIST(RegList, Regnum);1248WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 1, dest, WriteBack, RegList);1249}12501251void ARMXEmitter::STMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList)1252{1253WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 0, dest, WriteBack, RegList);1254}1255void ARMXEmitter::LDMBitmask(ARMReg dest, bool Add, bool Before, bool WriteBack, const u16 RegList)1256{1257WriteRegStoreOp(0x80 | (Before << 4) | (Add << 3) | 1, dest, WriteBack, RegList);1258}12591260#undef VA_TO_REGLIST12611262// NEON Specific1263void ARMXEmitter::VABD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1264{1265_assert_msg_(Vd >= D0, "Pass invalid register to VABD(float)");1266bool register_quad = Vd >= Q0;12671268// Gets encoded as a double register1269Vd = SubBase(Vd);1270Vn = SubBase(Vn);1271Vm = SubBase(Vm);12721273Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \1274| ((Vd & 0xF) << 12) | (0xD << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \1275| ((Vm & 0x10) << 2) | (Vm & 0xF));1276}1277void ARMXEmitter::VADD(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1278{1279_assert_msg_(Vd >= D0, "Pass invalid register to VADD(integer)");12801281bool register_quad = Vd >= Q0;12821283// Gets encoded as a double register1284Vd = SubBase(Vd);1285Vn = SubBase(Vn);1286Vm = SubBase(Vm);12871288Write32((0xF2 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \1289| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (register_quad << 6) \1290| ((Vm & 0x10) << 1) | (Vm & 0xF));12911292}1293void ARMXEmitter::VSUB(IntegerSize Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1294{1295_assert_msg_(Vd >= Q0, "Pass invalid register to VSUB(integer)");12961297// Gets encoded as a double register1298Vd = SubBase(Vd);1299Vn = SubBase(Vn);1300Vm = SubBase(Vm);13011302Write32((0xF3 << 24) | ((Vd & 0x10) << 18) | (Size << 20) | ((Vn & 0xF) << 16) \1303| ((Vd & 0xF) << 12) | (0x8 << 8) | ((Vn & 0x10) << 3) | (1 << 6) \1304| ((Vm & 0x10) << 2) | (Vm & 0xF));1305}13061307extern const VFPEnc VFPOps[16][2] = {1308{{0xE0, 0xA0}, { -1, -1}}, // 0: VMLA1309{{0xE1, 0xA4}, { -1, -1}}, // 1: VNMLA1310{{0xE0, 0xA4}, { -1, -1}}, // 2: VMLS1311{{0xE1, 0xA0}, { -1, -1}}, // 3: VNMLS1312{{0xE3, 0xA0}, { -1, -1}}, // 4: VADD1313{{0xE3, 0xA4}, { -1, -1}}, // 5: VSUB1314{{0xE2, 0xA0}, { -1, -1}}, // 6: VMUL1315{{0xE2, 0xA4}, { -1, -1}}, // 7: VNMUL1316{{0xEB, 0xAC}, { -1 /* 0x3B */, -1 /* 0x70 */}}, // 8: VABS(Vn(0x0) used for encoding)1317{{0xE8, 0xA0}, { -1, -1}}, // 9: VDIV1318{{0xEB, 0xA4}, { -1 /* 0x3B */, -1 /* 0x78 */}}, // 10: VNEG(Vn(0x1) used for encoding)1319{{0xEB, 0xAC}, { -1, -1}}, // 11: VSQRT (Vn(0x1) used for encoding)1320{{0xEB, 0xA4}, { -1, -1}}, // 12: VCMP (Vn(0x4 | #0 ? 1 : 0) used for encoding)1321{{0xEB, 0xAC}, { -1, -1}}, // 13: VCMPE (Vn(0x4 | #0 ? 1 : 0) used for encoding)1322{{ -1, -1}, {0x3B, 0x30}}, // 14: VABSi1323};13241325const char *VFPOpNames[16] = {1326"VMLA",1327"VNMLA",1328"VMLS",1329"VNMLS",1330"VADD",1331"VSUB",1332"VMUL",1333"VNMUL",1334"VABS",1335"VDIV",1336"VNEG",1337"VSQRT",1338"VCMP",1339"VCMPE",1340"VABSi",1341};13421343u32 EncodeVd(ARMReg Vd)1344{1345bool quad_reg = Vd >= Q0;1346bool double_reg = Vd >= D0;13471348ARMReg Reg = SubBase(Vd);13491350if (quad_reg)1351return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12);1352else {1353if (double_reg)1354return ((Reg & 0x10) << 18) | ((Reg & 0xF) << 12);1355else1356return ((Reg & 0x1) << 22) | ((Reg & 0x1E) << 11);1357}1358}1359u32 EncodeVn(ARMReg Vn)1360{1361bool quad_reg = Vn >= Q0;1362bool double_reg = Vn >= D0;13631364ARMReg Reg = SubBase(Vn);1365if (quad_reg)1366return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3);1367else {1368if (double_reg)1369return ((Reg & 0xF) << 16) | ((Reg & 0x10) << 3);1370else1371return ((Reg & 0x1E) << 15) | ((Reg & 0x1) << 7);1372}1373}1374u32 EncodeVm(ARMReg Vm)1375{1376bool quad_reg = Vm >= Q0;1377bool double_reg = Vm >= D0;13781379ARMReg Reg = SubBase(Vm);13801381if (quad_reg)1382return ((Reg & 0x10) << 1) | (Reg & 0xF);1383else {1384if (double_reg)1385return ((Reg & 0x10) << 1) | (Reg & 0xF);1386else1387return ((Reg & 0x1) << 5) | (Reg >> 1);1388}1389}13901391u32 encodedSize(u32 value)1392{1393if (value & I_8)1394return 0;1395else if (value & I_16)1396return 1;1397else if ((value & I_32) || (value & F_32))1398return 2;1399else if (value & I_64)1400return 3;1401else1402_dbg_assert_msg_(false, "Passed invalid size to integer NEON instruction");1403return 0;1404}14051406ARMReg SubBase(ARMReg Reg)1407{1408if (Reg >= S0)1409{1410if (Reg >= D0)1411{1412if (Reg >= Q0)1413return (ARMReg)((Reg - Q0) * 2); // Always gets encoded as a double register1414return (ARMReg)(Reg - D0);1415}1416return (ARMReg)(Reg - S0);1417}1418return Reg;1419}14201421ARMReg DScalar(ARMReg dreg, int subScalar) {1422int dr = (int)(SubBase(dreg)) & 0xF;1423int scalar = ((subScalar << 4) | dr);1424ARMReg ret = (ARMReg)(D0 + scalar);1425// ILOG("Scalar: %i D0: %i AR: %i", scalar, (int)D0, (int)ret);1426return ret;1427}14281429// Convert to a DScalar1430ARMReg QScalar(ARMReg qreg, int subScalar) {1431int dr = (int)(SubBase(qreg)) & 0xF;1432if (subScalar & 2) {1433dr++;1434}1435int scalar = (((subScalar & 1) << 4) | dr);1436ARMReg ret = (ARMReg)(D0 + scalar);1437return ret;1438}14391440void ARMXEmitter::WriteVFPDataOp(u32 Op, ARMReg Vd, ARMReg Vn, ARMReg Vm)1441{1442bool quad_reg = Vd >= Q0;1443bool double_reg = Vd >= D0 && Vd < Q0;14441445VFPEnc enc = VFPOps[Op][quad_reg];1446if (enc.opc1 == -1 && enc.opc2 == -1)1447_assert_msg_(false, "%s does not support %s", VFPOpNames[Op], quad_reg ? "NEON" : "VFP");1448u32 VdEnc = EncodeVd(Vd);1449u32 VnEnc = EncodeVn(Vn);1450u32 VmEnc = EncodeVm(Vm);1451u32 cond = quad_reg ? (0xF << 28) : condition;14521453Write32(cond | (enc.opc1 << 20) | VnEnc | VdEnc | (enc.opc2 << 4) | (quad_reg << 6) | (double_reg << 8) | VmEnc);1454}1455void ARMXEmitter::VMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(0, Vd, Vn, Vm); }1456void ARMXEmitter::VNMLA(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(1, Vd, Vn, Vm); }1457void ARMXEmitter::VMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(2, Vd, Vn, Vm); }1458void ARMXEmitter::VNMLS(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(3, Vd, Vn, Vm); }1459void ARMXEmitter::VADD(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(4, Vd, Vn, Vm); }1460void ARMXEmitter::VSUB(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(5, Vd, Vn, Vm); }1461void ARMXEmitter::VMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(6, Vd, Vn, Vm); }1462void ARMXEmitter::VNMUL(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(7, Vd, Vn, Vm); }1463void ARMXEmitter::VABS(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(8, Vd, D0, Vm); }1464void ARMXEmitter::VDIV(ARMReg Vd, ARMReg Vn, ARMReg Vm){ WriteVFPDataOp(9, Vd, Vn, Vm); }1465void ARMXEmitter::VNEG(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(10, Vd, D1, Vm); }1466void ARMXEmitter::VSQRT(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(11, Vd, D1, Vm); }1467void ARMXEmitter::VCMP(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(12, Vd, D4, Vm); }1468void ARMXEmitter::VCMPE(ARMReg Vd, ARMReg Vm){ WriteVFPDataOp(13, Vd, D4, Vm); }1469void ARMXEmitter::VCMP(ARMReg Vd){ WriteVFPDataOp(12, Vd, D5, D0); }1470void ARMXEmitter::VCMPE(ARMReg Vd){ WriteVFPDataOp(13, Vd, D5, D0); }14711472void ARMXEmitter::VLDMIA(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)1473{1474WriteVRegStoreOp(0x80 | 0x40 | 0x8 | 1, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);1475}14761477void ARMXEmitter::VSTMIA(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)1478{1479WriteVRegStoreOp(0x80 | 0x40 | 0x8, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);1480}14811482void ARMXEmitter::VLDMDB(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)1483{1484_dbg_assert_msg_(WriteBack, "Writeback is required for VLDMDB");1485WriteVRegStoreOp(0x80 | 0x040 | 0x10 | 1, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);1486}14871488void ARMXEmitter::VSTMDB(ARMReg ptr, bool WriteBack, ARMReg firstvreg, int numvregs)1489{1490_dbg_assert_msg_(WriteBack, "Writeback is required for VSTMDB");1491WriteVRegStoreOp(0x80 | 0x040 | 0x10, ptr, firstvreg >= D0, WriteBack, firstvreg, numvregs);1492}14931494void ARMXEmitter::VLDR(ARMReg Dest, ARMReg Base, s16 offset)1495{1496_assert_msg_(Dest >= S0 && Dest <= D31, "Passed Invalid dest register to VLDR");1497_assert_msg_(Base <= R15, "Passed invalid Base register to VLDR");14981499bool Add = offset >= 0 ? true : false;1500u32 imm = abs(offset);15011502_assert_msg_((imm & 0xC03) == 0, "VLDR: Offset needs to be word aligned and small enough");15031504if (imm & 0xC03)1505ERROR_LOG(Log::JIT, "VLDR: Bad offset %08x", imm);15061507bool single_reg = Dest < D0;15081509Dest = SubBase(Dest);15101511if (single_reg)1512{1513Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x1) << 22) | (1 << 20) | (Base << 16) \1514| ((Dest & 0x1E) << 11) | (10 << 8) | (imm >> 2));1515}1516else1517{1518Write32(condition | (0xD << 24) | (Add << 23) | ((Dest & 0x10) << 18) | (1 << 20) | (Base << 16) \1519| ((Dest & 0xF) << 12) | (11 << 8) | (imm >> 2));1520}1521}1522void ARMXEmitter::VSTR(ARMReg Src, ARMReg Base, s16 offset)1523{1524_assert_msg_(Src >= S0 && Src <= D31, "Passed invalid src register to VSTR");1525_assert_msg_(Base <= R15, "Passed invalid base register to VSTR");15261527bool Add = offset >= 0 ? true : false;1528u32 imm = abs(offset);15291530_assert_msg_((imm & 0xC03) == 0, "VSTR: Offset needs to be word aligned and small enough");15311532if (imm & 0xC03)1533ERROR_LOG(Log::JIT, "VSTR: Bad offset %08x", imm);15341535bool single_reg = Src < D0;15361537Src = SubBase(Src);15381539if (single_reg)1540{1541Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x1) << 22) | (Base << 16) \1542| ((Src & 0x1E) << 11) | (10 << 8) | (imm >> 2));1543}1544else1545{1546Write32(condition | (0xD << 24) | (Add << 23) | ((Src & 0x10) << 18) | (Base << 16) \1547| ((Src & 0xF) << 12) | (11 << 8) | (imm >> 2));1548}1549}15501551void ARMXEmitter::VMRS_APSR() {1552Write32(condition | 0x0EF10A10 | (15 << 12));1553}1554void ARMXEmitter::VMRS(ARMReg Rt) {1555Write32(condition | (0xEF << 20) | (1 << 16) | (Rt << 12) | 0xA10);1556}1557void ARMXEmitter::VMSR(ARMReg Rt) {1558Write32(condition | (0xEE << 20) | (1 << 16) | (Rt << 12) | 0xA10);1559}15601561void ARMXEmitter::VMOV(ARMReg Dest, Operand2 op2)1562{1563int sz = Dest >= D0 ? (1 << 8) : 0;1564Write32(condition | (0xEB << 20) | EncodeVd(Dest) | (5 << 9) | sz | op2.Imm8VFP());1565}15661567void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, u32 imm)1568{1569_assert_msg_(Vd >= D0, "VMOV_neon #imm must target a double or quad");1570bool register_quad = Vd >= Q0;15711572int cmode = 0;1573int op = 0;1574Operand2 op2 = IMM(0);15751576u32 imm8 = imm & 0xFF;1577imm8 = imm8 | (imm8 << 8) | (imm8 << 16) | (imm8 << 24);15781579if (Size == I_8) {1580imm = imm8;1581} else if (Size == I_16) {1582imm &= 0xFFFF;1583imm = imm | (imm << 16);1584}15851586if ((imm & 0x000000FF) == imm) {1587op = 0;1588cmode = 0 << 1;1589op2 = IMM(imm);1590} else if ((imm & 0x0000FF00) == imm) {1591op = 0;1592cmode = 1 << 1;1593op2 = IMM(imm >> 8);1594} else if ((imm & 0x00FF0000) == imm) {1595op = 0;1596cmode = 2 << 1;1597op2 = IMM(imm >> 16);1598} else if ((imm & 0xFF000000) == imm) {1599op = 0;1600cmode = 3 << 1;1601op2 = IMM(imm >> 24);1602} else if ((imm & 0x00FF00FF) == imm && (imm >> 16) == (imm & 0x00FF)) {1603op = 0;1604cmode = 4 << 1;1605op2 = IMM(imm & 0xFF);1606} else if ((imm & 0xFF00FF00) == imm && (imm >> 16) == (imm & 0xFF00)) {1607op = 0;1608cmode = 5 << 1;1609op2 = IMM(imm & 0xFF);1610} else if ((imm & 0x0000FFFF) == (imm | 0x000000FF)) {1611op = 0;1612cmode = (6 << 1) | 0;1613op2 = IMM(imm >> 8);1614} else if ((imm & 0x00FFFFFF) == (imm | 0x0000FFFF)) {1615op = 0;1616cmode = (6 << 1) | 1;1617op2 = IMM(imm >> 16);1618} else if (imm == imm8) {1619op = 0;1620cmode = (7 << 1) | 0;1621op2 = IMM(imm & 0xFF);1622} else if (TryMakeFloatIMM8(imm, op2)) {1623op = 0;1624cmode = (7 << 1) | 1;1625} else {1626// 64-bit constant form - technically we could take a u64.1627bool canEncode = true;1628u8 imm8 = 0;1629for (int i = 0, i8 = 0; i < 32; i += 8, ++i8) {1630u8 b = (imm >> i) & 0xFF;1631if (b == 0xFF) {1632imm8 |= 1 << i8;1633} else if (b != 0x00) {1634canEncode = false;1635}1636}1637if (canEncode) {1638// We don't want zeros in the second lane.1639op = 1;1640cmode = 7 << 1;1641op2 = IMM(imm8 | (imm8 << 4));1642} else {1643_assert_msg_(false, "VMOV_neon #imm invalid constant value");1644}1645}16461647// No condition allowed.1648Write32((15 << 28) | (0x28 << 20) | EncodeVd(Vd) | (cmode << 8) | (register_quad << 6) | (op << 5) | (1 << 4) | op2.Imm8ASIMD());1649}16501651void ARMXEmitter::VMOV_neon(u32 Size, ARMReg Vd, ARMReg Rt, int lane)1652{1653int opc1 = 0;1654int opc2 = 0;16551656switch (Size & ~(I_SIGNED | I_UNSIGNED))1657{1658case I_8: opc1 = 2 | (lane >> 2); opc2 = lane & 3; break;1659case I_16: opc1 = lane >> 1; opc2 = 1 | ((lane & 1) << 1); break;1660case I_32:1661case F_32:1662_assert_msg_((Size & I_UNSIGNED) == 0, "Cannot use UNSIGNED for I_32 or F_32");1663opc1 = lane & 1;1664break;1665default:1666_assert_msg_(false, "VMOV_neon unsupported size");1667}16681669if (Vd < S0 && Rt >= D0 && Rt < Q0)1670{1671// Oh, reading to reg, our params are backwards.1672ARMReg Src = Rt;1673ARMReg Dest = Vd;16741675_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED | F_32 | I_32)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VMOV, unless F_32/I_32");1676int U = (Size & I_UNSIGNED) ? (1 << 23) : 0;16771678Write32(condition | (0xE1 << 20) | U | (opc1 << 21) | EncodeVn(Src) | (Dest << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));1679}1680else if (Rt < S0 && Vd >= D0 && Vd < Q0)1681{1682ARMReg Src = Rt;1683ARMReg Dest = Vd;1684Write32(condition | (0xE0 << 20) | (opc1 << 21) | EncodeVn(Dest) | (Src << 12) | (0xB << 8) | (opc2 << 5) | (1 << 4));1685}1686else1687_assert_msg_(false, "VMOV_neon unsupported arguments (Dx -> Rx or Rx -> Dx)");1688}16891690void ARMXEmitter::VMOV(ARMReg Vd, ARMReg Rt, ARMReg Rt2)1691{1692if (Vd < S0 && Rt < S0 && Rt2 >= D0)1693{1694// Oh, reading to regs, our params are backwards.1695ARMReg Src = Rt2;1696ARMReg Dest1 = Vd;1697ARMReg Dest2 = Rt;1698Write32(condition | (0xC5 << 20) | (Dest2 << 16) | (Dest1 << 12) | (0xB << 8) | EncodeVm(Src) | (1 << 4));1699}1700else if (Vd >= D0 && Rt < S0 && Rt2 < S0)1701{1702ARMReg Dest = Vd;1703ARMReg Src1 = Rt;1704ARMReg Src2 = Rt2;1705Write32(condition | (0xC4 << 20) | (Src2 << 16) | (Src1 << 12) | (0xB << 8) | EncodeVm(Dest) | (1 << 4));1706}1707else1708_assert_msg_(false, "VMOV_neon requires either Dm, Rt, Rt2 or Rt, Rt2, Dm.");1709}17101711void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src, bool high)1712{1713_assert_msg_(Src < S0, "This VMOV doesn't support SRC other than ARM Reg");1714_assert_msg_(Dest >= D0, "This VMOV doesn't support DEST other than VFP");17151716Dest = SubBase(Dest);17171718Write32(condition | (0xE << 24) | (high << 21) | ((Dest & 0xF) << 16) | (Src << 12) \1719| (0xB << 8) | ((Dest & 0x10) << 3) | (1 << 4));1720}17211722void ARMXEmitter::VMOV(ARMReg Dest, ARMReg Src)1723{1724if (Dest == Src) {1725WARN_LOG(Log::JIT, "VMOV %s, %s - same register", ARMRegAsString(Src), ARMRegAsString(Dest));1726}1727if (Dest > R15)1728{1729if (Src < S0)1730{1731if (Dest < D0)1732{1733// Moving to a Neon register FROM ARM Reg1734Dest = (ARMReg)(Dest - S0);1735Write32(condition | (0xE0 << 20) | ((Dest & 0x1E) << 15) | (Src << 12) \1736| (0xA << 8) | ((Dest & 0x1) << 7) | (1 << 4));1737return;1738}1739else1740{1741// Move 64bit from Arm reg1742_assert_msg_(false, "This VMOV doesn't support moving 64bit ARM to NEON");1743return;1744}1745}1746}1747else1748{1749if (Src > R15)1750{1751if (Src < D0)1752{1753// Moving to ARM Reg from Neon Register1754Src = (ARMReg)(Src - S0);1755Write32(condition | (0xE1 << 20) | ((Src & 0x1E) << 15) | (Dest << 12) \1756| (0xA << 8) | ((Src & 0x1) << 7) | (1 << 4));1757return;1758}1759else1760{1761// Move 64bit To Arm reg1762_assert_msg_(false, "This VMOV doesn't support moving 64bit ARM From NEON");1763return;1764}1765}1766else1767{1768// Move Arm reg to Arm reg1769_assert_msg_(false, "VMOV doesn't support moving ARM registers");1770}1771}1772// Moving NEON registers1773int SrcSize = Src < D0 ? 1 : Src < Q0 ? 2 : 4;1774int DestSize = Dest < D0 ? 1 : Dest < Q0 ? 2 : 4;1775bool Single = DestSize == 1;1776bool Quad = DestSize == 4;17771778_assert_msg_(SrcSize == DestSize, "VMOV doesn't support moving different register sizes");1779if (SrcSize != DestSize) {1780ERROR_LOG(Log::JIT, "SrcSize: %i (%s) DestDize: %i (%s)", SrcSize, ARMRegAsString(Src), DestSize, ARMRegAsString(Dest));1781}17821783Dest = SubBase(Dest);1784Src = SubBase(Src);17851786if (Single)1787{1788Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | ((Dest & 0x1E) << 11) \1789| (0x5 << 9) | (1 << 6) | ((Src & 0x1) << 5) | ((Src & 0x1E) >> 1));1790}1791else1792{1793// Double and quad1794if (Quad)1795{1796// Gets encoded as a Double register1797Write32((0xF2 << 24) | ((Dest & 0x10) << 18) | (2 << 20) | ((Src & 0xF) << 16) \1798| ((Dest & 0xF) << 12) | (1 << 8) | ((Src & 0x10) << 3) | (1 << 6) \1799| ((Src & 0x10) << 1) | (1 << 4) | (Src & 0xF));18001801}1802else1803{1804Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | ((Dest & 0xF) << 12) \1805| (0x2D << 6) | ((Src & 0x10) << 1) | (Src & 0xF));1806}1807}1808}18091810void ARMXEmitter::VCVT(ARMReg Dest, ARMReg Source, int flags)1811{1812bool single_reg = (Dest < D0) && (Source < D0);1813bool single_double = !single_reg && (Source < D0 || Dest < D0);1814bool single_to_double = Source < D0;1815int op = ((flags & TO_INT) ? (flags & ROUND_TO_ZERO) : (flags & IS_SIGNED)) ? 1 : 0;1816int op2 = ((flags & TO_INT) ? (flags & IS_SIGNED) : 0) ? 1 : 0;1817Dest = SubBase(Dest);1818Source = SubBase(Source);18191820if (single_double)1821{1822// S32<->F641823if (flags & TO_INT)1824{1825if (single_to_double)1826{1827Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) \1828| ((Dest & 0xF) << 12) | (op << 7) | (0x2D << 6) | ((Source & 0x1) << 5) | (Source >> 1));1829} else {1830Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \1831| ((Dest & 0x1E) << 11) | (op << 7) | (0x2D << 6) | ((Source & 0x10) << 1) | (Source & 0xF));1832}1833}1834// F32<->F641835else {1836if (single_to_double)1837{1838Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x3 << 20) | (0x7 << 16) \1839| ((Dest & 0xF) << 12) | (0x2F << 6) | ((Source & 0x1) << 5) | (Source >> 1));1840} else {1841Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x3 << 20) | (0x7 << 16) \1842| ((Dest & 0x1E) << 11) | (0x2B << 6) | ((Source & 0x10) << 1) | (Source & 0xF));1843}1844}1845} else if (single_reg) {1846Write32(condition | (0x1D << 23) | ((Dest & 0x1) << 22) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \1847| ((Dest & 0x1E) << 11) | (op << 7) | (0x29 << 6) | ((Source & 0x1) << 5) | (Source >> 1));1848} else {1849Write32(condition | (0x1D << 23) | ((Dest & 0x10) << 18) | (0x7 << 19) | ((flags & TO_INT) << 18) | (op2 << 16) \1850| ((Dest & 0xF) << 12) | (1 << 8) | (op << 7) | (0x29 << 6) | ((Source & 0x10) << 1) | (Source & 0xF));1851}1852}18531854void ARMXEmitter::VABA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1855{1856_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);1857_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);1858bool register_quad = Vd >= Q0;18591860Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \1861| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x71 << 4) | (register_quad << 6) | EncodeVm(Vm));1862}18631864void ARMXEmitter::VABAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1865{1866_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);1867_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);1868_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);1869_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);18701871Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \1872| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x50 << 4) | EncodeVm(Vm));1873}18741875void ARMXEmitter::VABD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1876{1877_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);1878bool register_quad = Vd >= Q0;18791880if (Size & F_32)1881Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | EncodeVm(Vm));1882else1883Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | EncodeVn(Vn) \1884| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | (register_quad << 6) | EncodeVm(Vm));1885}18861887void ARMXEmitter::VABDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1888{1889_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);1890_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);1891_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);1892_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);18931894Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | EncodeVn(Vn) \1895| (encodedSize(Size) << 20) | EncodeVd(Vd) | (0x70 << 4) | EncodeVm(Vm));1896}18971898void ARMXEmitter::VABS(u32 Size, ARMReg Vd, ARMReg Vm)1899{1900_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);1901bool register_quad = Vd >= Q0;19021903Write32((0xF3 << 24) | (0xB1 << 16) | (encodedSize(Size) << 18) | EncodeVd(Vd) \1904| ((Size & F_32 ? 1 : 0) << 10) | (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm));1905}19061907void ARMXEmitter::VACGE(ARMReg Vd, ARMReg Vn, ARMReg Vm)1908{1909// Only Float1910_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);1911bool register_quad = Vd >= Q0;19121913Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) \1914| (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));1915}19161917void ARMXEmitter::VACGT(ARMReg Vd, ARMReg Vn, ARMReg Vm)1918{1919// Only Float1920_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);1921bool register_quad = Vd >= Q0;19221923Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) \1924| (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));1925}19261927void ARMXEmitter::VACLE(ARMReg Vd, ARMReg Vn, ARMReg Vm)1928{1929VACGE(Vd, Vm, Vn);1930}19311932void ARMXEmitter::VACLT(ARMReg Vd, ARMReg Vn, ARMReg Vm)1933{1934VACGT(Vd, Vn, Vm);1935}19361937void ARMXEmitter::VADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1938{1939_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);19401941bool register_quad = Vd >= Q0;19421943if (Size & F_32)1944Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD << 8) | (register_quad << 6) | EncodeVm(Vm));1945else1946Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \1947| (0x8 << 8) | (register_quad << 6) | EncodeVm(Vm));1948}19491950void ARMXEmitter::VADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1951{1952_dbg_assert_msg_(Vd < Q0, "Pass invalid register to %s", __FUNCTION__);1953_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);1954_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);1955_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);19561957Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \1958| EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm));1959}19601961void ARMXEmitter::VADDL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1962{1963_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);1964_dbg_assert_msg_(Vn >= D0 && Vn < Q0, "Pass invalid register to %s", __FUNCTION__);1965_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);1966_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);19671968Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \1969| EncodeVd(Vd) | EncodeVm(Vm));1970}1971void ARMXEmitter::VADDW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)1972{1973_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);1974_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);1975_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);1976_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);19771978Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) \1979| EncodeVd(Vd) | (1 << 8) | EncodeVm(Vm));1980}1981void ARMXEmitter::VAND(ARMReg Vd, ARMReg Vn, ARMReg Vm)1982{1983_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);1984_dbg_assert_msg_(!(Vd == Vn && Vn == Vm), "All operands the same for %s is a nop", __FUNCTION__);1985// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);1986bool register_quad = Vd >= Q0;19871988Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));1989}1990void ARMXEmitter::VBIC(ARMReg Vd, ARMReg Vn, ARMReg Vm)1991{1992_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);1993// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);1994bool register_quad = Vd >= Q0;19951996Write32((0xF2 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));1997}1998void ARMXEmitter::VEOR(ARMReg Vd, ARMReg Vn, ARMReg Vm)1999{2000_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s: %i", __FUNCTION__, Vd);2001bool register_quad = Vd >= Q0;20022003Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));2004}2005void ARMXEmitter::VBIF(ARMReg Vd, ARMReg Vn, ARMReg Vm)2006{2007_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2008// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);2009bool register_quad = Vd >= Q0;20102011Write32((0xF3 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));2012}2013void ARMXEmitter::VBIT(ARMReg Vd, ARMReg Vn, ARMReg Vm)2014{2015_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2016// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);2017bool register_quad = Vd >= Q0;20182019Write32((0xF3 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));2020}2021void ARMXEmitter::VBSL(ARMReg Vd, ARMReg Vn, ARMReg Vm)2022{2023_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2024// _dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);2025bool register_quad = Vd >= Q0;20262027Write32((0xF3 << 24) | (1 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));2028}2029void ARMXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2030{2031_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);20322033bool register_quad = Vd >= Q0;2034if (Size & F_32)2035Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));2036else2037Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \2038| (0x81 << 4) | (register_quad << 6) | EncodeVm(Vm));20392040}2041void ARMXEmitter::VCEQ(u32 Size, ARMReg Vd, ARMReg Vm)2042{2043_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);20442045bool register_quad = Vd >= Q0;20462047Write32((0xF2 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \2048| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));2049}2050void ARMXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2051{2052_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);20532054bool register_quad = Vd >= Q0;2055if (Size & F_32)2056Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));2057else2058Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \2059| (0x31 << 4) | (register_quad << 6) | EncodeVm(Vm));2060}2061void ARMXEmitter::VCGE(u32 Size, ARMReg Vd, ARMReg Vm)2062{2063_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);20642065bool register_quad = Vd >= Q0;2066Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) \2067| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x8 << 4) | (register_quad << 6) | EncodeVm(Vm));2068}2069void ARMXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2070{2071_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);20722073bool register_quad = Vd >= Q0;2074if (Size & F_32)2075Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xE0 << 4) | (register_quad << 6) | EncodeVm(Vm));2076else2077Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) \2078| (0x30 << 4) | (register_quad << 6) | EncodeVm(Vm));2079}2080void ARMXEmitter::VCGT(u32 Size, ARMReg Vd, ARMReg Vm)2081{2082_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);20832084bool register_quad = Vd >= Q0;2085Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \2086| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (register_quad << 6) | EncodeVm(Vm));2087}2088void ARMXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2089{2090VCGE(Size, Vd, Vm, Vn);2091}2092void ARMXEmitter::VCLE(u32 Size, ARMReg Vd, ARMReg Vm)2093{2094_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);20952096bool register_quad = Vd >= Q0;2097Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \2098| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (3 << 7) | (register_quad << 6) | EncodeVm(Vm));2099}2100void ARMXEmitter::VCLS(u32 Size, ARMReg Vd, ARMReg Vm)2101{2102_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2103_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);21042105bool register_quad = Vd >= Q0;2106Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \2107| EncodeVd(Vd) | (1 << 10) | (register_quad << 6) | EncodeVm(Vm));2108}2109void ARMXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2110{2111VCGT(Size, Vd, Vm, Vn);2112}2113void ARMXEmitter::VCLT(u32 Size, ARMReg Vd, ARMReg Vm)2114{2115_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);21162117bool register_quad = Vd >= Q0;2118Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) | (1 << 16) \2119| EncodeVd(Vd) | ((Size & F_32 ? 1 : 0) << 10) | (0x20 << 4) | (register_quad << 6) | EncodeVm(Vm));2120}2121void ARMXEmitter::VCLZ(u32 Size, ARMReg Vd, ARMReg Vm)2122{2123_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);21242125bool register_quad = Vd >= Q0;2126Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \2127| EncodeVd(Vd) | (0x48 << 4) | (register_quad << 6) | EncodeVm(Vm));2128}2129void ARMXEmitter::VCNT(u32 Size, ARMReg Vd, ARMReg Vm)2130{2131_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2132_dbg_assert_msg_(Size & I_8, "Can only use I_8 with %s", __FUNCTION__);21332134bool register_quad = Vd >= Q0;2135Write32((0xF3 << 24) | (0xD << 20) | (encodedSize(Size) << 18) \2136| EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));2137}2138void ARMXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Vm, u8 index)2139{2140_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2141_dbg_assert_msg_(Vm >= D0, "Pass invalid register to %s", __FUNCTION__);21422143bool register_quad = Vd >= Q0;2144u32 imm4 = 0;2145if (Size & I_8)2146imm4 = (index << 1) | 1;2147else if (Size & I_16)2148imm4 = (index << 2) | 2;2149else if (Size & (I_32 | F_32))2150imm4 = (index << 3) | 4;2151Write32((0xF3 << 24) | (0xB << 20) | (imm4 << 16) \2152| EncodeVd(Vd) | (0xC << 8) | (register_quad << 6) | EncodeVm(Vm));2153}2154void ARMXEmitter::VDUP(u32 Size, ARMReg Vd, ARMReg Rt)2155{2156_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2157_dbg_assert_msg_(Rt < S0, "Pass invalid register to %s", __FUNCTION__);21582159bool register_quad = Vd >= Q0;2160Vd = SubBase(Vd);2161u8 sizeEncoded = 0;2162if (Size & I_8)2163sizeEncoded = 2;2164else if (Size & I_16)2165sizeEncoded = 1;2166else if (Size & I_32)2167sizeEncoded = 0;21682169Write32((0xEE << 24) | (0x8 << 20) | ((sizeEncoded & 2) << 21) | (register_quad << 21) \2170| ((Vd & 0xF) << 16) | (Rt << 12) | (0xB1 << 4) | ((Vd & 0x10) << 3) | ((sizeEncoded & 1) << 5));2171}2172void ARMXEmitter::VEXT(ARMReg Vd, ARMReg Vn, ARMReg Vm, u8 index)2173{2174_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2175bool register_quad = Vd >= Q0;21762177Write32((0xF2 << 24) | (0xB << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (index & 0xF) \2178| (register_quad << 6) | EncodeVm(Vm));2179}2180void ARMXEmitter::VFMA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2181{2182_dbg_assert_msg_(Size == F_32, "Passed invalid size to FP-only NEON instruction");2183_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2184_dbg_assert_msg_(cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__);2185bool register_quad = Vd >= Q0;21862187Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));2188}2189void ARMXEmitter::VFMS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2190{2191_dbg_assert_msg_(Size == F_32, "Passed invalid size to FP-only NEON instruction");2192_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2193_dbg_assert_msg_(cpu_info.bVFPv4, "Can't use %s when CPU doesn't support it", __FUNCTION__);2194bool register_quad = Vd >= Q0;21952196Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xC1 << 4) | (register_quad << 6) | EncodeVm(Vm));2197}2198void ARMXEmitter::VHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2199{2200_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2201_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);22022203bool register_quad = Vd >= Q0;22042205Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \2206| EncodeVn(Vn) | EncodeVd(Vd) | (register_quad << 6) | EncodeVm(Vm));2207}2208void ARMXEmitter::VHSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2209{2210_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2211_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);22122213bool register_quad = Vd >= Q0;22142215Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \2216| EncodeVn(Vn) | EncodeVd(Vd) | (1 << 9) | (register_quad << 6) | EncodeVm(Vm));2217}2218void ARMXEmitter::VMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2219{2220_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);22212222bool register_quad = Vd >= Q0;22232224if (Size & F_32)2225Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm));2226else2227Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \2228| EncodeVn(Vn) | EncodeVd(Vd) | (0x60 << 4) | (register_quad << 6) | EncodeVm(Vm));2229}2230void ARMXEmitter::VMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2231{2232_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);22332234bool register_quad = Vd >= Q0;22352236if (Size & F_32)2237Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | (register_quad << 6) | EncodeVm(Vm));2238else2239Write32((0xF2 << 24) | (((Size & I_UNSIGNED) ? 1 : 0) << 23) | (encodedSize(Size) << 20) \2240| EncodeVn(Vn) | EncodeVd(Vd) | (0x61 << 4) | (register_quad << 6) | EncodeVm(Vm));2241}2242void ARMXEmitter::VMLA(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2243{2244_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);22452246bool register_quad = Vd >= Q0;22472248if (Size & F_32)2249Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));2250else2251Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));2252}2253void ARMXEmitter::VMLS(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2254{2255_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);22562257bool register_quad = Vd >= Q0;22582259if (Size & F_32)2260Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));2261else2262Write32((0xF2 << 24) | (1 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (register_quad << 6) | EncodeVm(Vm));2263}2264void ARMXEmitter::VMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2265{2266_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);2267_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);2268_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);2269_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);22702271Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \2272| EncodeVn(Vn) | EncodeVd(Vd) | (0x80 << 4) | EncodeVm(Vm));2273}2274void ARMXEmitter::VMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2275{2276_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);2277_dbg_assert_msg_(Vn >= Q0, "Pass invalid register to %s", __FUNCTION__);2278_dbg_assert_msg_(Vm >= D0 && Vm < Q0, "Pass invalid register to %s", __FUNCTION__);2279_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float.", __FUNCTION__);22802281Write32((0xF2 << 24) | ((Size & I_UNSIGNED ? 1 : 0) << 24) | (encodedSize(Size) << 20) \2282| EncodeVn(Vn) | EncodeVd(Vd) | (0xA0 << 4) | EncodeVm(Vm));2283}2284void ARMXEmitter::VMUL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2285{2286_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);22872288bool register_quad = Vd >= Q0;22892290if (Size & F_32)2291Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD1 << 4) | (register_quad << 6) | EncodeVm(Vm));2292else2293Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (encodedSize(Size) << 20) | \2294EncodeVn(Vn) | EncodeVd(Vd) | (0x91 << 4) | (register_quad << 6) | EncodeVm(Vm));2295}2296void ARMXEmitter::VMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2297{2298_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2299_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);23002301Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2302(0xC0 << 4) | ((Size & I_POLYNOMIAL) ? 1 << 9 : 0) | EncodeVm(Vm));2303}2304void ARMXEmitter::VMLA_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2305{2306_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);23072308bool register_quad = Vd >= Q0;23092310// No idea if the Non-Q case here works. Not really that interested.2311if (Size & F_32)2312Write32((0xF2 << 24) | (register_quad << 24) | (1 << 23) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x14 << 4) | EncodeVm(Vm));2313else2314_dbg_assert_msg_(false, "VMLA_scalar only supports float atm");2315//else2316// Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x90 << 4) | (1 << 6) | EncodeVm(Vm));2317// Unsigned support missing2318}2319void ARMXEmitter::VMUL_scalar(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2320{2321_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);23222323bool register_quad = Vd >= Q0;23242325int VmEnc = EncodeVm(Vm);2326// No idea if the Non-Q case here works. Not really that interested.2327if (Size & F_32) // Q flag2328Write32((0xF2 << 24) | (register_quad << 24) | (1 << 23) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x94 << 4) | VmEnc);2329else2330_dbg_assert_msg_(false, "VMUL_scalar only supports float atm");23312332// Write32((0xF2 << 24) | ((Size & I_POLYNOMIAL) ? (1 << 24) : 0) | (1 << 23) | (encodedSize(Size) << 20) |2333// EncodeVn(Vn) | EncodeVd(Vd) | (0x84 << 4) | (register_quad << 6) | EncodeVm(Vm));2334// Unsigned support missing2335}23362337void ARMXEmitter::VMVN(ARMReg Vd, ARMReg Vm)2338{2339_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);23402341bool register_quad = Vd >= Q0;23422343Write32((0xF3B << 20) | \2344EncodeVd(Vd) | (0xB << 7) | (register_quad << 6) | EncodeVm(Vm));2345}23462347void ARMXEmitter::VNEG(u32 Size, ARMReg Vd, ARMReg Vm)2348{2349_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);23502351bool register_quad = Vd >= Q0;23522353Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 16) | \2354EncodeVd(Vd) | ((Size & F_32) ? 1 << 10 : 0) | (0xE << 6) | (register_quad << 6) | EncodeVm(Vm));2355}2356void ARMXEmitter::VORN(ARMReg Vd, ARMReg Vn, ARMReg Vm)2357{2358_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);23592360bool register_quad = Vd >= Q0;23612362Write32((0xF2 << 24) | (3 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));2363}2364void ARMXEmitter::VORR(ARMReg Vd, ARMReg Vn, ARMReg Vm)2365{2366_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2367_dbg_assert_msg_(!(Vd == Vn && Vn == Vm), "All operands the same for %s is a nop", __FUNCTION__);23682369bool register_quad = Vd >= Q0;23702371Write32((0xF2 << 24) | (2 << 20) | EncodeVn(Vn) | EncodeVd(Vd) | (0x11 << 4) | (register_quad << 6) | EncodeVm(Vm));2372}2373void ARMXEmitter::VPADAL(u32 Size, ARMReg Vd, ARMReg Vm)2374{2375_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2376_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);23772378bool register_quad = Vd >= Q0;23792380Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \2381(0x60 << 4) | ((Size & I_UNSIGNED) ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm));2382}2383void ARMXEmitter::VPADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2384{2385_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);23862387if (Size & F_32)2388Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xD0 << 4) | EncodeVm(Vm));2389else2390Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2391(0xB1 << 4) | EncodeVm(Vm));2392}2393void ARMXEmitter::VPADDL(u32 Size, ARMReg Vd, ARMReg Vm)2394{2395_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2396_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);23972398bool register_quad = Vd >= Q0;23992400Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \2401(0x20 << 4) | (Size & I_UNSIGNED ? 1 << 7 : 0) | (register_quad << 6) | EncodeVm(Vm));2402}2403void ARMXEmitter::VPMAX(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2404{2405_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);24062407if (Size & F_32)2408Write32((0xF3 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm));2409else2410Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2411(0xA0 << 4) | EncodeVm(Vm));2412}2413void ARMXEmitter::VPMIN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2414{2415_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);24162417if (Size & F_32)2418Write32((0xF3 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF0 << 4) | EncodeVm(Vm));2419else2420Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2421(0xA1 << 4) | EncodeVm(Vm));2422}2423void ARMXEmitter::VQABS(u32 Size, ARMReg Vd, ARMReg Vm)2424{2425_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2426_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24272428bool register_quad = Vd >= Q0;24292430Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \2431(0x70 << 4) | (register_quad << 6) | EncodeVm(Vm));2432}2433void ARMXEmitter::VQADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2434{2435_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2436_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24372438bool register_quad = Vd >= Q0;24392440Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2441(0x1 << 4) | (register_quad << 6) | EncodeVm(Vm));2442}2443void ARMXEmitter::VQDMLAL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2444{2445_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2446_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24472448Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2449(0x90 << 4) | EncodeVm(Vm));2450}2451void ARMXEmitter::VQDMLSL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2452{2453_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2454_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24552456Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2457(0xB0 << 4) | EncodeVm(Vm));2458}2459void ARMXEmitter::VQDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2460{2461_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2462_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24632464Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2465(0xB0 << 4) | EncodeVm(Vm));2466}2467void ARMXEmitter::VQDMULL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2468{2469_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2470_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24712472Write32((0xF2 << 24) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2473(0xD0 << 4) | EncodeVm(Vm));2474}2475void ARMXEmitter::VQNEG(u32 Size, ARMReg Vd, ARMReg Vm)2476{2477_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2478_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24792480bool register_quad = Vd >= Q0;24812482Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | EncodeVd(Vd) | \2483(0x78 << 4) | (register_quad << 6) | EncodeVm(Vm));2484}2485void ARMXEmitter::VQRDMULH(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2486{2487_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2488_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24892490Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2491(0xB0 << 4) | EncodeVm(Vm));2492}2493void ARMXEmitter::VQRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2494{2495_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2496_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);24972498bool register_quad = Vd >= Q0;24992500Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2501(0x51 << 4) | (register_quad << 6) | EncodeVm(Vm));2502}2503void ARMXEmitter::VQSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2504{2505_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2506_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);25072508bool register_quad = Vd >= Q0;25092510Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2511(0x41 << 4) | (register_quad << 6) | EncodeVm(Vm));2512}2513void ARMXEmitter::VQSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2514{2515_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2516_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);25172518bool register_quad = Vd >= Q0;25192520Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2521(0x21 << 4) | (register_quad << 6) | EncodeVm(Vm));2522}2523void ARMXEmitter::VRADDHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2524{2525_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2526_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);25272528Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2529(0x40 << 4) | EncodeVm(Vm));2530}2531void ARMXEmitter::VRECPE(u32 Size, ARMReg Vd, ARMReg Vm)2532{2533_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);25342535bool register_quad = Vd >= Q0;25362537Write32((0xF3 << 24) | (0xB << 20) | (0xB << 16) | EncodeVd(Vd) | \2538(0x40 << 4) | (Size & F_32 ? 1 << 8 : 0) | (register_quad << 6) | EncodeVm(Vm));2539}2540void ARMXEmitter::VRECPS(ARMReg Vd, ARMReg Vn, ARMReg Vm)2541{2542_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);25432544bool register_quad = Vd >= Q0;25452546Write32((0xF2 << 24) | EncodeVn(Vn) | EncodeVd(Vd) | (0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm));2547}2548void ARMXEmitter::VRHADD(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2549{2550_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2551_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);25522553bool register_quad = Vd >= Q0;25542555Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2556(0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));2557}2558void ARMXEmitter::VRSHL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2559{2560_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2561_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);25622563bool register_quad = Vd >= Q0;25642565Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2566(0x50 << 4) | (register_quad << 6) | EncodeVm(Vm));2567}2568void ARMXEmitter::VRSQRTE(u32 Size, ARMReg Vd, ARMReg Vm)2569{2570_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);25712572bool register_quad = Vd >= Q0;2573Vd = SubBase(Vd);2574Vm = SubBase(Vm);25752576Write32((0xF3 << 24) | (0xB << 20) | ((Vd & 0x10) << 18) | (0xB << 16)2577| ((Vd & 0xF) << 12) | (9 << 7) | (Size & F_32 ? (1 << 8) : 0) | (register_quad << 6)2578| ((Vm & 0x10) << 1) | (Vm & 0xF));2579}2580void ARMXEmitter::VRSQRTS(ARMReg Vd, ARMReg Vn, ARMReg Vm)2581{2582_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);25832584bool register_quad = Vd >= Q0;25852586Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \2587(0xF1 << 4) | (register_quad << 6) | EncodeVm(Vm));2588}2589void ARMXEmitter::VRSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2590{2591_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2592_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);25932594Write32((0xF3 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2595(0x60 << 4) | EncodeVm(Vm));2596}2597void ARMXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vm, ARMReg Vn)2598{2599_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2600_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);26012602bool register_quad = Vd >= Q0;26032604Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2605(0x40 << 4) | (register_quad << 6) | EncodeVm(Vm));2606}26072608static int EncodeSizeShift(u32 Size, int amount, bool inverse, bool halve) {2609int sz = 0;2610switch (Size & 0xF) {2611case I_8: sz = 8; break;2612case I_16: sz = 16; break;2613case I_32: sz = 32; break;2614case I_64: sz = 64; break;2615}2616if (inverse && halve) {2617_dbg_assert_msg_(amount <= sz / 2, "Amount %d too large for narrowing shift (max %d)", amount, sz/2);2618return (sz / 2) + (sz / 2) - amount;2619} else if (inverse) {2620return sz + (sz - amount);2621} else {2622return sz + amount;2623}2624}26252626void ARMXEmitter::EncodeShiftByImm(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount, u8 opcode, bool register_quad, bool inverse, bool halve) {2627_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);2628_dbg_assert_msg_(!(Size & F_32), "%s doesn't support float", __FUNCTION__);2629int imm7 = EncodeSizeShift(Size, shiftAmount, inverse, halve);2630int L = (imm7 >> 6) & 1;2631int U = (Size & I_UNSIGNED) ? 1 : 0;2632u32 value = (0xF2 << 24) | (U << 24) | (1 << 23) | ((imm7 & 0x3f) << 16) | EncodeVd(Vd) | (opcode << 8) | (L << 7) | (register_quad << 6) | (1 << 4) | EncodeVm(Vm);2633Write32(value);2634}26352636void ARMXEmitter::VSHL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {2637EncodeShiftByImm((Size & ~I_UNSIGNED), Vd, Vm, shiftAmount, 0x5, Vd >= Q0, false, false);2638}26392640void ARMXEmitter::VSHLL(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {2641if ((u32)shiftAmount == (8 * (Size & 0xF))) {2642// Entirely different encoding (A2) for size == shift! Bleh.2643int sz = 0;2644switch (Size & 0xF) {2645case I_8: sz = 0; break;2646case I_16: sz = 1; break;2647case I_32: sz = 2; break;2648case I_64:2649_dbg_assert_msg_(false, "Cannot VSHLL 64-bit elements");2650}2651int imm6 = 0x32 | (sz << 2);2652u32 value = (0xF3 << 24) | (1 << 23) | (imm6 << 16) | EncodeVd(Vd) | (0x3 << 8) | EncodeVm(Vm);2653Write32(value);2654} else {2655EncodeShiftByImm((Size & ~I_UNSIGNED), Vd, Vm, shiftAmount, 0xA, false, false, false);2656}2657}26582659void ARMXEmitter::VSHR(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {2660EncodeShiftByImm(Size, Vd, Vm, shiftAmount, 0x0, Vd >= Q0, true, false);2661}26622663void ARMXEmitter::VSHRN(u32 Size, ARMReg Vd, ARMReg Vm, int shiftAmount) {2664// Reduce Size by 1 to encode correctly.2665EncodeShiftByImm(Size, Vd, Vm, shiftAmount, 0x8, false, true, true);2666}26672668void ARMXEmitter::VSUB(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2669{2670_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);26712672bool register_quad = Vd >= Q0;26732674if (Size & F_32)2675Write32((0xF2 << 24) | (1 << 21) | EncodeVn(Vn) | EncodeVd(Vd) | \2676(0xD0 << 4) | (register_quad << 6) | EncodeVm(Vm));2677else2678Write32((0xF3 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2679(0x80 << 4) | (register_quad << 6) | EncodeVm(Vm));2680}2681void ARMXEmitter::VSUBHN(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2682{2683_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);26842685Write32((0xF2 << 24) | (1 << 23) | ((encodedSize(Size) - 1) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2686(0x60 << 4) | EncodeVm(Vm));2687}2688void ARMXEmitter::VSUBL(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2689{2690_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);26912692Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2693(0x20 << 4) | EncodeVm(Vm));2694}2695void ARMXEmitter::VSUBW(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2696{2697_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);26982699Write32((0xF2 << 24) | (Size & I_UNSIGNED ? 1 << 24 : 0) | (1 << 23) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2700(0x30 << 4) | EncodeVm(Vm));2701}2702void ARMXEmitter::VSWP(ARMReg Vd, ARMReg Vm)2703{2704_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);27052706bool register_quad = Vd >= Q0;27072708Write32((0xF3 << 24) | (0xB << 20) | (1 << 17) | EncodeVd(Vd) | \2709(register_quad << 6) | EncodeVm(Vm));2710}2711void ARMXEmitter::VTRN(u32 Size, ARMReg Vd, ARMReg Vm)2712{2713_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);27142715bool register_quad = Vd >= Q0;27162717Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \2718(1 << 7) | (register_quad << 6) | EncodeVm(Vm));2719}2720void ARMXEmitter::VTST(u32 Size, ARMReg Vd, ARMReg Vn, ARMReg Vm)2721{2722_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);27232724bool register_quad = Vd >= Q0;27252726Write32((0xF2 << 24) | (encodedSize(Size) << 20) | EncodeVn(Vn) | EncodeVd(Vd) | \2727(0x81 << 4) | (register_quad << 6) | EncodeVm(Vm));2728}2729void ARMXEmitter::VUZP(u32 Size, ARMReg Vd, ARMReg Vm)2730{2731_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);27322733bool register_quad = Vd >= Q0;27342735Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \2736(0x10 << 4) | (register_quad << 6) | EncodeVm(Vm));2737}2738void ARMXEmitter::VZIP(u32 Size, ARMReg Vd, ARMReg Vm)2739{2740_dbg_assert_msg_(Vd >= D0, "Pass invalid register to %s", __FUNCTION__);27412742bool register_quad = Vd >= Q0;27432744Write32((0xF3 << 24) | (0xB << 20) | (encodedSize(Size) << 18) | (1 << 17) | EncodeVd(Vd) | \2745(0x18 << 4) | (register_quad << 6) | EncodeVm(Vm));2746}27472748void ARMXEmitter::VMOVL(u32 Size, ARMReg Vd, ARMReg Vm)2749{2750_dbg_assert_msg_(Vd >= Q0, "Pass invalid register to %s", __FUNCTION__);2751_dbg_assert_msg_(Vm >= D0 && Vm <= D31, "Pass invalid register to %s", __FUNCTION__);2752_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VMOVL");27532754bool unsign = (Size & I_UNSIGNED) != 0;2755int imm3 = 0;2756if (Size & I_8) imm3 = 1;2757if (Size & I_16) imm3 = 2;2758if (Size & I_32) imm3 = 4;27592760Write32((0xF2 << 24) | (unsign << 24) | (1 << 23) | (imm3 << 19) | EncodeVd(Vd) | \2761(0xA1 << 4) | EncodeVm(Vm));2762}27632764void ARMXEmitter::VMOVN(u32 Size, ARMReg Vd, ARMReg Vm)2765{2766_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);2767_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);2768_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);27692770// For consistency with assembler syntax and VMOVL - encode one size down.2771u32 halfSize = encodedSize(Size) - 1;27722773Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | EncodeVm(Vm));2774}27752776void ARMXEmitter::VQMOVN(u32 Size, ARMReg Vd, ARMReg Vm)2777{2778_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);2779_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);2780_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in %s NEON", __FUNCTION__);2781_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);27822783u32 halfSize = encodedSize(Size) - 1;2784u32 op = (1 << 7) | (Size & I_UNSIGNED ? 1 << 6 : 0);27852786Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | op | EncodeVm(Vm));2787}27882789void ARMXEmitter::VQMOVUN(u32 Size, ARMReg Vd, ARMReg Vm)2790{2791_dbg_assert_msg_(Vm >= Q0, "Pass invalid register to %s", __FUNCTION__);2792_dbg_assert_msg_(Vd >= D0 && Vd <= D31, "Pass invalid register to %s", __FUNCTION__);2793_dbg_assert_msg_((Size & I_8) == 0, "%s cannot narrow from I_8", __FUNCTION__);27942795u32 halfSize = encodedSize(Size) - 1;2796u32 op = (1 << 6);27972798Write32((0xF3B << 20) | (halfSize << 18) | (1 << 17) | EncodeVd(Vd) | (1 << 9) | op | EncodeVm(Vm));2799}28002801void ARMXEmitter::VCVT(u32 Size, ARMReg Vd, ARMReg Vm)2802{2803_dbg_assert_msg_((Size & (I_UNSIGNED | I_SIGNED)) != 0, "Must specify I_SIGNED or I_UNSIGNED in VCVT NEON");28042805bool register_quad = Vd >= Q0;2806bool toInteger = (Size & I_32) != 0;2807bool isUnsigned = (Size & I_UNSIGNED) != 0;2808int op = (toInteger << 1) | (int)isUnsigned;28092810Write32((0xF3 << 24) | (0xBB << 16) | EncodeVd(Vd) | (0x3 << 9) | (op << 7) | (register_quad << 6) | EncodeVm(Vm));2811}28122813static int RegCountToType(int nRegs, NEONAlignment align) {2814switch (nRegs) {2815case 1:2816_dbg_assert_msg_(!((int)align & 1), "align & 1 must be == 0");2817return 7;2818case 2:2819_dbg_assert_msg_(!((int)align == 3), "align must be != 3");2820return 10;2821case 3:2822_dbg_assert_msg_(!((int)align & 1), "align & 1 must be == 0");2823return 6;2824case 4:2825return 2;2826default:2827_dbg_assert_msg_(false, "Invalid number of registers passed to vector load/store");2828return 0;2829}2830}28312832void ARMXEmitter::WriteVLDST1(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm)2833{2834u32 spacing = RegCountToType(regCount, align); // Only support loading to 1 reg2835// Gets encoded as a double register2836Vd = SubBase(Vd);28372838Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (load << 21) | (Rn << 16)2839| ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6)2840| (align << 4) | Rm);2841}28422843void ARMXEmitter::VLD1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm) {2844WriteVLDST1(true, Size, Vd, Rn, regCount, align, Rm);2845}28462847void ARMXEmitter::VST1(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm) {2848WriteVLDST1(false, Size, Vd, Rn, regCount, align, Rm);2849}28502851void ARMXEmitter::WriteVLDST1_lane(bool load, u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm)2852{2853bool register_quad = Vd >= Q0;28542855Vd = SubBase(Vd);2856// Support quad lanes by converting to D lanes2857if (register_quad && lane > 1) {2858Vd = (ARMReg)((int)Vd + 1);2859lane -= 2;2860}2861int encSize = encodedSize(Size);2862int index_align = 0;2863switch (encSize) {2864case 0: index_align = lane << 1; break;2865case 1: index_align = lane << 2; if (aligned) index_align |= 1; break;2866case 2: index_align = lane << 3; if (aligned) index_align |= 3; break;2867default:2868break;2869}28702871Write32((0xF4 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (load << 21) | (Rn << 16)2872| ((Vd & 0xF) << 12) | (encSize << 10)2873| (index_align << 4) | Rm);2874}28752876void ARMXEmitter::VLD1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm) {2877WriteVLDST1_lane(true, Size, Vd, Rn, lane, aligned, Rm);2878}28792880void ARMXEmitter::VST1_lane(u32 Size, ARMReg Vd, ARMReg Rn, int lane, bool aligned, ARMReg Rm) {2881WriteVLDST1_lane(false, Size, Vd, Rn, lane, aligned, Rm);2882}28832884void ARMXEmitter::VLD1_all_lanes(u32 Size, ARMReg Vd, ARMReg Rn, bool aligned, ARMReg Rm) {2885bool register_quad = Vd >= Q0;28862887Vd = SubBase(Vd);28882889int T = register_quad; // two D registers28902891Write32((0xF4 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16)2892| ((Vd & 0xF) << 12) | (0xC << 8) | (encodedSize(Size) << 6)2893| (T << 5) | (aligned << 4) | Rm);2894}28952896/*2897void ARMXEmitter::VLD2(u32 Size, ARMReg Vd, ARMReg Rn, int regCount, NEONAlignment align, ARMReg Rm)2898{2899u32 spacing = 0x8; // Single spaced registers2900// Gets encoded as a double register2901Vd = SubBase(Vd);29022903Write32((0xF4 << 24) | ((Vd & 0x10) << 18) | (1 << 21) | (Rn << 16)2904| ((Vd & 0xF) << 12) | (spacing << 8) | (encodedSize(Size) << 6)2905| (align << 4) | Rm);2906}2907*/29082909void ARMXEmitter::WriteVimm(ARMReg Vd, int cmode, u8 imm, int op) {2910bool register_quad = Vd >= Q0;29112912Write32((0xF28 << 20) | ((imm >> 7) << 24) | (((imm >> 4) & 0x7) << 16) | (imm & 0xF) |2913EncodeVd(Vd) | (register_quad << 6) | (op << 5) | (1 << 4) | ((cmode & 0xF) << 8));2914}29152916void ARMXEmitter::VMOV_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {2917// Only let through the modes that apply.2918switch (type) {2919case VIMM___x___x:2920case VIMM__x___x_:2921case VIMM_x___x__:2922case VIMMx___x___:2923if (Size != I_32)2924goto error;2925WriteVimm(Vd, (int)type, imm, 0);2926break;2927case VIMM_x_x_x_x:2928case VIMMx_x_x_x_:2929if (Size != I_16)2930goto error;2931WriteVimm(Vd, (int)type, imm, 0);2932break;2933case VIMMxxxxxxxx: // replicate the byte2934if (Size != I_8)2935goto error;2936WriteVimm(Vd, (int)type, imm, 0);2937break;2938case VIMMbits2bytes:2939if (Size != I_64)2940goto error;2941WriteVimm(Vd, (int)type, imm, 1);2942break;2943default:2944goto error;2945}2946return;29472948error:2949_dbg_assert_msg_(false, "Bad Size or type specified in %s: Size %i Type %i", __FUNCTION__, (int)Size, type);2950}29512952void ARMXEmitter::VMOV_immf(ARMReg Vd, float value) { // This only works with a select few values. I've hardcoded 1.0f.2953u8 bits = 0;29542955if (value == 0.0f) {2956VEOR(Vd, Vd, Vd);2957return;2958}29592960// TODO: Do something more sophisticated here.2961if (value == 1.5f) {2962bits = 0x78;2963} else if (value == 1.0f) {2964bits = 0x70;2965} else if (value == -1.0f) {2966bits = 0xF0;2967} else {2968_dbg_assert_msg_(false, "%s: Invalid floating point immediate", __FUNCTION__);2969}2970WriteVimm(Vd, VIMMf000f000, bits, 0);2971}29722973void ARMXEmitter::VORR_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {2974// Only let through the modes that apply.2975switch (type) {2976case VIMM___x___x:2977case VIMM__x___x_:2978case VIMM_x___x__:2979case VIMMx___x___:2980if (Size != I_32)2981goto error;2982WriteVimm(Vd, (int)type | 1, imm, 0);2983break;2984case VIMM_x_x_x_x:2985case VIMMx_x_x_x_:2986if (Size != I_16)2987goto error;2988WriteVimm(Vd, (int)type | 1, imm, 0);2989break;2990default:2991goto error;2992}2993return;2994error:2995_dbg_assert_msg_(false, "Bad Size or type specified in VORR_imm");2996}29972998void ARMXEmitter::VBIC_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {2999// Only let through the modes that apply.3000switch (type) {3001case VIMM___x___x:3002case VIMM__x___x_:3003case VIMM_x___x__:3004case VIMMx___x___:3005if (Size != I_32)3006goto error;3007WriteVimm(Vd, (int)type | 1, imm, 1);3008break;3009case VIMM_x_x_x_x:3010case VIMMx_x_x_x_:3011if (Size != I_16)3012goto error;3013WriteVimm(Vd, (int)type | 1, imm, 1);3014break;3015default:3016goto error;3017}3018return;3019error:3020_dbg_assert_msg_(false, "Bad Size or type specified in VBIC_imm");3021}302230233024void ARMXEmitter::VMVN_imm(u32 Size, ARMReg Vd, VIMMMode type, int imm) {3025// Only let through the modes that apply.3026switch (type) {3027case VIMM___x___x:3028case VIMM__x___x_:3029case VIMM_x___x__:3030case VIMMx___x___:3031if (Size != I_32)3032goto error;3033WriteVimm(Vd, (int)type, imm, 1);3034break;3035case VIMM_x_x_x_x:3036case VIMMx_x_x_x_:3037if (Size != I_16)3038goto error;3039WriteVimm(Vd, (int)type, imm, 1);3040break;3041default:3042goto error;3043}3044return;3045error:3046_dbg_assert_msg_(false, "Bad Size or type specified in VMVN_imm");3047}304830493050void ARMXEmitter::VREVX(u32 size, u32 Size, ARMReg Vd, ARMReg Vm)3051{3052bool register_quad = Vd >= Q0;3053Vd = SubBase(Vd);3054Vm = SubBase(Vm);30553056Write32((0xF3 << 24) | (1 << 23) | ((Vd & 0x10) << 18) | (0x3 << 20)3057| (encodedSize(Size) << 18) | ((Vd & 0xF) << 12) | (size << 7)3058| (register_quad << 6) | ((Vm & 0x10) << 1) | (Vm & 0xF));3059}30603061void ARMXEmitter::VREV64(u32 Size, ARMReg Vd, ARMReg Vm)3062{3063VREVX(0, Size, Vd, Vm);3064}30653066void ARMXEmitter::VREV32(u32 Size, ARMReg Vd, ARMReg Vm)3067{3068VREVX(1, Size, Vd, Vm);3069}30703071void ARMXEmitter::VREV16(u32 Size, ARMReg Vd, ARMReg Vm)3072{3073VREVX(2, Size, Vd, Vm);3074}30753076// See page A8-878 in ARMv7-A Architecture Reference Manual30773078// Dest is a Q register, Src is a D register.3079void ARMXEmitter::VCVTF32F16(ARMReg Dest, ARMReg Src) {3080_assert_msg_(cpu_info.bVFPv4, "Can't use half-float conversions when you don't support VFPv4");3081if (Dest < Q0 || Dest > Q15 || Src < D0 || Src > D15) {3082// Invalid!3083}30843085Dest = SubBase(Dest);3086Src = SubBase(Src);30873088int op = 1;3089Write32((0xF3B6 << 16) | ((Dest & 0x10) << 18) | ((Dest & 0xF) << 12) | 0x600 | (op << 8) | ((Src & 0x10) << 1) | (Src & 0xF));3090}30913092// UNTESTED3093// Dest is a D register, Src is a Q register.3094void ARMXEmitter::VCVTF16F32(ARMReg Dest, ARMReg Src) {3095_assert_msg_(cpu_info.bVFPv4, "Can't use half-float conversions when you don't support VFPv4");3096if (Dest < D0 || Dest > D15 || Src < Q0 || Src > Q15) {3097// Invalid!3098}3099Dest = SubBase(Dest);3100Src = SubBase(Src);3101int op = 0;3102Write32((0xF3B6 << 16) | ((Dest & 0x10) << 18) | ((Dest & 0xF) << 12) | 0x600 | (op << 8) | ((Src & 0x10) << 1) | (Src & 0xF));3103}31043105// Always clear code space with breakpoints, so that if someone accidentally executes3106// uninitialized, it just breaks into the debugger.3107void ARMXCodeBlock::PoisonMemory(int offset) {3108// TODO: this isn't right for ARM!3109memset(region + offset, 0xCC, region_size - offset);3110ResetCodePtr(offset);3111}31123113}311431153116