CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/x86/X64IRCompALU.cpp
Views: 1401
// Copyright (c) 2023- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "ppsspp_config.h"18#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)1920#include "Common/CPUDetect.h"21#include "Core/MemMap.h"22#include "Core/MIPS/x86/X64IRJit.h"23#include "Core/MIPS/x86/X64IRRegCache.h"2425// This file contains compilation for integer / arithmetic / logic related instructions.26//27// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.28// Currently known non working ones should have DISABLE. 
No flags because that's in IR already.2930// #define CONDITIONAL_DISABLE { CompIR_Generic(inst); return; }31#define CONDITIONAL_DISABLE {}32#define DISABLE { CompIR_Generic(inst); return; }33#define INVALIDOP { _assert_msg_(false, "Invalid IR inst %d", (int)inst.op); CompIR_Generic(inst); return; }3435namespace MIPSComp {3637using namespace Gen;38using namespace X64IRJitConstants;3940void X64JitBackend::CompIR_Arith(IRInst inst) {41CONDITIONAL_DISABLE;4243bool allowPtrMath = inst.constant <= 0x7FFFFFFF;44#ifdef MASKED_PSP_MEMORY45// Since we modify it, we can't safely.46allowPtrMath = false;47#endif4849switch (inst.op) {50case IROp::Add:51regs_.Map(inst);52if (inst.src1 == inst.src2) {53LEA(32, regs_.RX(inst.dest), MScaled(regs_.RX(inst.src1), 2, 0));54} else if (inst.dest == inst.src2) {55ADD(32, regs_.R(inst.dest), regs_.R(inst.src1));56} else if (inst.dest == inst.src1) {57ADD(32, regs_.R(inst.dest), regs_.R(inst.src2));58} else {59MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));60ADD(32, regs_.R(inst.dest), regs_.R(inst.src2));61}62break;6364case IROp::Sub:65regs_.Map(inst);66if (inst.src1 == inst.src2) {67regs_.SetGPRImm(inst.dest, 0);68} else if (inst.dest == inst.src2) {69NEG(32, regs_.R(inst.src2));70ADD(32, regs_.R(inst.dest), regs_.R(inst.src1));71} else if (inst.dest == inst.src1) {72SUB(32, regs_.R(inst.dest), regs_.R(inst.src2));73} else {74MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));75SUB(32, regs_.R(inst.dest), regs_.R(inst.src2));76}77break;7879case IROp::AddConst:80if (regs_.IsGPRMappedAsPointer(inst.dest) && inst.dest == inst.src1 && allowPtrMath) {81regs_.MarkGPRAsPointerDirty(inst.dest);82LEA(PTRBITS, regs_.RXPtr(inst.dest), MDisp(regs_.RXPtr(inst.dest), inst.constant));83} else {84regs_.Map(inst);85LEA(32, regs_.RX(inst.dest), MDisp(regs_.RX(inst.src1), inst.constant));86}87break;8889case IROp::SubConst:90if (regs_.IsGPRMappedAsPointer(inst.dest) && inst.dest == inst.src1 && allowPtrMath) 
{91regs_.MarkGPRAsPointerDirty(inst.dest);92LEA(PTRBITS, regs_.RXPtr(inst.dest), MDisp(regs_.RXPtr(inst.dest), -(int)inst.constant));93} else {94regs_.Map(inst);95LEA(32, regs_.RX(inst.dest), MDisp(regs_.RX(inst.src1), -(int)inst.constant));96}97break;9899case IROp::Neg:100regs_.Map(inst);101if (inst.dest != inst.src1) {102MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));103}104NEG(32, regs_.R(inst.dest));105break;106107default:108INVALIDOP;109break;110}111}112113void X64JitBackend::CompIR_Assign(IRInst inst) {114CONDITIONAL_DISABLE;115116switch (inst.op) {117case IROp::Mov:118if (inst.dest != inst.src1) {119regs_.Map(inst);120MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));121}122break;123124case IROp::Ext8to32:125regs_.MapWithFlags(inst, X64Map::NONE, X64Map::LOW_SUBREG);126MOVSX(32, 8, regs_.RX(inst.dest), regs_.R(inst.src1));127break;128129case IROp::Ext16to32:130regs_.Map(inst);131MOVSX(32, 16, regs_.RX(inst.dest), regs_.R(inst.src1));132break;133134default:135INVALIDOP;136break;137}138}139140void X64JitBackend::CompIR_Bits(IRInst inst) {141CONDITIONAL_DISABLE;142143switch (inst.op) {144case IROp::BSwap32:145regs_.Map(inst);146if (inst.src1 != inst.dest) {147MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));148}149BSWAP(32, regs_.RX(inst.dest));150break;151152case IROp::ReverseBits:153regs_.Map(inst);154if (inst.src1 != inst.dest) {155MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));156}157158// Swap even/odd bits (in bits: 0123 -> 1032.)159LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 2, 0));160SHR(32, regs_.R(inst.dest), Imm8(1));161XOR(32, regs_.R(inst.dest), R(SCRATCH1));162AND(32, regs_.R(inst.dest), Imm32(0x55555555));163XOR(32, regs_.R(inst.dest), R(SCRATCH1));164165// Swap pairs of bits (in bits: 10325476 -> 32107654.)166LEA(32, SCRATCH1, MScaled(regs_.RX(inst.dest), 4, 0));167SHR(32, regs_.R(inst.dest), Imm8(2));168XOR(32, regs_.R(inst.dest), R(SCRATCH1));169AND(32, regs_.R(inst.dest), Imm32(0x33333333));170XOR(32, regs_.R(inst.dest), 
R(SCRATCH1));171172// Swap nibbles (in nibbles: ABCD -> BADC.)173MOV(32, R(SCRATCH1), regs_.R(inst.dest));174SHL(32, R(SCRATCH1), Imm8(4));175SHR(32, regs_.R(inst.dest), Imm8(4));176XOR(32, regs_.R(inst.dest), R(SCRATCH1));177AND(32, regs_.R(inst.dest), Imm32(0x0F0F0F0F));178XOR(32, regs_.R(inst.dest), R(SCRATCH1));179180// Finally, swap the bytes to drop everything into place (nibbles: BADCFEHG -> HGFEDCBA.)181BSWAP(32, regs_.RX(inst.dest));182break;183184case IROp::BSwap16:185regs_.Map(inst);186if (cpu_info.bBMI2) {187// Rotate to put it into the correct register, then swap.188if (inst.dest != inst.src1)189RORX(32, regs_.RX(inst.dest), regs_.R(inst.src1), 16);190else191ROR(32, regs_.R(inst.dest), Imm8(16));192BSWAP(32, regs_.RX(inst.dest));193} else {194if (inst.dest != inst.src1)195MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));196BSWAP(32, regs_.RX(inst.dest));197ROR(32, regs_.R(inst.dest), Imm8(16));198}199break;200201case IROp::Clz:202regs_.Map(inst);203if (cpu_info.bLZCNT) {204LZCNT(32, regs_.RX(inst.dest), regs_.R(inst.src1));205} else {206BSR(32, regs_.RX(inst.dest), regs_.R(inst.src1));207FixupBranch notFound = J_CC(CC_Z);208209// Since one of these bits must be set, and none outside, this subtracts from 31.210XOR(32, regs_.R(inst.dest), Imm8(31));211FixupBranch skip = J();212213SetJumpTarget(notFound);214MOV(32, regs_.R(inst.dest), Imm32(32));215216SetJumpTarget(skip);217}218break;219220default:221INVALIDOP;222break;223}224}225226void X64JitBackend::CompIR_Compare(IRInst inst) {227CONDITIONAL_DISABLE;228229auto setCC = [&](const OpArg &arg, CCFlags cc) {230// If it's carry, we can take advantage of ADC to avoid subregisters.231if (cc == CC_C && inst.dest != inst.src1 && inst.dest != inst.src2) {232XOR(32, regs_.R(inst.dest), regs_.R(inst.dest));233CMP(32, regs_.R(inst.src1), arg);234ADC(32, regs_.R(inst.dest), Imm8(0));235} else if (regs_.HasLowSubregister(regs_.RX(inst.dest)) && inst.dest != inst.src1 && inst.dest != inst.src2) {236XOR(32, 
regs_.R(inst.dest), regs_.R(inst.dest));237CMP(32, regs_.R(inst.src1), arg);238SETcc(cc, regs_.R(inst.dest));239} else {240CMP(32, regs_.R(inst.src1), arg);241SETcc(cc, R(SCRATCH1));242MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));243}244};245246switch (inst.op) {247case IROp::Slt:248regs_.Map(inst);249setCC(regs_.R(inst.src2), CC_L);250break;251252case IROp::SltConst:253if (inst.constant == 0) {254// Basically, getting the sign bit. Let's shift instead.255regs_.Map(inst);256if (inst.dest != inst.src1)257MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));258SHR(32, regs_.R(inst.dest), Imm8(31));259} else {260regs_.Map(inst);261setCC(Imm32(inst.constant), CC_L);262}263break;264265case IROp::SltU:266if (regs_.IsGPRImm(inst.src1) && regs_.GetGPRImm(inst.src1) == 0) {267// This is kinda common, same as != 0. Avoid flushing src1.268regs_.SpillLockGPR(inst.src2, inst.dest);269regs_.MapGPR(inst.src2);270regs_.MapGPR(inst.dest, MIPSMap::NOINIT);271if (inst.dest != inst.src2 && regs_.HasLowSubregister(regs_.RX(inst.dest))) {272XOR(32, regs_.R(inst.dest), regs_.R(inst.dest));273TEST(32, regs_.R(inst.src2), regs_.R(inst.src2));274SETcc(CC_NE, regs_.R(inst.dest));275} else {276CMP(32, regs_.R(inst.src2), Imm8(0));277SETcc(CC_NE, R(SCRATCH1));278MOVZX(32, 8, regs_.RX(inst.dest), R(SCRATCH1));279}280} else {281regs_.Map(inst);282setCC(regs_.R(inst.src2), CC_B);283}284break;285286case IROp::SltUConst:287if (inst.constant == 0) {288regs_.SetGPRImm(inst.dest, 0);289} else {290regs_.Map(inst);291setCC(Imm32(inst.constant), CC_B);292}293break;294295default:296INVALIDOP;297break;298}299}300301void X64JitBackend::CompIR_CondAssign(IRInst inst) {302CONDITIONAL_DISABLE;303304switch (inst.op) {305case IROp::MovZ:306if (inst.dest != inst.src2) {307regs_.Map(inst);308TEST(32, regs_.R(inst.src1), regs_.R(inst.src1));309CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_Z);310}311break;312313case IROp::MovNZ:314if (inst.dest != inst.src2) {315regs_.Map(inst);316TEST(32, regs_.R(inst.src1), 
regs_.R(inst.src1));317CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_NZ);318}319break;320321case IROp::Max:322regs_.Map(inst);323if (inst.src1 == inst.src2) {324MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));325} else if (inst.dest == inst.src1) {326CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));327CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_L);328} else if (inst.dest == inst.src2) {329CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));330CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src1), CC_G);331} else {332MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));333CMP(32, regs_.R(inst.dest), regs_.R(inst.src2));334CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_L);335}336break;337338case IROp::Min:339regs_.Map(inst);340if (inst.src1 == inst.src2) {341MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));342} else if (inst.dest == inst.src1) {343CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));344CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_G);345} else if (inst.dest == inst.src2) {346CMP(32, regs_.R(inst.src1), regs_.R(inst.src2));347CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src1), CC_L);348} else {349MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));350CMP(32, regs_.R(inst.dest), regs_.R(inst.src2));351CMOVcc(32, regs_.RX(inst.dest), regs_.R(inst.src2), CC_G);352}353break;354355default:356INVALIDOP;357break;358}359}360361void X64JitBackend::CompIR_Div(IRInst inst) {362CONDITIONAL_DISABLE;363364switch (inst.op) {365case IROp::Div:366#if PPSSPP_ARCH(AMD64)367// We need EDX specifically, so force a spill (before spill locks happen.)368regs_.MapGPR2(IRREG_LO, MIPSMap::NOINIT | X64Map::HIGH_DATA);369regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT | X64Map::HIGH_DATA } });370#else // PPSSPP_ARCH(X86)371// Force a spill, it's HI in this path.372regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);373regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } 
});374#endif375{376TEST(32, regs_.R(inst.src2), regs_.R(inst.src2));377FixupBranch divideByZero = J_CC(CC_E, false);378379// Sign extension sets HI to -1 for us on x64.380MOV(PTRBITS, regs_.R(IRREG_LO), Imm32(0x80000000));381#if PPSSPP_ARCH(X86)382MOV(PTRBITS, regs_.R(IRREG_HI), Imm32(-1));383#endif384CMP(32, regs_.R(inst.src1), regs_.R(IRREG_LO));385FixupBranch numeratorNotOverflow = J_CC(CC_NE, false);386CMP(32, regs_.R(inst.src2), Imm32(-1));387FixupBranch denominatorOverflow = J_CC(CC_E, false);388389SetJumpTarget(numeratorNotOverflow);390391// It's finally time to actually divide.392MOV(32, R(EAX), regs_.R(inst.src1));393CDQ();394IDIV(32, regs_.R(inst.src2));395#if PPSSPP_ARCH(AMD64)396// EDX == RX(IRREG_LO). Put the remainder in the upper bits, done.397SHL(64, R(EDX), Imm8(32));398OR(64, R(EDX), R(EAX));399#else // PPSSPP_ARCH(X86)400// EDX is already good (HI), just move EAX into place.401MOV(32, regs_.R(IRREG_LO), R(EAX));402#endif403FixupBranch done = J(false);404405SetJumpTarget(divideByZero);406X64Reg loReg = SCRATCH1;407#if PPSSPP_ARCH(X86)408if (regs_.HasLowSubregister(regs_.RX(IRREG_LO)))409loReg = regs_.RX(IRREG_LO);410#endif411// Set to -1 if numerator positive using SF.412XOR(32, R(loReg), R(loReg));413TEST(32, regs_.R(inst.src1), regs_.R(inst.src1));414SETcc(CC_NS, R(loReg));415NEG(32, R(loReg));416// If it was negative, OR in 1 (so we get -1 or 1.)417OR(32, R(loReg), Imm8(1));418419#if PPSSPP_ARCH(AMD64)420// Move the numerator into the high bits.421MOV(32, regs_.R(IRREG_LO), regs_.R(inst.src1));422SHL(64, regs_.R(IRREG_LO), Imm8(32));423OR(64, regs_.R(IRREG_LO), R(loReg));424#else // PPSSPP_ARCH(X86)425// If we didn't have a subreg, move into place.426if (loReg != regs_.RX(IRREG_LO))427MOV(32, regs_.R(IRREG_LO), R(loReg));428MOV(32, regs_.R(IRREG_HI), regs_.R(inst.src1));429#endif430431SetJumpTarget(denominatorOverflow);432SetJumpTarget(done);433}434break;435436case IROp::DivU:437#if PPSSPP_ARCH(AMD64)438// We need EDX specifically, so force a 
spill (before spill locks happen.)439regs_.MapGPR2(IRREG_LO, MIPSMap::NOINIT | X64Map::HIGH_DATA);440regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT | X64Map::HIGH_DATA } });441#else // PPSSPP_ARCH(X86)442// Force a spill, it's HI in this path.443regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);444regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } });445#endif446{447TEST(32, regs_.R(inst.src2), regs_.R(inst.src2));448FixupBranch divideByZero = J_CC(CC_E, false);449450MOV(32, R(EAX), regs_.R(inst.src1));451XOR(32, R(EDX), R(EDX));452DIV(32, regs_.R(inst.src2));453#if PPSSPP_ARCH(AMD64)454// EDX == RX(IRREG_LO). Put the remainder in the upper bits, done.455SHL(64, R(EDX), Imm8(32));456OR(64, R(EDX), R(EAX));457#else // PPSSPP_ARCH(X86)458// EDX is already good (HI), just move EAX into place.459MOV(32, regs_.R(IRREG_LO), R(EAX));460#endif461FixupBranch done = J(false);462463SetJumpTarget(divideByZero);464// First, set LO to 0xFFFF if numerator was <= that value.465MOV(32, regs_.R(IRREG_LO), Imm32(0xFFFF));466XOR(32, R(SCRATCH1), R(SCRATCH1));467CMP(32, regs_.R(IRREG_LO), regs_.R(inst.src1));468// If 0xFFFF was less, CF was set - SBB will subtract 1 from 0, netting -1.469SBB(32, R(SCRATCH1), Imm8(0));470OR(32, regs_.R(IRREG_LO), R(SCRATCH1));471472#if PPSSPP_ARCH(AMD64)473// Move the numerator into the high bits.474MOV(32, R(SCRATCH1), regs_.R(inst.src1));475SHL(64, R(SCRATCH1), Imm8(32));476OR(64, regs_.R(IRREG_LO), R(SCRATCH1));477#else // PPSSPP_ARCH(X86)478MOV(32, regs_.R(IRREG_HI), regs_.R(inst.src1));479#endif480481SetJumpTarget(done);482}483break;484485default:486INVALIDOP;487break;488}489}490491void X64JitBackend::CompIR_HiLo(IRInst inst) {492CONDITIONAL_DISABLE;493494switch (inst.op) {495case IROp::MtLo:496#if PPSSPP_ARCH(AMD64)497regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });498// First, clear the bits we're replacing.499SHR(64, 
regs_.R(IRREG_LO), Imm8(32));500SHL(64, regs_.R(IRREG_LO), Imm8(32));501// Now clear the high bits and merge.502MOVZX(64, 32, regs_.RX(inst.src1), regs_.R(inst.src1));503OR(64, regs_.R(IRREG_LO), regs_.R(inst.src1));504#else // PPSSPP_ARCH(X86)505regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY } });506MOV(32, regs_.R(IRREG_LO), regs_.R(inst.src1));507#endif508break;509510case IROp::MtHi:511#if PPSSPP_ARCH(AMD64)512regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });513// First, clear the bits we're replacing.514MOVZX(64, 32, regs_.RX(IRREG_LO), regs_.R(IRREG_LO));515// Then move the new bits into place.516MOV(32, R(SCRATCH1), regs_.R(inst.src1));517SHL(64, R(SCRATCH1), Imm8(32));518OR(64, regs_.R(IRREG_LO), R(SCRATCH1));519#else // PPSSPP_ARCH(X86)520regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });521MOV(32, regs_.R(IRREG_HI), regs_.R(inst.src1));522#endif523break;524525case IROp::MfLo:526#if PPSSPP_ARCH(AMD64)527regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });528MOV(32, regs_.R(inst.dest), regs_.R(IRREG_LO));529#else // PPSSPP_ARCH(X86)530regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::INIT } });531MOV(32, regs_.R(inst.dest), regs_.R(IRREG_LO));532#endif533break;534535case IROp::MfHi:536#if PPSSPP_ARCH(AMD64)537regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::INIT } });538MOV(64, regs_.R(inst.dest), regs_.R(IRREG_LO));539SHR(64, regs_.R(inst.dest), Imm8(32));540#else // PPSSPP_ARCH(X86)541regs_.MapWithExtra(inst, { { 'G', IRREG_HI, 1, MIPSMap::INIT } });542MOV(32, regs_.R(inst.dest), regs_.R(IRREG_HI));543#endif544break;545546default:547INVALIDOP;548break;549}550}551552void X64JitBackend::CompIR_Logic(IRInst inst) {553CONDITIONAL_DISABLE;554555switch (inst.op) {556case IROp::And:557regs_.Map(inst);558if (inst.dest == inst.src1) {559AND(32, regs_.R(inst.dest), regs_.R(inst.src2));560} else if (inst.dest == inst.src2) {561AND(32, regs_.R(inst.dest), regs_.R(inst.src1));562} else 
{563MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));564AND(32, regs_.R(inst.dest), regs_.R(inst.src2));565}566break;567568case IROp::Or:569regs_.Map(inst);570if (inst.dest == inst.src1) {571OR(32, regs_.R(inst.dest), regs_.R(inst.src2));572} else if (inst.dest == inst.src2) {573OR(32, regs_.R(inst.dest), regs_.R(inst.src1));574} else {575MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));576OR(32, regs_.R(inst.dest), regs_.R(inst.src2));577}578break;579580case IROp::Xor:581regs_.Map(inst);582if (inst.dest == inst.src1) {583XOR(32, regs_.R(inst.dest), regs_.R(inst.src2));584} else if (inst.dest == inst.src2) {585XOR(32, regs_.R(inst.dest), regs_.R(inst.src1));586} else {587MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));588XOR(32, regs_.R(inst.dest), regs_.R(inst.src2));589}590break;591592case IROp::AndConst:593regs_.Map(inst);594if (inst.dest != inst.src1)595MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));596AND(32, regs_.R(inst.dest), SImmAuto((s32)inst.constant));597break;598599case IROp::OrConst:600regs_.Map(inst);601if (inst.dest != inst.src1)602MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));603OR(32, regs_.R(inst.dest), SImmAuto((s32)inst.constant));604break;605606case IROp::XorConst:607regs_.Map(inst);608if (inst.dest != inst.src1)609MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));610XOR(32, regs_.R(inst.dest), SImmAuto((s32)inst.constant));611break;612613case IROp::Not:614regs_.Map(inst);615if (inst.dest != inst.src1)616MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));617NOT(32, regs_.R(inst.dest));618break;619620default:621INVALIDOP;622break;623}624}625626void X64JitBackend::CompIR_Mult(IRInst inst) {627CONDITIONAL_DISABLE;628629switch (inst.op) {630case IROp::Mult:631#if PPSSPP_ARCH(AMD64)632regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });633MOVSX(64, 32, regs_.RX(IRREG_LO), regs_.R(inst.src1));634MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));635IMUL(64, regs_.RX(IRREG_LO), regs_.R(inst.src2));636#else // PPSSPP_ARCH(X86)637// 
Force a spill (before spill locks.)638regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);639// We keep it here so it stays locked.640regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } });641MOV(32, R(EAX), regs_.R(inst.src1));642IMUL(32, regs_.R(inst.src2));643MOV(32, regs_.R(IRREG_LO), R(EAX));644// IRREG_HI was mapped to EDX.645#endif646break;647648case IROp::MultU:649#if PPSSPP_ARCH(AMD64)650regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::NOINIT } });651MOVZX(64, 32, regs_.RX(IRREG_LO), regs_.R(inst.src1));652MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));653IMUL(64, regs_.RX(IRREG_LO), regs_.R(inst.src2));654#else // PPSSPP_ARCH(X86)655// Force a spill (before spill locks.)656regs_.MapGPR(IRREG_HI, MIPSMap::NOINIT | X64Map::HIGH_DATA);657// We keep it here so it stays locked.658regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::NOINIT }, { 'G', IRREG_HI, 1, MIPSMap::NOINIT | X64Map::HIGH_DATA } });659MOV(32, R(EAX), regs_.R(inst.src1));660MUL(32, regs_.R(inst.src2));661MOV(32, regs_.R(IRREG_LO), R(EAX));662// IRREG_HI was mapped to EDX.663#endif664break;665666case IROp::Madd:667#if PPSSPP_ARCH(AMD64)668regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });669MOVSX(64, 32, SCRATCH1, regs_.R(inst.src1));670MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));671IMUL(64, SCRATCH1, regs_.R(inst.src2));672ADD(64, regs_.R(IRREG_LO), R(SCRATCH1));673#else // PPSSPP_ARCH(X86)674// For ones that modify LO/HI, we can't have anything else in EDX.675regs_.ReserveAndLockXGPR(EDX);676regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });677MOV(32, R(EAX), regs_.R(inst.src1));678IMUL(32, regs_.R(inst.src2));679ADD(32, regs_.R(IRREG_LO), R(EAX));680ADC(32, regs_.R(IRREG_HI), R(EDX));681#endif682break;683684case IROp::MaddU:685#if PPSSPP_ARCH(AMD64)686regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY 
} });687MOVZX(64, 32, SCRATCH1, regs_.R(inst.src1));688MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));689IMUL(64, SCRATCH1, regs_.R(inst.src2));690ADD(64, regs_.R(IRREG_LO), R(SCRATCH1));691#else // PPSSPP_ARCH(X86)692// For ones that modify LO/HI, we can't have anything else in EDX.693regs_.ReserveAndLockXGPR(EDX);694regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });695MOV(32, R(EAX), regs_.R(inst.src1));696MUL(32, regs_.R(inst.src2));697ADD(32, regs_.R(IRREG_LO), R(EAX));698ADC(32, regs_.R(IRREG_HI), R(EDX));699#endif700break;701702case IROp::Msub:703#if PPSSPP_ARCH(AMD64)704regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });705MOVSX(64, 32, SCRATCH1, regs_.R(inst.src1));706MOVSX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));707IMUL(64, SCRATCH1, regs_.R(inst.src2));708SUB(64, regs_.R(IRREG_LO), R(SCRATCH1));709#else // PPSSPP_ARCH(X86)710// For ones that modify LO/HI, we can't have anything else in EDX.711regs_.ReserveAndLockXGPR(EDX);712regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });713MOV(32, R(EAX), regs_.R(inst.src1));714IMUL(32, regs_.R(inst.src2));715SUB(32, regs_.R(IRREG_LO), R(EAX));716SBB(32, regs_.R(IRREG_HI), R(EDX));717#endif718break;719720case IROp::MsubU:721#if PPSSPP_ARCH(AMD64)722regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 2, MIPSMap::DIRTY } });723MOVZX(64, 32, SCRATCH1, regs_.R(inst.src1));724MOVZX(64, 32, regs_.RX(inst.src2), regs_.R(inst.src2));725IMUL(64, SCRATCH1, regs_.R(inst.src2));726SUB(64, regs_.R(IRREG_LO), R(SCRATCH1));727#else // PPSSPP_ARCH(X86)728// For ones that modify LO/HI, we can't have anything else in EDX.729regs_.ReserveAndLockXGPR(EDX);730regs_.MapWithExtra(inst, { { 'G', IRREG_LO, 1, MIPSMap::DIRTY }, { 'G', IRREG_HI, 1, MIPSMap::DIRTY } });731MOV(32, R(EAX), regs_.R(inst.src1));732MUL(32, regs_.R(inst.src2));733SUB(32, regs_.R(IRREG_LO), R(EAX));734SBB(32, regs_.R(IRREG_HI), 
R(EDX));735#endif736break;737738default:739INVALIDOP;740break;741}742}743744void X64JitBackend::CompIR_Shift(IRInst inst) {745CONDITIONAL_DISABLE;746747switch (inst.op) {748case IROp::Shl:749if (cpu_info.bBMI2) {750regs_.Map(inst);751SHLX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));752} else {753regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);754if (inst.dest == inst.src1) {755SHL(32, regs_.R(inst.dest), regs_.R(inst.src2));756} else if (inst.dest == inst.src2) {757MOV(32, R(SCRATCH1), regs_.R(inst.src1));758SHL(32, R(SCRATCH1), regs_.R(inst.src2));759MOV(32, regs_.R(inst.dest), R(SCRATCH1));760} else {761MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));762SHL(32, regs_.R(inst.dest), regs_.R(inst.src2));763}764}765break;766767case IROp::Shr:768if (cpu_info.bBMI2) {769regs_.Map(inst);770SHRX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));771} else {772regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);773if (inst.dest == inst.src1) {774SHR(32, regs_.R(inst.dest), regs_.R(inst.src2));775} else if (inst.dest == inst.src2) {776MOV(32, R(SCRATCH1), regs_.R(inst.src1));777SHR(32, R(SCRATCH1), regs_.R(inst.src2));778MOV(32, regs_.R(inst.dest), R(SCRATCH1));779} else {780MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));781SHR(32, regs_.R(inst.dest), regs_.R(inst.src2));782}783}784break;785786case IROp::Sar:787if (cpu_info.bBMI2) {788regs_.Map(inst);789SARX(32, regs_.RX(inst.dest), regs_.R(inst.src1), regs_.RX(inst.src2));790} else {791regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);792if (inst.dest == inst.src1) {793SAR(32, regs_.R(inst.dest), regs_.R(inst.src2));794} else if (inst.dest == inst.src2) {795MOV(32, R(SCRATCH1), regs_.R(inst.src1));796SAR(32, R(SCRATCH1), regs_.R(inst.src2));797MOV(32, regs_.R(inst.dest), R(SCRATCH1));798} else {799MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));800SAR(32, regs_.R(inst.dest), regs_.R(inst.src2));801}802}803break;804805case 
IROp::Ror:806regs_.MapWithFlags(inst, X64Map::NONE, X64Map::NONE, X64Map::SHIFT);807if (inst.dest == inst.src1) {808ROR(32, regs_.R(inst.dest), regs_.R(inst.src2));809} else if (inst.dest == inst.src2) {810MOV(32, R(SCRATCH1), regs_.R(inst.src1));811ROR(32, R(SCRATCH1), regs_.R(inst.src2));812MOV(32, regs_.R(inst.dest), R(SCRATCH1));813} else {814MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));815ROR(32, regs_.R(inst.dest), regs_.R(inst.src2));816}817break;818819case IROp::ShlImm:820// Shouldn't happen, but let's be safe of any passes that modify the ops.821if (inst.src2 >= 32) {822regs_.SetGPRImm(inst.dest, 0);823} else if (inst.src2 == 0) {824if (inst.dest != inst.src1) {825regs_.Map(inst);826MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));827}828} else if (inst.src2 <= 3) {829regs_.Map(inst);830LEA(32, regs_.RX(inst.dest), MScaled(regs_.RX(inst.src1), 1 << inst.src2, 0));831} else {832regs_.Map(inst);833if (inst.dest != inst.src1)834MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));835SHL(32, regs_.R(inst.dest), Imm8(inst.src2));836}837break;838839case IROp::ShrImm:840// Shouldn't happen, but let's be safe of any passes that modify the ops.841if (inst.src2 >= 32) {842regs_.SetGPRImm(inst.dest, 0);843} else if (inst.src2 == 0) {844if (inst.dest != inst.src1) {845regs_.Map(inst);846MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));847}848} else {849regs_.Map(inst);850if (inst.dest != inst.src1)851MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));852SHR(32, regs_.R(inst.dest), Imm8(inst.src2));853}854break;855856case IROp::SarImm:857// Shouldn't happen, but let's be safe of any passes that modify the ops.858if (inst.src2 >= 32) {859regs_.Map(inst);860if (inst.dest != inst.src1)861MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));862SAR(32, regs_.R(inst.dest), Imm8(31));863} else if (inst.src2 == 0) {864if (inst.dest != inst.src1) {865regs_.Map(inst);866MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));867}868} else {869regs_.Map(inst);870if (inst.dest != inst.src1)871MOV(32, 
regs_.R(inst.dest), regs_.R(inst.src1));872SAR(32, regs_.R(inst.dest), Imm8(inst.src2));873}874break;875876case IROp::RorImm:877if (inst.src2 == 0) {878if (inst.dest != inst.src1) {879regs_.Map(inst);880MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));881}882} else if (cpu_info.bBMI2) {883regs_.Map(inst);884RORX(32, regs_.RX(inst.dest), regs_.R(inst.src1), inst.src2 & 31);885} else {886regs_.Map(inst);887if (inst.dest != inst.src1)888MOV(32, regs_.R(inst.dest), regs_.R(inst.src1));889ROR(32, regs_.R(inst.dest), Imm8(inst.src2 & 31));890}891break;892893default:894INVALIDOP;895break;896}897}898899} // namespace MIPSComp900901#endif902903904