CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/x86/CompALU.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "ppsspp_config.h"18#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)1920#include <algorithm>2122#include "Common/BitSet.h"23#include "Common/CommonTypes.h"24#include "Common/CPUDetect.h"25#include "Core/MIPS/MIPSAnalyst.h"26#include "Core/MIPS/MIPSCodeUtils.h"27#include "Core/MIPS/x86/Jit.h"28#include "Core/MIPS/x86/RegCache.h"2930using namespace MIPSAnalyst;3132#define _RS MIPS_GET_RS(op)33#define _RT MIPS_GET_RT(op)34#define _RD MIPS_GET_RD(op)35#define _FS MIPS_GET_FS(op)36#define _FT MIPS_GET_FT(op)37#define _FD MIPS_GET_FD(op)38#define _SA MIPS_GET_SA(op)39#define _POS ((op>> 6) & 0x1F)40#define _SIZE ((op>>11) & 0x1F)41#define _IMM16 (signed short)(op & 0xFFFF)42#define _IMM26 (op & 0x03FFFFFF)4344// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.45// Currently known non working ones should have DISABLE.4647// #define CONDITIONAL_DISABLE(ignore) { Comp_Generic(op); return; }48#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }49#define DISABLE { Comp_Generic(op); return; }5051namespace MIPSComp52{53using namespace Gen;54using namespace X64JitConstants;5556static bool HasLowSubregister(OpArg arg) {57#if !PPSSPP_ARCH(AMD64)58// Can't use ESI or EDI (which we use), no 8-bit versions. Only these.59if (!arg.IsSimpleReg(EAX) && !arg.IsSimpleReg(EBX) && !arg.IsSimpleReg(ECX) && !arg.IsSimpleReg(EDX)) {60return false;61}62#endif63return arg.IsSimpleReg();64}6566void Jit::CompImmLogic(MIPSOpcode op, void (XEmitter::*arith)(int, const OpArg &, const OpArg &))67{68u32 uimm = (u16)(op & 0xFFFF);69MIPSGPReg rt = _RT;70MIPSGPReg rs = _RS;71gpr.Lock(rt, rs);72gpr.MapReg(rt, rt == rs, true);73if (rt != rs)74MOV(32, gpr.R(rt), gpr.R(rs));75(this->*arith)(32, gpr.R(rt), Imm32(uimm));76gpr.UnlockAll();77}7879void Jit::Comp_IType(MIPSOpcode op)80{81CONDITIONAL_DISABLE(ALU_IMM);82u32 uimm = op & 0xFFFF;83s32 simm = SignExtend16ToS32(op);84u32 suimm = SignExtend16ToU32(op);8586MIPSGPReg rt = _RT;87MIPSGPReg rs = _RS;8889// noop, won't write to ZERO.90if (rt == MIPS_REG_ZERO)91return;9293switch (op >> 26)94{95case 8: // same as addiu?96case 9: // R(rt) = R(rs) + simm; break; //addiu97{98if (gpr.IsImm(rs)) {99gpr.SetImm(rt, gpr.GetImm(rs) + simm);100break;101}102103gpr.Lock(rt, rs);104gpr.MapReg(rt, rt == rs, true);105if (rt == rs) {106if (simm > 0) {107ADD(32, gpr.R(rt), UImmAuto(simm));108} else if (simm < 0) {109SUB(32, gpr.R(rt), UImmAuto(-simm));110}111} else if (gpr.R(rs).IsSimpleReg()) {112LEA(32, gpr.RX(rt), MDisp(gpr.RX(rs), simm));113} else {114MOV(32, gpr.R(rt), gpr.R(rs));115if (simm > 0)116ADD(32, gpr.R(rt), UImmAuto(simm));117else if (simm < 0) {118SUB(32, gpr.R(rt), UImmAuto(-simm));119}120}121gpr.UnlockAll();122}123break;124125case 10: // R(rt) = (s32)R(rs) < simm; break; //slti126if (gpr.IsImm(rs)) {127gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm);128} else {129gpr.Lock(rt, rs);130// This is often used before a branch. If rs is not already mapped, let's leave it.131gpr.MapReg(rt, rt == rs, true);132133bool needsTemp = !HasLowSubregister(gpr.R(rt)) || rt == rs;134if (needsTemp) {135CMP(32, gpr.R(rs), Imm32(suimm));136SETcc(CC_L, R(TEMPREG));137MOVZX(32, 8, gpr.RX(rt), R(TEMPREG));138} else {139XOR(32, gpr.R(rt), gpr.R(rt));140CMP(32, gpr.R(rs), Imm32(suimm));141SETcc(CC_L, gpr.R(rt));142}143gpr.UnlockAll();144}145break;146147case 11: // R(rt) = R(rs) < uimm; break; //sltiu148if (gpr.IsImm(rs)) {149gpr.SetImm(rt, gpr.GetImm(rs) < suimm);150} else {151gpr.Lock(rt, rs);152// This is often used before a branch. If rs is not already mapped, let's leave it.153gpr.MapReg(rt, rt == rs, true);154155bool needsTemp = !HasLowSubregister(gpr.R(rt)) || rt == rs;156if (needsTemp) {157CMP(32, gpr.R(rs), Imm32(suimm));158SETcc(CC_B, R(TEMPREG));159MOVZX(32, 8, gpr.RX(rt), R(TEMPREG));160} else {161XOR(32, gpr.R(rt), gpr.R(rt));162CMP(32, gpr.R(rs), Imm32(suimm));163SETcc(CC_B, gpr.R(rt));164}165gpr.UnlockAll();166}167break;168169case 12: // R(rt) = R(rs) & uimm; break; //andi170if (uimm == 0)171gpr.SetImm(rt, 0);172else if (gpr.IsImm(rs))173gpr.SetImm(rt, gpr.GetImm(rs) & uimm);174else175CompImmLogic(op, &XEmitter::AND);176break;177178case 13: // R(rt) = R(rs) | uimm; break; //ori179if (gpr.IsImm(rs))180gpr.SetImm(rt, gpr.GetImm(rs) | uimm);181else182CompImmLogic(op, &XEmitter::OR);183break;184185case 14: // R(rt) = R(rs) ^ uimm; break; //xori186if (gpr.IsImm(rs))187gpr.SetImm(rt, gpr.GetImm(rs) ^ uimm);188else189CompImmLogic(op, &XEmitter::XOR);190break;191192case 15: //R(rt) = uimm << 16; break; //lui193gpr.SetImm(rt, uimm << 16);194break;195196default:197Comp_Generic(op);198break;199}200}201202void Jit::Comp_RType2(MIPSOpcode op)203{204CONDITIONAL_DISABLE(ALU_BIT);205MIPSGPReg rs = _RS;206MIPSGPReg rd = _RD;207208// Don't change $zr.209if (rd == MIPS_REG_ZERO)210return;211212switch (op & 63)213{214case 22: //clz215if (gpr.IsImm(rs))216{217u32 value = gpr.GetImm(rs);218int x = 31;219int count = 0;220while (x >= 0 && !(value & (1 << x)))221{222count++;223x--;224}225gpr.SetImm(rd, count);226}227else228{229gpr.Lock(rd, rs);230gpr.MapReg(rd, rd == rs, true);231BSR(32, TEMPREG, gpr.R(rs));232FixupBranch notFound = J_CC(CC_Z);233234MOV(32, gpr.R(rd), Imm32(31));235SUB(32, gpr.R(rd), R(TEMPREG));236FixupBranch skip = J();237238SetJumpTarget(notFound);239MOV(32, gpr.R(rd), Imm32(32));240241SetJumpTarget(skip);242gpr.UnlockAll();243}244break;245case 23: //clo246if (gpr.IsImm(rs))247{248u32 value = gpr.GetImm(rs);249int x = 31;250int count = 0;251while (x >= 0 && (value & (1 << x)))252{253count++;254x--;255}256gpr.SetImm(rd, count);257}258else259{260gpr.Lock(rd, rs);261gpr.MapReg(rd, rd == rs, true);262MOV(32, R(TEMPREG), gpr.R(rs));263NOT(32, R(TEMPREG));264BSR(32, TEMPREG, R(TEMPREG));265FixupBranch notFound = J_CC(CC_Z);266267MOV(32, gpr.R(rd), Imm32(31));268SUB(32, gpr.R(rd), R(TEMPREG));269FixupBranch skip = J();270271SetJumpTarget(notFound);272MOV(32, gpr.R(rd), Imm32(32));273274SetJumpTarget(skip);275gpr.UnlockAll();276}277break;278default:279DISABLE;280}281}282283static u32 RType3_ImmAdd(const u32 a, const u32 b)284{285return a + b;286}287288static u32 RType3_ImmSub(const u32 a, const u32 b)289{290return a - b;291}292293static u32 RType3_ImmAnd(const u32 a, const u32 b)294{295return a & b;296}297298static u32 RType3_ImmOr(const u32 a, const u32 b)299{300return a | b;301}302303static u32 RType3_ImmXor(const u32 a, const u32 b)304{305return a ^ b;306}307308//rd = rs X rt309void Jit::CompTriArith(MIPSOpcode op, void (XEmitter::*arith)(int, const OpArg &, const OpArg &), u32 (*doImm)(const u32, const u32), bool invertResult)310{311MIPSGPReg rt = _RT;312MIPSGPReg rs = _RS;313MIPSGPReg rd = _RD;314315// Both sides known, we can just evaporate the instruction.316if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt)) {317u32 value = doImm(gpr.GetImm(rs), gpr.GetImm(rt));318gpr.SetImm(rd, invertResult ? (~value) : value);319return;320}321322// Act like zero was used if the operand is equivalent. This happens.323if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0)324rs = MIPS_REG_ZERO;325if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0)326rt = MIPS_REG_ZERO;327328// Special cases that translate nicely329if (doImm == &RType3_ImmSub && rs == MIPS_REG_ZERO && rt == rd) {330gpr.MapReg(rd, true, true);331NEG(32, gpr.R(rd));332if (invertResult) {333NOT(32, gpr.R(rd));334}335return;336}337338gpr.Lock(rt, rs, rd);339// Optimize out operations against 0... and is the only one that isn't a MOV.340if (rt == MIPS_REG_ZERO || (rs == MIPS_REG_ZERO && doImm != &RType3_ImmSub)) {341if (doImm == &RType3_ImmAnd) {342gpr.SetImm(rd, invertResult ? 0xFFFFFFFF : 0);343} else {344MIPSGPReg rsource = (rt == MIPS_REG_ZERO) ? rs : rt;345if (rsource != rd) {346gpr.MapReg(rd, false, true);347MOV(32, gpr.R(rd), gpr.R(rsource));348if (invertResult) {349NOT(32, gpr.R(rd));350}351} else if (invertResult) {352// rsource == rd, but still need to invert.353gpr.MapReg(rd, true, true);354NOT(32, gpr.R(rd));355}356}357} else if (gpr.IsImm(rt)) {358// No temporary needed.359u32 rtval = gpr.GetImm(rt);360gpr.MapReg(rd, rs == rd, true);361if (rs != rd) {362MOV(32, gpr.R(rd), gpr.R(rs));363}364(this->*arith)(32, gpr.R(rd), Imm32(rtval));365if (invertResult) {366NOT(32, gpr.R(rd));367}368} else {369// Use TEMPREG as a temporary if we'd overwrite it.370if (rd == rt)371MOV(32, R(TEMPREG), gpr.R(rt));372gpr.MapReg(rd, rs == rd, true);373if (rs != rd)374MOV(32, gpr.R(rd), gpr.R(rs));375(this->*arith)(32, gpr.R(rd), rd == rt ? R(TEMPREG) : gpr.R(rt));376if (invertResult) {377NOT(32, gpr.R(rd));378}379}380gpr.UnlockAll();381}382383void Jit::Comp_RType3(MIPSOpcode op)384{385CONDITIONAL_DISABLE(ALU);386387MIPSGPReg rt = _RT;388MIPSGPReg rs = _RS;389MIPSGPReg rd = _RD;390391// noop, won't write to ZERO.392if (rd == MIPS_REG_ZERO)393return;394395switch (op & 63)396{397case 10: //if (R(rt) == 0) R(rd) = R(rs); break; //movz398if (rd == rs)399break;400gpr.Lock(rt, rs, rd);401if (!gpr.IsImm(rt))402{403gpr.KillImmediate(rs, true, false);404// Need to load rd in case the condition fails.405gpr.MapReg(rd, true, true);406CMP(32, gpr.R(rt), Imm32(0));407CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_E);408}409else if (gpr.GetImm(rt) == 0)410{411if (gpr.IsImm(rs))412gpr.SetImm(rd, gpr.GetImm(rs));413else if (rd != rs)414{415gpr.MapReg(rd, false, true);416MOV(32, gpr.R(rd), gpr.R(rs));417}418}419gpr.UnlockAll();420break;421422case 11: //if (R(rt) != 0) R(rd) = R(rs); break; //movn423if (rd == rs)424break;425gpr.Lock(rt, rs, rd);426if (!gpr.IsImm(rt))427{428gpr.KillImmediate(rs, true, false);429// Need to load rd in case the condition fails.430gpr.MapReg(rd, true, true);431CMP(32, gpr.R(rt), Imm32(0));432CMOVcc(32, gpr.RX(rd), gpr.R(rs), CC_NE);433}434else if (gpr.GetImm(rt) != 0)435{436if (gpr.IsImm(rs))437gpr.SetImm(rd, gpr.GetImm(rs));438else if (rd != rs)439{440gpr.MapReg(rd, false, true);441MOV(32, gpr.R(rd), gpr.R(rs));442}443}444gpr.UnlockAll();445break;446447case 32: //R(rd) = R(rs) + R(rt); break; //add448case 33: //R(rd) = R(rs) + R(rt); break; //addu449if (rd != rs && rd != rt && gpr.R(rs).IsSimpleReg() && gpr.R(rt).IsSimpleReg()) {450gpr.Lock(rt, rs, rd);451gpr.MapReg(rd, false, true);452LEA(32, gpr.RX(rd), MRegSum(gpr.RX(rs), gpr.RX(rt)));453gpr.UnlockAll();454} else {455CompTriArith(op, &XEmitter::ADD, &RType3_ImmAdd);456}457break;458case 34: //R(rd) = R(rs) - R(rt); break; //sub459case 35: //R(rd) = R(rs) - R(rt); break; //subu460CompTriArith(op, &XEmitter::SUB, &RType3_ImmSub);461break;462case 36: //R(rd) = R(rs) & R(rt); break; //and463CompTriArith(op, &XEmitter::AND, &RType3_ImmAnd);464break;465case 37: //R(rd) = R(rs) | R(rt); break; //or466CompTriArith(op, &XEmitter::OR, &RType3_ImmOr);467break;468case 38: //R(rd) = R(rs) ^ R(rt); break; //xor469CompTriArith(op, &XEmitter::XOR, &RType3_ImmXor);470break;471472case 39: // R(rd) = ~(R(rs) | R(rt)); //nor473CompTriArith(op, &XEmitter::OR, &RType3_ImmOr, true);474break;475476case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt477if (gpr.IsImm(rs) && gpr.IsImm(rt)) {478gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt));479} else if (rs == rt) {480gpr.SetImm(rd, 0);481} else {482gpr.Lock(rd, rs, rt);483gpr.MapReg(rd, rd == rt || rd == rs, true);484485// Let's try to avoid loading rs or if it's an imm, flushing it.486MIPSGPReg lhs = rs;487MIPSGPReg rhs = rt;488CCFlags cc = CC_L;489if (gpr.IsImm(lhs)) {490// rhs is guaranteed not to be an imm (handled above.)491std::swap(lhs, rhs);492cc = SwapCCFlag(cc);493} else if (!gpr.R(lhs).CanDoOpWith(gpr.R(rhs))) {494// Let's try to pick which makes more sense to load.495if (MIPSAnalyst::IsRegisterUsed(rhs, GetCompilerPC() + 4, 3)) {496std::swap(lhs, rhs);497cc = SwapCCFlag(cc);498}499gpr.MapReg(lhs, true, false);500}501502bool needsTemp = !HasLowSubregister(gpr.R(rd)) || rd == rt || rd == rs;503if (needsTemp) {504CMP(32, gpr.R(lhs), gpr.R(rhs));505SETcc(cc, R(TEMPREG));506MOVZX(32, 8, gpr.RX(rd), R(TEMPREG));507} else {508XOR(32, gpr.R(rd), gpr.R(rd));509CMP(32, gpr.R(lhs), gpr.R(rhs));510SETcc(cc, gpr.R(rd));511}512gpr.UnlockAll();513}514break;515516case 43: //R(rd) = R(rs) < R(rt); break; //sltu517if (gpr.IsImm(rs) && gpr.IsImm(rt)) {518gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt));519} else if (rs == rt) {520gpr.SetImm(rd, 0);521} else {522gpr.Lock(rd, rs, rt);523gpr.MapReg(rd, rd == rt || rd == rs, true);524525// Let's try to avoid loading rs or if it's an imm, flushing it.526MIPSGPReg lhs = rs;527MIPSGPReg rhs = rt;528CCFlags cc = CC_B;529if (gpr.IsImm(lhs)) {530// rhs is guaranteed not to be an imm (handled above.)531std::swap(lhs, rhs);532cc = SwapCCFlag(cc);533} else if (!gpr.R(lhs).CanDoOpWith(gpr.R(rhs))) {534// Let's try to pick which makes more sense to load.535if (MIPSAnalyst::IsRegisterUsed(rhs, GetCompilerPC() + 4, 3)) {536std::swap(lhs, rhs);537cc = SwapCCFlag(cc);538}539gpr.MapReg(lhs, true, false);540}541542bool needsTemp = !HasLowSubregister(gpr.R(rd)) || rd == rt || rd == rs;543if (needsTemp) {544CMP(32, gpr.R(lhs), gpr.R(rhs));545SETcc(cc, R(TEMPREG));546MOVZX(32, 8, gpr.RX(rd), R(TEMPREG));547} else {548XOR(32, gpr.R(rd), gpr.R(rd));549CMP(32, gpr.R(lhs), gpr.R(rhs));550SETcc(cc, gpr.R(rd));551}552gpr.UnlockAll();553}554break;555556case 44: //R(rd) = (R(rs) > R(rt)) ? R(rs) : R(rt); break; //max557if (gpr.IsImm(rs) && gpr.IsImm(rt))558gpr.SetImm(rd, std::max((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));559else560{561MIPSGPReg rsrc = rd == rt ? rs : rt;562gpr.Lock(rd, rs, rt);563gpr.KillImmediate(rsrc, true, false);564gpr.MapReg(rd, rd == rs || rd == rt, true);565if (rd != rt && rd != rs)566MOV(32, gpr.R(rd), gpr.R(rs));567CMP(32, gpr.R(rd), gpr.R(rsrc));568CMOVcc(32, gpr.RX(rd), gpr.R(rsrc), CC_L);569gpr.UnlockAll();570}571break;572573case 45: //R(rd) = (R(rs) < R(rt)) ? R(rs) : R(rt); break; //min574if (gpr.IsImm(rs) && gpr.IsImm(rt))575gpr.SetImm(rd, std::min((s32)gpr.GetImm(rs), (s32)gpr.GetImm(rt)));576else577{578MIPSGPReg rsrc = rd == rt ? rs : rt;579gpr.Lock(rd, rs, rt);580gpr.KillImmediate(rsrc, true, false);581gpr.MapReg(rd, rd == rs || rd == rt, true);582if (rd != rt && rd != rs)583MOV(32, gpr.R(rd), gpr.R(rs));584CMP(32, gpr.R(rd), gpr.R(rsrc));585CMOVcc(32, gpr.RX(rd), gpr.R(rsrc), CC_G);586gpr.UnlockAll();587}588break;589590default:591Comp_Generic(op);592break;593}594}595596static u32 ShiftType_ImmLogicalLeft(const u32 a, const u32 b)597{598return a << (b & 0x1f);599}600601static u32 ShiftType_ImmLogicalRight(const u32 a, const u32 b)602{603return a >> (b & 0x1f);604}605606static u32 ShiftType_ImmArithRight(const u32 a, const u32 b)607{608return ((s32) a) >> (b & 0x1f);609}610611static u32 ShiftType_ImmRotateRight(const u32 a, const u32 b)612{613const s8 sa = b & 0x1f;614return (a >> sa) | (a << (32 - sa));615}616617void Jit::CompShiftImm(MIPSOpcode op, void (XEmitter::*shift)(int, OpArg, OpArg), u32 (*doImm)(const u32, const u32))618{619MIPSGPReg rd = _RD;620MIPSGPReg rt = _RT;621int sa = _SA;622623if (doImm && gpr.IsImm(rt))624{625gpr.SetImm(rd, doImm(gpr.GetImm(rt), sa));626return;627}628629gpr.Lock(rd, rt);630gpr.MapReg(rd, rd == rt, true);631if (rd != rt)632MOV(32, gpr.R(rd), gpr.R(rt));633(this->*shift)(32, gpr.R(rd), Imm8(sa));634gpr.UnlockAll();635}636637// "over-shifts" work the same as on x86 - only bottom 5 bits are used to get the shift value638void Jit::CompShiftVar(MIPSOpcode op, void (XEmitter::*shift)(int, OpArg, OpArg), u32 (*doImm)(const u32, const u32))639{640MIPSGPReg rd = _RD;641MIPSGPReg rt = _RT;642MIPSGPReg rs = _RS;643644if (doImm && gpr.IsImm(rs) && gpr.IsImm(rt))645{646gpr.SetImm(rd, doImm(gpr.GetImm(rt), gpr.GetImm(rs)));647return;648}649650gpr.Lock(rd, rt, rs);651if (gpr.IsImm(rs)) {652int sa = gpr.GetImm(rs);653gpr.MapReg(rd, rd == rt, true);654if (cpu_info.bBMI2 && shift == &XEmitter::ROR) {655_assert_(!gpr.IsImm(rt));656RORX(32, gpr.RX(rd), gpr.R(rt), sa & 0x1F);657} else {658if (rd != rt)659MOV(32, gpr.R(rd), gpr.R(rt));660(this->*shift)(32, gpr.R(rd), Imm8(sa & 0x1F));661}662} else if (cpu_info.bBMI2 && shift != &XEmitter::ROR) {663gpr.MapReg(rd, rd == rt || rd == rs, true);664gpr.MapReg(rs, true, false);665MIPSGPReg src = rt;666if (gpr.IsImm(rt) && rd == rs) {667gpr.MapReg(rt, true, false);668} else if (gpr.IsImm(rt)) {669MOV(32, gpr.R(rd), gpr.R(rt));670src = rd;671}672if (shift == &XEmitter::SHL)673SHLX(32, gpr.RX(rd), gpr.R(src), gpr.RX(rs));674else if (shift == &XEmitter::SHR)675SHRX(32, gpr.RX(rd), gpr.R(src), gpr.RX(rs));676else if (shift == &XEmitter::SAR)677SARX(32, gpr.RX(rd), gpr.R(src), gpr.RX(rs));678else679_assert_msg_(false, "Unexpected shift type");680} else {681gpr.FlushLockX(ECX);682gpr.MapReg(rd, rd == rt || rd == rs, true);683MOV(32, R(ECX), gpr.R(rs)); // Only ECX can be used for variable shifts.684AND(32, R(ECX), Imm32(0x1f));685if (rd != rt)686MOV(32, gpr.R(rd), gpr.R(rt));687(this->*shift)(32, gpr.R(rd), R(ECX));688gpr.UnlockAllX();689}690gpr.UnlockAll();691}692693void Jit::Comp_ShiftType(MIPSOpcode op)694{695CONDITIONAL_DISABLE(ALU);696int rs = (op>>21) & 0x1F;697MIPSGPReg rd = _RD;698int fd = (op>>6) & 0x1F;699700// noop, won't write to ZERO.701if (rd == MIPS_REG_ZERO)702return;703704// WARNING : ROTR705switch (op & 0x3f)706{707case 0: CompShiftImm(op, &XEmitter::SHL, &ShiftType_ImmLogicalLeft); break;708case 2: CompShiftImm(op, rs == 1 ? &XEmitter::ROR : &XEmitter::SHR, rs == 1 ? &ShiftType_ImmRotateRight : &ShiftType_ImmLogicalRight); break; // srl, rotr709case 3: CompShiftImm(op, &XEmitter::SAR, &ShiftType_ImmArithRight); break; // sra710711case 4: CompShiftVar(op, &XEmitter::SHL, &ShiftType_ImmLogicalLeft); break; //sllv712case 6: CompShiftVar(op, fd == 1 ? &XEmitter::ROR : &XEmitter::SHR, fd == 1 ? &ShiftType_ImmRotateRight : &ShiftType_ImmLogicalRight); break; //srlv713case 7: CompShiftVar(op, &XEmitter::SAR, &ShiftType_ImmArithRight); break; //srav714715default:716Comp_Generic(op);717break;718}719}720721void Jit::Comp_Special3(MIPSOpcode op)722{723CONDITIONAL_DISABLE(ALU_BIT);724MIPSGPReg rs = _RS;725MIPSGPReg rt = _RT;726727int pos = _POS;728int size = _SIZE + 1;729u32 mask = 0xFFFFFFFFUL >> (32 - size);730731// Don't change $zr.732if (rt == MIPS_REG_ZERO)733return;734735switch (op & 0x3f)736{737case 0x0: //ext738if (gpr.IsImm(rs))739{740gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);741return;742}743744gpr.Lock(rs, rt);745gpr.MapReg(rt, rs == rt, true);746if (rs != rt)747MOV(32, gpr.R(rt), gpr.R(rs));748if (pos != 0) {749SHR(32, gpr.R(rt), Imm8(pos));750}751// Might not need to AND if we used a wall anyway.752if ((0xFFFFFFFF >> pos) != mask) {753AND(32, gpr.R(rt), Imm32(mask));754}755gpr.UnlockAll();756break;757758case 0x4: //ins759{760u32 sourcemask = mask >> pos;761u32 destmask = ~(sourcemask << pos);762if (gpr.IsImm(rs))763{764u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;765if (gpr.IsImm(rt))766{767gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);768return;769}770771gpr.Lock(rs, rt);772gpr.MapReg(rt, true, true);773AND(32, gpr.R(rt), Imm32(destmask));774if (inserted != 0)775OR(32, gpr.R(rt), Imm32(inserted));776gpr.UnlockAll();777}778else if (gpr.IsImm(rt))779{780// This happens. We can skip the AND and a load.781gpr.Lock(rs, rt);782u32 rtImm = gpr.GetImm(rt) & destmask;783gpr.MapReg(rt, false, true);784MOV(32, gpr.R(rt), gpr.R(rs));785AND(32, gpr.R(rt), Imm32(sourcemask));786if (pos != 0) {787SHL(32, gpr.R(rt), Imm8(pos));788}789OR(32, gpr.R(rt), Imm32(rtImm));790gpr.UnlockAll();791}792else793{794gpr.Lock(rs, rt);795gpr.MapReg(rt, true, true);796MOV(32, R(TEMPREG), gpr.R(rs));797AND(32, R(TEMPREG), Imm32(sourcemask));798if (pos != 0) {799SHL(32, R(TEMPREG), Imm8(pos));800}801AND(32, gpr.R(rt), Imm32(destmask));802OR(32, gpr.R(rt), R(TEMPREG));803gpr.UnlockAll();804}805}806break;807}808}809810811void Jit::Comp_Allegrex(MIPSOpcode op)812{813CONDITIONAL_DISABLE(ALU_BIT);814MIPSGPReg rt = _RT;815MIPSGPReg rd = _RD;816// Don't change $zr.817if (rd == MIPS_REG_ZERO)818return;819820switch ((op >> 6) & 31)821{822case 16: // seb // R(rd) = SignExtend8ToU32(R(rt));823if (gpr.IsImm(rt))824{825gpr.SetImm(rd, SignExtend8ToU32(gpr.GetImm(rt)));826break;827}828829gpr.Lock(rd, rt);830gpr.MapReg(rd, rd == rt, true);831// Work around the byte-register addressing problem.832if (gpr.R(rt).IsSimpleReg() && !HasLowSubregister(gpr.R(rt)))833{834MOV(32, R(TEMPREG), gpr.R(rt));835MOVSX(32, 8, gpr.RX(rd), R(TEMPREG));836}837else838{839gpr.KillImmediate(rt, true, false);840MOVSX(32, 8, gpr.RX(rd), gpr.R(rt));841}842gpr.UnlockAll();843break;844845case 20: //bitrev846if (gpr.IsImm(rt))847{848// http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel849u32 v = gpr.GetImm(rt);850// swap odd and even bits851v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);852// swap consecutive pairs853v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);854// swap nibbles ...855v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);856// swap bytes857v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);858// swap 2-byte long pairs859v = ( v >> 16 ) | ( v << 16);860gpr.SetImm(rd, v);861break;862}863864gpr.Lock(rd, rt);865gpr.MapReg(rd, rd == rt, true);866if (rd != rt)867MOV(32, gpr.R(rd), gpr.R(rt));868869LEA(32, TEMPREG, MScaled(gpr.RX(rd), 2, 0));870SHR(32, gpr.R(rd), Imm8(1));871XOR(32, gpr.R(rd), R(TEMPREG));872AND(32, gpr.R(rd), Imm32(0x55555555));873XOR(32, gpr.R(rd), R(TEMPREG));874875LEA(32, TEMPREG, MScaled(gpr.RX(rd), 4, 0));876SHR(32, gpr.R(rd), Imm8(2));877XOR(32, gpr.R(rd), R(TEMPREG));878AND(32, gpr.R(rd), Imm32(0x33333333));879XOR(32, gpr.R(rd), R(TEMPREG));880881MOV(32, R(TEMPREG), gpr.R(rd));882SHL(32, R(TEMPREG), Imm8(4));883SHR(32, gpr.R(rd), Imm8(4));884XOR(32, gpr.R(rd), R(TEMPREG));885AND(32, gpr.R(rd), Imm32(0x0F0F0F0F));886XOR(32, gpr.R(rd), R(TEMPREG));887888MOV(32, R(TEMPREG), gpr.R(rd));889SHL(32, R(TEMPREG), Imm8(8));890SHR(32, gpr.R(rd), Imm8(8));891XOR(32, gpr.R(rd), R(TEMPREG));892AND(32, gpr.R(rd), Imm32(0x00FF00FF));893XOR(32, gpr.R(rd), R(TEMPREG));894895ROL(32, gpr.R(rd), Imm8(16));896897gpr.UnlockAll();898break;899900case 24: // seh // R(rd) = SignExtend16ToU32(R(rt));901if (gpr.IsImm(rt))902{903gpr.SetImm(rd, SignExtend16ToU32(gpr.GetImm(rt)));904break;905}906907gpr.Lock(rd, rt);908gpr.MapReg(rd, rd == rt, true);909MOVSX(32, 16, gpr.RX(rd), gpr.R(rt));910gpr.UnlockAll();911break;912913default:914Comp_Generic(op);915return;916}917}918919void Jit::Comp_Allegrex2(MIPSOpcode op)920{921CONDITIONAL_DISABLE(ALU_BIT);922MIPSGPReg rt = _RT;923MIPSGPReg rd = _RD;924// Don't change $zr.925if (rd == MIPS_REG_ZERO)926return;927928switch (op & 0x3ff)929{930case 0xA0: //wsbh931if (gpr.IsImm(rt)) {932u32 rtImm = gpr.GetImm(rt);933gpr.SetImm(rd, ((rtImm & 0xFF00FF00) >> 8) | ((rtImm & 0x00FF00FF) << 8));934break;935}936gpr.Lock(rd, rt);937gpr.MapReg(rd, rd == rt, true);938if (rd != rt)939MOV(32, gpr.R(rd), gpr.R(rt));940// Swap both 16-bit halfwords by rotating afterward.941BSWAP(32, gpr.RX(rd));942ROR(32, gpr.R(rd), Imm8(16));943gpr.UnlockAll();944break;945case 0xE0: //wsbw946if (gpr.IsImm(rt)) {947gpr.SetImm(rd, swap32(gpr.GetImm(rt)));948break;949}950gpr.Lock(rd, rt);951gpr.MapReg(rd, rd == rt, true);952if (rd != rt)953MOV(32, gpr.R(rd), gpr.R(rt));954BSWAP(32, gpr.RX(rd));955gpr.UnlockAll();956break;957default:958Comp_Generic(op);959break;960}961}962963void Jit::Comp_MulDivType(MIPSOpcode op)964{965CONDITIONAL_DISABLE(MULDIV);966MIPSGPReg rt = _RT;967MIPSGPReg rs = _RS;968MIPSGPReg rd = _RD;969970switch (op & 63)971{972case 16: // R(rd) = HI; //mfhi973if (rd != MIPS_REG_ZERO) {974gpr.MapReg(rd, false, true);975MOV(32, gpr.R(rd), gpr.R(MIPS_REG_HI));976}977break;978979case 17: // HI = R(rs); //mthi980gpr.KillImmediate(MIPS_REG_HI, false, true);981gpr.MapReg(rs, true, false);982MOV(32, gpr.R(MIPS_REG_HI), gpr.R(rs));983break;984985case 18: // R(rd) = LO; break; //mflo986if (rd != MIPS_REG_ZERO) {987gpr.MapReg(rd, false, true);988MOV(32, gpr.R(rd), gpr.R(MIPS_REG_LO));989}990break;991992case 19: // LO = R(rs); break; //mtlo993gpr.KillImmediate(MIPS_REG_LO, false, true);994gpr.MapReg(rs, true, false);995MOV(32, gpr.R(MIPS_REG_LO), gpr.R(rs));996break;997998case 24: //mult (the most popular one). lo,hi = signed mul (rs * rt)999gpr.FlushLockX(EDX);1000gpr.KillImmediate(MIPS_REG_HI, false, true);1001gpr.KillImmediate(MIPS_REG_LO, false, true);1002gpr.KillImmediate(rt, true, false);1003// Mul, this must be EAX!1004MOV(32, R(EAX), gpr.R(rs));1005IMUL(32, gpr.R(rt));1006MOV(32, gpr.R(MIPS_REG_HI), R(EDX));1007MOV(32, gpr.R(MIPS_REG_LO), R(EAX));1008gpr.UnlockAllX();1009break;101010111012case 25: //multu (2nd) lo,hi = unsigned mul (rs * rt)1013gpr.FlushLockX(EDX);1014gpr.KillImmediate(MIPS_REG_HI, false, true);1015gpr.KillImmediate(MIPS_REG_LO, false, true);1016gpr.KillImmediate(rt, true, false);1017MOV(32, R(EAX), gpr.R(rs));1018MUL(32, gpr.R(rt));1019MOV(32, gpr.R(MIPS_REG_HI), R(EDX));1020MOV(32, gpr.R(MIPS_REG_LO), R(EAX));1021gpr.UnlockAllX();1022break;10231024case 26: //div1025{1026gpr.FlushLockX(EDX);1027gpr.KillImmediate(MIPS_REG_HI, false, true);1028gpr.KillImmediate(MIPS_REG_LO, false, true);1029// For CMP.1030gpr.KillImmediate(rs, true, false);1031gpr.KillImmediate(rt, true, false);10321033MOV(32, R(EAX), gpr.R(rs));10341035CMP(32, gpr.R(rt), Imm32(0));1036FixupBranch divZero = J_CC(CC_E);10371038// INT_MAX / -1 would overflow.1039CMP(32, gpr.R(rs), Imm32(0x80000000));1040FixupBranch notOverflow = J_CC(CC_NE);1041CMP(32, gpr.R(rt), Imm32((u32) -1));1042FixupBranch notOverflow2 = J_CC(CC_NE);1043MOV(32, gpr.R(MIPS_REG_LO), Imm32(0x80000000));1044MOV(32, gpr.R(MIPS_REG_HI), Imm32(-1));1045FixupBranch skip2 = J();10461047SetJumpTarget(notOverflow);1048SetJumpTarget(notOverflow2);10491050CDQ();1051IDIV(32, gpr.R(rt));1052MOV(32, gpr.R(MIPS_REG_HI), R(EDX));1053MOV(32, gpr.R(MIPS_REG_LO), R(EAX));1054FixupBranch skip = J();10551056SetJumpTarget(divZero);1057MOV(32, gpr.R(MIPS_REG_HI), R(EAX));1058MOV(32, gpr.R(MIPS_REG_LO), Imm32(-1));1059CMP(32, R(EAX), Imm32(0));1060FixupBranch positiveDivZero = J_CC(CC_GE);1061MOV(32, gpr.R(MIPS_REG_LO), Imm32(1));10621063SetJumpTarget(positiveDivZero);1064SetJumpTarget(skip);1065SetJumpTarget(skip2);1066gpr.UnlockAllX();1067}1068break;10691070case 27: //divu1071{1072gpr.FlushLockX(EDX);1073gpr.KillImmediate(MIPS_REG_HI, false, true);1074gpr.KillImmediate(MIPS_REG_LO, false, true);1075gpr.KillImmediate(rt, true, false);10761077MOV(32, R(EAX), gpr.R(rs));1078MOV(32, R(EDX), Imm32(0));10791080CMP(32, gpr.R(rt), Imm32(0));1081FixupBranch divZero = J_CC(CC_E);10821083DIV(32, gpr.R(rt));1084MOV(32, gpr.R(MIPS_REG_HI), R(EDX));1085MOV(32, gpr.R(MIPS_REG_LO), R(EAX));1086FixupBranch skip = J();10871088SetJumpTarget(divZero);1089MOV(32, gpr.R(MIPS_REG_HI), R(EAX));1090MOV(32, gpr.R(MIPS_REG_LO), Imm32(-1));1091CMP(32, R(EAX), Imm32(0xFFFF));1092FixupBranch moreThan16Bit = J_CC(CC_A);1093MOV(32, gpr.R(MIPS_REG_LO), Imm32(0xFFFF));10941095SetJumpTarget(moreThan16Bit);1096SetJumpTarget(skip);1097gpr.UnlockAllX();1098}1099break;11001101case 28: // madd1102gpr.FlushLockX(EDX);1103gpr.KillImmediate(MIPS_REG_HI, false, true);1104gpr.KillImmediate(MIPS_REG_LO, false, true);1105gpr.KillImmediate(rt, true, false);1106MOV(32, R(EAX), gpr.R(rs));1107IMUL(32, gpr.R(rt));1108ADD(32, gpr.R(MIPS_REG_LO), R(EAX));1109ADC(32, gpr.R(MIPS_REG_HI), R(EDX));1110gpr.UnlockAllX();1111break;11121113case 29: // maddu1114gpr.FlushLockX(EDX);1115gpr.KillImmediate(MIPS_REG_HI, false, true);1116gpr.KillImmediate(MIPS_REG_LO, false, true);1117gpr.KillImmediate(rt, true, false);1118MOV(32, R(EAX), gpr.R(rs));1119MUL(32, gpr.R(rt));1120ADD(32, gpr.R(MIPS_REG_LO), R(EAX));1121ADC(32, gpr.R(MIPS_REG_HI), R(EDX));1122gpr.UnlockAllX();1123break;11241125case 46: // msub1126gpr.FlushLockX(EDX);1127gpr.KillImmediate(MIPS_REG_HI, false, true);1128gpr.KillImmediate(MIPS_REG_LO, false, true);1129gpr.KillImmediate(rt, true, false);1130MOV(32, R(EAX), gpr.R(rs));1131IMUL(32, gpr.R(rt));1132SUB(32, gpr.R(MIPS_REG_LO), R(EAX));1133SBB(32, gpr.R(MIPS_REG_HI), R(EDX));1134gpr.UnlockAllX();1135break;11361137case 47: // msubu1138gpr.FlushLockX(EDX);1139gpr.KillImmediate(MIPS_REG_HI, false, true);1140gpr.KillImmediate(MIPS_REG_LO, false, true);1141gpr.KillImmediate(rt, true, false);1142MOV(32, R(EAX), gpr.R(rs));1143MUL(32, gpr.R(rt));1144SUB(32, gpr.R(MIPS_REG_LO), R(EAX));1145SBB(32, gpr.R(MIPS_REG_HI), R(EDX));1146gpr.UnlockAllX();1147break;11481149default:1150DISABLE;1151}1152}1153}11541155#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)115611571158