// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/Core/MIPS/ARM64/Arm64CompALU.cpp
// Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "ppsspp_config.h"18#if PPSSPP_ARCH(ARM64)1920#include <algorithm>2122#include "Common/BitSet.h"23#include "Common/CPUDetect.h"24#include "Common/Data/Convert/SmallDataConvert.h"25#include "Core/MIPS/MIPS.h"26#include "Core/MIPS/MIPSAnalyst.h"27#include "Core/MIPS/MIPSCodeUtils.h"28#include "Core/MIPS/ARM64/Arm64Jit.h"29#include "Core/MIPS/ARM64/Arm64RegCache.h"3031using namespace MIPSAnalyst;3233#define _RS MIPS_GET_RS(op)34#define _RT MIPS_GET_RT(op)35#define _RD MIPS_GET_RD(op)36#define _FS MIPS_GET_FS(op)37#define _FT MIPS_GET_FT(op)38#define _FD MIPS_GET_FD(op)39#define _SA MIPS_GET_SA(op)40#define _POS ((op>> 6) & 0x1F)41#define _SIZE ((op>>11) & 0x1F)42#define _IMM16 (signed short)(op & 0xFFFF)43#define _IMM26 (op & 0x03FFFFFF)4445// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.46// Currently known non working ones should have DISABLE.4748//#define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }49#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }50#define DISABLE { Comp_Generic(op); return; }5152namespace MIPSComp {53using namespace Arm64Gen;54using namespace Arm64JitConstants;5556static u32 EvalOr(u32 a, 
u32 b) { return a | b; }57static u32 EvalEor(u32 a, u32 b) { return a ^ b; }58static u32 EvalAnd(u32 a, u32 b) { return a & b; }59static u32 EvalAdd(u32 a, u32 b) { return a + b; }60static u32 EvalSub(u32 a, u32 b) { return a - b; }6162void Arm64Jit::CompImmLogic(MIPSGPReg rs, MIPSGPReg rt, u32 uimm, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg src, ARM64Reg src2), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg src, u64 val), u32 (*eval)(u32 a, u32 b)) {63if (gpr.IsImm(rs)) {64gpr.SetImm(rt, (*eval)(gpr.GetImm(rs), uimm));65} else {66gpr.MapDirtyIn(rt, rs);67if (!(this->*tryArithI2R)(gpr.R(rt), gpr.R(rs), uimm)) {68gpr.SetRegImm(SCRATCH1, uimm);69(this->*arith)(gpr.R(rt), gpr.R(rs), SCRATCH1);70}71}72}7374void Arm64Jit::Comp_IType(MIPSOpcode op) {75CONDITIONAL_DISABLE(ALU_IMM);76u32 uimm = op & 0xFFFF;77s32 simm = SignExtend16ToS32(op);78u32 suimm = SignExtend16ToU32(op);7980MIPSGPReg rt = _RT;81MIPSGPReg rs = _RS;8283// noop, won't write to ZERO.84if (rt == 0)85return;8687switch (op >> 26) {88case 8: // same as addiu?89case 9: // R(rt) = R(rs) + simm; break; //addiu90// Special-case for small adjustments of pointerified registers. Commonly for SP but happens for others.91if (rs == rt && gpr.IsMappedAsPointer(rs) && IsImmArithmetic(simm < 0 ? 
-simm : simm, nullptr, nullptr)) {92ARM64Reg r32 = gpr.RPtr(rs);93gpr.MarkDirty(r32);94ARM64Reg r = EncodeRegTo64(r32);95ADDI2R(r, r, simm);96} else {97if (simm >= 0) {98CompImmLogic(rs, rt, simm, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd);99} else if (simm < 0) {100CompImmLogic(rs, rt, -simm, &ARM64XEmitter::SUB, &ARM64XEmitter::TrySUBI2R, &EvalSub);101}102}103break;104105case 12: CompImmLogic(rs, rt, uimm, &ARM64XEmitter::AND, &ARM64XEmitter::TryANDI2R, &EvalAnd); break;106case 13: CompImmLogic(rs, rt, uimm, &ARM64XEmitter::ORR, &ARM64XEmitter::TryORRI2R, &EvalOr); break;107case 14: CompImmLogic(rs, rt, uimm, &ARM64XEmitter::EOR, &ARM64XEmitter::TryEORI2R, &EvalEor); break;108109case 10: // R(rt) = (s32)R(rs) < simm; break; //slti110if (gpr.IsImm(rs)) {111gpr.SetImm(rt, (s32)gpr.GetImm(rs) < simm ? 1 : 0);112break;113} else if (simm == 0) {114gpr.MapDirtyIn(rt, rs);115// Grab the sign bit (< 0) as 1/0. Slightly faster than a shift.116UBFX(gpr.R(rt), gpr.R(rs), 31, 1);117break;118}119gpr.MapDirtyIn(rt, rs);120if (!TryCMPI2R(gpr.R(rs), (u32)simm)) {121gpr.SetRegImm(SCRATCH1, simm);122CMP(gpr.R(rs), SCRATCH1);123}124CSET(gpr.R(rt), CC_LT);125break;126127case 11: // R(rt) = R(rs) < suimm; break; //sltiu128if (gpr.IsImm(rs)) {129gpr.SetImm(rt, gpr.GetImm(rs) < suimm ? 
1 : 0);130break;131}132gpr.MapDirtyIn(rt, rs);133if (!TryCMPI2R(gpr.R(rs), suimm)) {134gpr.SetRegImm(SCRATCH1, suimm);135CMP(gpr.R(rs), SCRATCH1);136}137CSET(gpr.R(rt), CC_LO);138break;139140case 15: // R(rt) = uimm << 16; //lui141gpr.SetImm(rt, uimm << 16);142break;143144default:145Comp_Generic(op);146break;147}148}149150void Arm64Jit::Comp_RType2(MIPSOpcode op) {151CONDITIONAL_DISABLE(ALU_BIT);152153MIPSGPReg rs = _RS;154MIPSGPReg rd = _RD;155156// Don't change $zr.157if (rd == 0)158return;159160switch (op & 63) {161case 22: //clz162if (gpr.IsImm(rs)) {163u32 value = gpr.GetImm(rs);164int x = 31;165int count = 0;166while (x >= 0 && !(value & (1 << x))) {167count++;168x--;169}170gpr.SetImm(rd, count);171break;172}173gpr.MapDirtyIn(rd, rs);174CLZ(gpr.R(rd), gpr.R(rs));175break;176case 23: //clo177if (gpr.IsImm(rs)) {178u32 value = gpr.GetImm(rs);179int x = 31;180int count = 0;181while (x >= 0 && (value & (1 << x))) {182count++;183x--;184}185gpr.SetImm(rd, count);186break;187}188gpr.MapDirtyIn(rd, rs);189MVN(gpr.R(rd), gpr.R(rs));190CLZ(gpr.R(rd), gpr.R(rd));191break;192default:193DISABLE;194}195}196197void Arm64Jit::CompType3(MIPSGPReg rd, MIPSGPReg rs, MIPSGPReg rt, void (ARM64XEmitter::*arith)(ARM64Reg dst, ARM64Reg rm, ARM64Reg rn), bool (ARM64XEmitter::*tryArithI2R)(ARM64Reg dst, ARM64Reg rm, u64 val), u32(*eval)(u32 a, u32 b), bool symmetric) {198if (gpr.IsImm(rs) && gpr.IsImm(rt)) {199gpr.SetImm(rd, (*eval)(gpr.GetImm(rs), gpr.GetImm(rt)));200return;201}202203// Optimize anything against zero.204if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0) {205gpr.MapDirtyIn(rd, rt);206(this->*arith)(gpr.R(rd), WZR, gpr.R(rt));207return;208}209if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {210gpr.MapDirtyIn(rd, rs);211(this->*arith)(gpr.R(rd), gpr.R(rs), WZR);212return;213}214215if (gpr.IsImm(rt) || (gpr.IsImm(rs) && symmetric)) {216MIPSGPReg lhs = gpr.IsImm(rs) ? rt : rs;217MIPSGPReg rhs = gpr.IsImm(rs) ? 
rs : rt;218u32 rhsImm = gpr.GetImm(rhs);219gpr.MapDirtyIn(rd, lhs);220if ((this->*tryArithI2R)(gpr.R(rd), gpr.R(lhs), rhsImm)) {221return;222}223// If rd is rhs, we may have lost it in the MapDirtyIn(). lhs was kept.224// This means the rhsImm value was never flushed to rhs, and would be garbage.225if (rd == rhs) {226// Luckily, it was just an imm.227gpr.SetImm(rhs, rhsImm);228}229}230231// Can't do the RSB optimization on ARM64 - no RSB!232233// Generic solution. If it's an imm, better to flush at this point.234gpr.MapDirtyInIn(rd, rs, rt);235(this->*arith)(gpr.R(rd), gpr.R(rs), gpr.R(rt));236}237238void Arm64Jit::Comp_RType3(MIPSOpcode op) {239CONDITIONAL_DISABLE(ALU);240241MIPSGPReg rt = _RT;242MIPSGPReg rs = _RS;243MIPSGPReg rd = _RD;244245// noop, won't write to ZERO.246if (rd == 0)247return;248249switch (op & 63) {250case 10: //if (!R(rt)) R(rd) = R(rs); break; //movz251gpr.MapDirtyInIn(rd, rt, rs, false);252CMP(gpr.R(rt), 0);253CSEL(gpr.R(rd), gpr.R(rs), gpr.R(rd), CC_EQ);254break;255case 11:// if (R(rt)) R(rd) = R(rs); break; //movn256gpr.MapDirtyInIn(rd, rt, rs, false);257CMP(gpr.R(rt), 0);258CSEL(gpr.R(rd), gpr.R(rs), gpr.R(rd), CC_NEQ);259break;260261case 32: //R(rd) = R(rs) + R(rt); break; //add262case 33: //R(rd) = R(rs) + R(rt); break; //addu263if (gpr.IsImm(rs) && gpr.GetImm(rs) == 0 && !gpr.IsImm(rt)) {264// Special case: actually a mov, avoid arithmetic.265gpr.MapDirtyIn(rd, rt);266MOV(gpr.R(rd), gpr.R(rt));267} else if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0 && !gpr.IsImm(rs)) {268gpr.MapDirtyIn(rd, rs);269MOV(gpr.R(rd), gpr.R(rs));270} else {271CompType3(rd, rs, rt, &ARM64XEmitter::ADD, &ARM64XEmitter::TryADDI2R, &EvalAdd, true);272}273break;274275case 34: //R(rd) = R(rs) - R(rt); break; //sub276case 35: //R(rd) = R(rs) - R(rt); break; //subu277CompType3(rd, rs, rt, &ARM64XEmitter::SUB, &ARM64XEmitter::TrySUBI2R, &EvalSub, false);278break;279280case 36: //R(rd) = R(rs) & R(rt); break; //and281CompType3(rd, rs, rt, &ARM64XEmitter::AND, 
&ARM64XEmitter::TryANDI2R, &EvalAnd, true);282break;283case 37: //R(rd) = R(rs) | R(rt); break; //or284CompType3(rd, rs, rt, &ARM64XEmitter::ORR, &ARM64XEmitter::TryORRI2R, &EvalOr, true);285break;286case 38: //R(rd) = R(rs) ^ R(rt); break; //xor/eor287CompType3(rd, rs, rt, &ARM64XEmitter::EOR, &ARM64XEmitter::TryEORI2R, &EvalEor, true);288break;289290case 39: // R(rd) = ~(R(rs) | R(rt)); break; //nor291if (gpr.IsImm(rs) && gpr.IsImm(rt)) {292gpr.SetImm(rd, ~(gpr.GetImm(rs) | gpr.GetImm(rt)));293} else if (gpr.IsImm(rs) || gpr.IsImm(rt)) {294MIPSGPReg lhs = gpr.IsImm(rs) ? rt : rs;295MIPSGPReg rhs = gpr.IsImm(rs) ? rs : rt;296u32 rhsImm = gpr.GetImm(rhs);297if (rhsImm == 0) {298gpr.MapDirtyIn(rd, lhs);299MVN(gpr.R(rd), gpr.R(lhs));300} else {301// Ignored, just for IsImmLogical.302unsigned int n, imm_s, imm_r;303if (IsImmLogical(rhsImm, 32, &n, &imm_s, &imm_r)) {304// Great, we can avoid flushing a reg.305gpr.MapDirtyIn(rd, lhs);306ORRI2R(gpr.R(rd), gpr.R(lhs), rhsImm);307} else {308gpr.MapDirtyInIn(rd, rs, rt);309ORR(gpr.R(rd), gpr.R(rs), gpr.R(rt));310}311MVN(gpr.R(rd), gpr.R(rd));312}313} else {314gpr.MapDirtyInIn(rd, rs, rt);315ORR(gpr.R(rd), gpr.R(rs), gpr.R(rt));316MVN(gpr.R(rd), gpr.R(rd));317}318break;319320case 42: //R(rd) = (int)R(rs) < (int)R(rt); break; //slt321if (gpr.IsImm(rs) && gpr.IsImm(rt)) {322gpr.SetImm(rd, (s32)gpr.GetImm(rs) < (s32)gpr.GetImm(rt));323} else {324gpr.MapDirtyInIn(rd, rs, rt);325CMP(gpr.R(rs), gpr.R(rt));326CSET(gpr.R(rd), CC_LT);327}328break;329330case 43: //R(rd) = R(rs) < R(rt); break; //sltu331if (gpr.IsImm(rs) && gpr.IsImm(rt)) {332gpr.SetImm(rd, gpr.GetImm(rs) < gpr.GetImm(rt));333} else {334gpr.MapDirtyInIn(rd, rs, rt);335CMP(gpr.R(rs), gpr.R(rt));336CSET(gpr.R(rd), CC_LO);337}338break;339340case 44: //R(rd) = max(R(rs), R(rt); break; //max341if (gpr.IsImm(rs) && gpr.IsImm(rt)) {342gpr.SetImm(rd, std::max(gpr.GetImm(rs), gpr.GetImm(rt)));343break;344}345gpr.MapDirtyInIn(rd, rs, rt);346CMP(gpr.R(rs), 
gpr.R(rt));347CSEL(gpr.R(rd), gpr.R(rs), gpr.R(rt), CC_GE);348break;349350case 45: //R(rd) = min(R(rs), R(rt)); break; //min351if (gpr.IsImm(rs) && gpr.IsImm(rt)) {352gpr.SetImm(rd, std::min(gpr.GetImm(rs), gpr.GetImm(rt)));353break;354}355gpr.MapDirtyInIn(rd, rs, rt);356CMP(gpr.R(rs), gpr.R(rt));357CSEL(gpr.R(rd), gpr.R(rs), gpr.R(rt), CC_LE);358break;359360default:361Comp_Generic(op);362break;363}364}365366void Arm64Jit::CompShiftImm(MIPSOpcode op, Arm64Gen::ShiftType shiftType, int sa) {367MIPSGPReg rd = _RD;368MIPSGPReg rt = _RT;369if (gpr.IsImm(rt)) {370switch (shiftType) {371case ST_LSL:372gpr.SetImm(rd, gpr.GetImm(rt) << sa);373break;374case ST_LSR:375gpr.SetImm(rd, gpr.GetImm(rt) >> sa);376break;377case ST_ASR:378gpr.SetImm(rd, (int)gpr.GetImm(rt) >> sa);379break;380case ST_ROR:381gpr.SetImm(rd, (gpr.GetImm(rt) >> sa) | (gpr.GetImm(rt) << (32 - sa)));382break;383default:384DISABLE;385}386} else {387gpr.MapDirtyIn(rd, rt);388MOV(gpr.R(rd), gpr.R(rt), ArithOption(gpr.R(rd), shiftType, sa));389}390}391392void Arm64Jit::CompShiftVar(MIPSOpcode op, Arm64Gen::ShiftType shiftType) {393MIPSGPReg rd = _RD;394MIPSGPReg rt = _RT;395MIPSGPReg rs = _RS;396if (gpr.IsImm(rs)) {397int sa = gpr.GetImm(rs) & 0x1F;398CompShiftImm(op, shiftType, sa);399return;400}401gpr.MapDirtyInIn(rd, rs, rt);402switch (shiftType) {403case ST_LSL: LSLV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;404case ST_LSR: LSRV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;405case ST_ASR: ASRV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;406case ST_ROR: RORV(gpr.R(rd), gpr.R(rt), gpr.R(rs)); break;407}408}409410void Arm64Jit::Comp_ShiftType(MIPSOpcode op) {411CONDITIONAL_DISABLE(ALU);412MIPSGPReg rs = _RS;413MIPSGPReg rd = _RD;414int fd = _FD;415int sa = _SA;416417// noop, won't write to ZERO.418if (rd == 0)419return;420421// WARNING : ROTR422switch (op & 0x3f) {423case 0: CompShiftImm(op, ST_LSL, sa); break; //sll424case 2: CompShiftImm(op, rs == 1 ? 
ST_ROR : ST_LSR, sa); break; //srl425case 3: CompShiftImm(op, ST_ASR, sa); break; //sra426case 4: CompShiftVar(op, ST_LSL); break; //sllv427case 6: CompShiftVar(op, fd == 1 ? ST_ROR : ST_LSR); break; //srlv428case 7: CompShiftVar(op, ST_ASR); break; //srav429default:430DISABLE;431break;432}433}434435void Arm64Jit::Comp_Special3(MIPSOpcode op) {436CONDITIONAL_DISABLE(ALU_BIT);437MIPSGPReg rs = _RS;438MIPSGPReg rt = _RT;439440int pos = _POS;441int size = _SIZE + 1;442u32 mask = 0xFFFFFFFFUL >> (32 - size);443444// Don't change $zr.445if (rt == 0)446return;447448switch (op & 0x3f) {449case 0x0: //ext450if (gpr.IsImm(rs)) {451gpr.SetImm(rt, (gpr.GetImm(rs) >> pos) & mask);452return;453}454455gpr.MapDirtyIn(rt, rs);456UBFX(gpr.R(rt), gpr.R(rs), pos, size);457break;458459case 0x4: //ins460{461u32 sourcemask = mask >> pos;462u32 destmask = ~(sourcemask << pos);463if (gpr.IsImm(rs)) {464u32 inserted = (gpr.GetImm(rs) & sourcemask) << pos;465if (gpr.IsImm(rt)) {466gpr.SetImm(rt, (gpr.GetImm(rt) & destmask) | inserted);467return;468}469470// It might be nice to avoid flushing rs, but it's a little slower and471// usually more instructions. 
Not worth it.472gpr.MapDirtyIn(rt, rs, false);473BFI(gpr.R(rt), gpr.R(rs), pos, size - pos);474} else {475gpr.MapDirtyIn(rt, rs, false);476BFI(gpr.R(rt), gpr.R(rs), pos, size - pos);477}478}479break;480}481}482483void Arm64Jit::Comp_Allegrex(MIPSOpcode op) {484CONDITIONAL_DISABLE(ALU_BIT);485MIPSGPReg rt = _RT;486MIPSGPReg rd = _RD;487// Don't change $zr.488if (rd == 0)489return;490491switch ((op >> 6) & 31) {492case 16: // seb // R(rd) = SignExtend8ToU32(R(rt));493if (gpr.IsImm(rt)) {494gpr.SetImm(rd, SignExtend8ToU32(gpr.GetImm(rt)));495return;496}497gpr.MapDirtyIn(rd, rt);498SXTB(gpr.R(rd), gpr.R(rt));499break;500501case 24: // seh502if (gpr.IsImm(rt)) {503gpr.SetImm(rd, SignExtend16ToU32(gpr.GetImm(rt)));504return;505}506gpr.MapDirtyIn(rd, rt);507SXTH(gpr.R(rd), gpr.R(rt));508break;509510case 20: //bitrev511if (gpr.IsImm(rt)) {512// http://graphics.stanford.edu/~seander/bithacks.html#ReverseParallel513u32 v = gpr.GetImm(rt);514v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1); // odd<->even515v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2); // pair<->pair516v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4); // nibb<->nibb517v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8); // byte<->byte518v = (v >> 16) | (v << 16); // hword<->hword519gpr.SetImm(rd, v);520return;521}522523gpr.MapDirtyIn(rd, rt);524RBIT(gpr.R(rd), gpr.R(rt));525break;526527default:528Comp_Generic(op);529return;530}531}532533void Arm64Jit::Comp_Allegrex2(MIPSOpcode op) {534CONDITIONAL_DISABLE(ALU_BIT);535MIPSGPReg rt = _RT;536MIPSGPReg rd = _RD;537// Don't change $zr.538if (rd == 0)539return;540541switch (op & 0x3ff) {542case 0xA0: //wsbh543if (gpr.IsImm(rt)) {544gpr.SetImm(rd, ((gpr.GetImm(rt) & 0xFF00FF00) >> 8) | ((gpr.GetImm(rt) & 0x00FF00FF) << 8));545} else {546gpr.MapDirtyIn(rd, rt);547REV16(gpr.R(rd), gpr.R(rt));548}549break;550case 0xE0: //wsbw551if (gpr.IsImm(rt)) {552gpr.SetImm(rd, swap32(gpr.GetImm(rt)));553} else {554gpr.MapDirtyIn(rd, rt);555REV32(gpr.R(rd), 
gpr.R(rt));556}557break;558default:559Comp_Generic(op);560break;561}562}563564void Arm64Jit::Comp_MulDivType(MIPSOpcode op) {565CONDITIONAL_DISABLE(MULDIV);566MIPSGPReg rt = _RT;567MIPSGPReg rs = _RS;568MIPSGPReg rd = _RD;569570// Note that in all cases below, LO is actually mapped to HI:LO.571// That is, the host reg is 64 bits and has HI at the top.572// HI is not mappable.573574switch (op & 63) {575case 16: // R(rd) = HI; //mfhi576// LO and HI are in the same reg.577if (gpr.IsImm(MIPS_REG_LO)) {578gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO) >> 32);579break;580}581gpr.MapDirtyIn(rd, MIPS_REG_LO);582UBFX(EncodeRegTo64(gpr.R(rd)), EncodeRegTo64(gpr.R(MIPS_REG_LO)), 32, 32);583break;584585case 17: // HI = R(rs); //mthi586if (gpr.IsImm(rs) && gpr.IsImm(MIPS_REG_LO)) {587gpr.SetImm(MIPS_REG_LO, (gpr.GetImm(rs) << 32) | (gpr.GetImm(MIPS_REG_LO) & 0xFFFFFFFFULL));588break;589}590gpr.MapDirtyIn(MIPS_REG_LO, rs, false);591BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), 32, 32);592break;593594case 18: // R(rd) = LO; break; //mflo595if (gpr.IsImm(MIPS_REG_LO)) {596gpr.SetImm(rd, gpr.GetImm(MIPS_REG_LO) & 0xFFFFFFFFULL);597break;598}599gpr.MapDirtyIn(rd, MIPS_REG_LO);600MOV(gpr.R(rd), gpr.R(MIPS_REG_LO));601break;602603case 19: // LO = R(rs); break; //mtlo604if (gpr.IsImm(rs) && gpr.IsImm(MIPS_REG_LO)) {605gpr.SetImm(MIPS_REG_LO, gpr.GetImm(rs) | (gpr.GetImm(MIPS_REG_LO) & ~0xFFFFFFFFULL));606break;607}608gpr.MapDirtyIn(MIPS_REG_LO, rs, false);609BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), EncodeRegTo64(gpr.R(rs)), 0, 32);610break;611612case 24: //mult (the most popular one). 
lo,hi = signed mul (rs * rt)613if (gpr.IsImm(rs) && gpr.IsImm(rt)) {614s64 result = (s64)(s32)gpr.GetImm(rs) * (s64)(s32)gpr.GetImm(rt);615gpr.SetImm(MIPS_REG_LO, (u64)result);616break;617}618gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);619SMULL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), gpr.R(rs), gpr.R(rt));620break;621622case 25: //multu (2nd) lo,hi = unsigned mul (rs * rt)623if (gpr.IsImm(rs) && gpr.IsImm(rt)) {624u64 resultBits = (u64)gpr.GetImm(rs) * (u64)gpr.GetImm(rt);625gpr.SetImm(MIPS_REG_LO, resultBits);626break;627}628gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);629// In case of pointerification, let's use UMULL.630UMULL(EncodeRegTo64(gpr.R(MIPS_REG_LO)), gpr.R(rs), gpr.R(rt));631break;632633case 26: //div634{635// TODO: Does this handle INT_MAX, 0, etc. correctly?636gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);637SDIV(gpr.R(MIPS_REG_LO), gpr.R(rs), gpr.R(rt));638MSUB(SCRATCH1, gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));639640CMPI2R(gpr.R(rt), 0);641FixupBranch skipZero = B(CC_NEQ);642// HI set properly already, we just need to set LO.643MOVI2R(gpr.R(MIPS_REG_LO), -1);644CMPI2R(gpr.R(rs), 0);645FixupBranch moreThan16Bit = B(CC_GE);646MOVI2R(gpr.R(MIPS_REG_LO), 1);647SetJumpTarget(moreThan16Bit);648SetJumpTarget(skipZero);649650BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);651break;652}653654case 27: //divu655// Do we have a known power-of-two denominator? 
Yes, this happens.656if (gpr.IsImm(rt) && (gpr.GetImm(rt) & (gpr.GetImm(rt) - 1)) == 0 && gpr.GetImm(rt) != 0) {657u32 denominator = gpr.GetImm(rt);658gpr.MapDirtyIn(MIPS_REG_LO, rs);659// Remainder is just an AND, neat.660ANDI2R(SCRATCH1, gpr.R(rs), denominator - 1, SCRATCH1);661int shift = 0;662while (denominator != 0) {663++shift;664denominator >>= 1;665}666// The shift value is one too much for the divide by the same value.667if (shift > 1) {668LSR(gpr.R(MIPS_REG_LO), gpr.R(rs), shift - 1);669} else {670MOV(gpr.R(MIPS_REG_LO), gpr.R(rs));671}672BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);673} else {674// TODO: Does this handle INT_MAX, 0, etc. correctly?675gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt);676UDIV(gpr.R(MIPS_REG_LO), gpr.R(rs), gpr.R(rt));677MSUB(SCRATCH1, gpr.R(rt), gpr.R(MIPS_REG_LO), gpr.R(rs));678679CMPI2R(gpr.R(rt), 0);680FixupBranch skipZero = B(CC_NEQ);681// HI set properly, we just need to set LO.682MOVI2R(SCRATCH2, 0xFFFF);683MOVI2R(gpr.R(MIPS_REG_LO), -1);684CMP(gpr.R(rs), SCRATCH2);685FixupBranch moreThan16Bit = B(CC_HI);686MOV(gpr.R(MIPS_REG_LO), SCRATCH2);687SetJumpTarget(moreThan16Bit);688SetJumpTarget(skipZero);689690BFI(EncodeRegTo64(gpr.R(MIPS_REG_LO)), SCRATCH1_64, 32, 32);691}692break;693694case 28: //madd695{696gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);697ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));698SMADDL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!699}700break;701702case 29: //maddu703{704gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);705ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));706UMADDL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!707}708break;709710case 46: // msub711{712gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, false);713ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));714SMSUBL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!715}716break;717718case 47: // msubu719{720gpr.MapDirtyInIn(MIPS_REG_LO, rs, rt, 
false);721ARM64Reg lo64 = EncodeRegTo64(gpr.R(MIPS_REG_LO));722UMSUBL(lo64, gpr.R(rs), gpr.R(rt), lo64); // Operands are backwards in the emitter!723break;724}725726default:727DISABLE;728}729}730731}732733#endif // PPSSPP_ARCH(ARM64)734735736