CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/x86/CompFPU.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "ppsspp_config.h"18#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)1920#include "Core/Config.h"21#include "Core/MemMap.h"22#include "Common/CommonTypes.h"23#include "Core/MIPS/MIPS.h"24#include "Core/MIPS/MIPSCodeUtils.h"25#include "Core/MIPS/x86/Jit.h"26#include "Core/MIPS/x86/RegCache.h"2728#define _RS MIPS_GET_RS(op)29#define _RT MIPS_GET_RT(op)30#define _RD MIPS_GET_RD(op)31#define _FS MIPS_GET_FS(op)32#define _FT MIPS_GET_FT(op)33#define _FD MIPS_GET_FD(op)34#define _SA MIPS_GET_SA(op)35#define _POS ((op>> 6) & 0x1F)36#define _SIZE ((op>>11) & 0x1F)37#define _IMM16 (signed short)(op & 0xFFFF)38#define _IMM26 (op & 0x03FFFFFF)3940// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.41// Currently known non working ones should have DISABLE.4243// #define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }44#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }45#define DISABLE { Comp_Generic(op); return; }4647namespace MIPSComp {4849using namespace Gen;50using namespace X64JitConstants;5152alignas(16) const u32 reverseQNAN[4] = { 0x803FFFFF, 0x803FFFFF, 0x803FFFFF, 0x803FFFFF };5354void Jit::CopyFPReg(X64Reg dst, OpArg src) 
{55if (src.IsSimpleReg()) {56MOVAPS(dst, src);57} else {58MOVSS(dst, src);59}60}6162void Jit::CompFPTriArith(MIPSOpcode op, void (XEmitter::*arith)(X64Reg reg, OpArg), bool orderMatters) {63int ft = _FT;64int fs = _FS;65int fd = _FD;66fpr.SpillLock(fd, fs, ft);6768if (fs == fd) {69fpr.MapReg(fd, true, true);70(this->*arith)(fpr.RX(fd), fpr.R(ft));71} else if (ft == fd && !orderMatters) {72fpr.MapReg(fd, true, true);73(this->*arith)(fpr.RX(fd), fpr.R(fs));74} else if (ft != fd) {75// fs can't be fd (handled above.)76fpr.MapReg(fd, false, true);77CopyFPReg(fpr.RX(fd), fpr.R(fs));78(this->*arith)(fpr.RX(fd), fpr.R(ft));79} else {80// fd must be ft, and order must matter.81fpr.MapReg(fd, true, true);82CopyFPReg(XMM0, fpr.R(fs));83(this->*arith)(XMM0, fpr.R(ft));84MOVAPS(fpr.RX(fd), R(XMM0));85}86fpr.ReleaseSpillLocks();87}8889void Jit::Comp_FPU3op(MIPSOpcode op) {90CONDITIONAL_DISABLE(FPU);91switch (op & 0x3f) {92case 0: CompFPTriArith(op, &XEmitter::ADDSS, false); break; //F(fd) = F(fs) + F(ft); //add93case 1: CompFPTriArith(op, &XEmitter::SUBSS, true); break; //F(fd) = F(fs) - F(ft); //sub94case 2: //F(fd) = F(fs) * F(ft); //mul95// XMM1 = !my_isnan(fs) && !my_isnan(ft)96MOVSS(XMM1, fpr.R(_FS));97CMPORDSS(XMM1, fpr.R(_FT));98CompFPTriArith(op, &XEmitter::MULSS, false);99100// fd must still be in a reg, save it in XMM0 for now.101MOVAPS(XMM0, fpr.R(_FD));102// fd = my_isnan(fd) && !my_isnan(fs) && !my_isnan(ft)103CMPUNORDSS(fpr.RX(_FD), fpr.R(_FD));104ANDPS(fpr.RX(_FD), R(XMM1));105// At this point fd = FFFFFFFF if non-NAN inputs produced a NAN output.106// We'll AND it with the inverse QNAN bits to clear (00000000 means no change.)107if (RipAccessible(&reverseQNAN)) {108ANDPS(fpr.RX(_FD), M(&reverseQNAN)); // rip accessible109} else {110MOV(PTRBITS, R(TEMPREG), ImmPtr(&reverseQNAN));111ANDPS(fpr.RX(_FD), MatR(TEMPREG));112}113// ANDN is backwards, which is why we saved XMM0 to start. 
Now put it back.114ANDNPS(fpr.RX(_FD), R(XMM0));115break;116case 3: CompFPTriArith(op, &XEmitter::DIVSS, true); break; //F(fd) = F(fs) / F(ft); //div117default:118_dbg_assert_msg_(false,"Trying to compile FPU3Op instruction that can't be interpreted");119break;120}121}122123void Jit::Comp_FPULS(MIPSOpcode op) {124CONDITIONAL_DISABLE(LSU_FPU);125s32 offset = _IMM16;126int ft = _FT;127MIPSGPReg rs = _RS;128129CheckMemoryBreakpoint(0, rs, offset);130131switch (op >> 26) {132case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc1133{134gpr.Lock(rs);135fpr.SpillLock(ft);136fpr.MapReg(ft, false, true);137138JitSafeMem safe(this, rs, offset);139OpArg src;140if (safe.PrepareRead(src, 4))141MOVSS(fpr.RX(ft), src);142if (safe.PrepareSlowRead(safeMemFuncs.readU32))143MOVD_xmm(fpr.RX(ft), R(EAX));144safe.Finish();145146gpr.UnlockAll();147fpr.ReleaseSpillLocks();148}149break;150case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1151{152gpr.Lock(rs);153fpr.SpillLock(ft);154fpr.MapReg(ft, true, false);155156JitSafeMem safe(this, rs, offset);157OpArg dest;158if (safe.PrepareWrite(dest, 4))159MOVSS(dest, fpr.RX(ft));160if (safe.PrepareSlowWrite())161{162MOVSS(MIPSSTATE_VAR(temp), fpr.RX(ft));163safe.DoSlowWrite(safeMemFuncs.writeU32, MIPSSTATE_VAR(temp));164}165safe.Finish();166167gpr.UnlockAll();168fpr.ReleaseSpillLocks();169}170break;171172default:173_dbg_assert_msg_(false,"Trying to interpret FPULS instruction that can't be interpreted");174break;175}176}177178alignas(16) static const u64 ssSignBits2[2] = {0x8000000080000000ULL, 0x8000000080000000ULL};179alignas(16) static const u64 ssNoSignMask[2] = {0x7FFFFFFF7FFFFFFFULL, 0x7FFFFFFF7FFFFFFFULL};180181void Jit::CompFPComp(int lhs, int rhs, u8 compare, bool allowNaN) {182gpr.MapReg(MIPS_REG_FPCOND, false, true);183184// This means that NaN also means true, e.g. 
!<> or !>, etc.185if (allowNaN) {186CopyFPReg(XMM0, fpr.R(lhs));187CopyFPReg(XMM1, fpr.R(lhs));188CMPSS(XMM0, fpr.R(rhs), compare);189CMPUNORDSS(XMM1, fpr.R(rhs));190191POR(XMM0, R(XMM1));192} else {193CopyFPReg(XMM0, fpr.R(lhs));194CMPSS(XMM0, fpr.R(rhs), compare);195}196197MOVD_xmm(gpr.R(MIPS_REG_FPCOND), XMM0);198}199200void Jit::Comp_FPUComp(MIPSOpcode op) {201CONDITIONAL_DISABLE(FPU_COMP);202203int fs = _FS;204int ft = _FT;205206switch (op & 0xf) {207case 0: //f208case 8: //sf209gpr.SetImm(MIPS_REG_FPCOND, 0);210break;211212case 1: //un213case 9: //ngle214CompFPComp(fs, ft, CMP_UNORD);215break;216217case 2: //eq218case 10: //seq219CompFPComp(fs, ft, CMP_EQ);220break;221222case 3: //ueq223case 11: //ngl224CompFPComp(fs, ft, CMP_EQ, true);225break;226227case 4: //olt228case 12: //lt229CompFPComp(fs, ft, CMP_LT);230break;231232case 5: //ult233case 13: //nge234CompFPComp(ft, fs, CMP_NLE);235break;236237case 6: //ole238case 14: //le239CompFPComp(fs, ft, CMP_LE);240break;241242case 7: //ule243case 15: //ngt244CompFPComp(ft, fs, CMP_NLT);245break;246247default:248DISABLE;249}250}251252void Jit::Comp_FPU2op(MIPSOpcode op) {253CONDITIONAL_DISABLE(FPU);254255int fs = _FS;256int fd = _FD;257258auto execRounding = [&](void (XEmitter::*conv)(X64Reg, OpArg), int setMXCSR) {259fpr.SpillLock(fd, fs);260fpr.MapReg(fd, fs == fd, true);261262// Small optimization: 0 is our default mode anyway.263if (setMXCSR == 0 && !js.hasSetRounding) {264setMXCSR = -1;265}266if (setMXCSR != -1) {267STMXCSR(MIPSSTATE_VAR(mxcsrTemp));268MOV(32, R(TEMPREG), MIPSSTATE_VAR(mxcsrTemp));269AND(32, R(TEMPREG), Imm32(~(3 << 13)));270OR(32, R(TEMPREG), Imm32(setMXCSR << 13));271MOV(32, MIPSSTATE_VAR(temp), R(TEMPREG));272LDMXCSR(MIPSSTATE_VAR(temp));273}274275(this->*conv)(TEMPREG, fpr.R(fs));276277// Did we get an indefinite integer value?278CMP(32, R(TEMPREG), Imm32(0x80000000));279FixupBranch skip = J_CC(CC_NE);280if (fd != fs) {281CopyFPReg(fpr.RX(fd), fpr.R(fs));282}283XORPS(XMM1, 
R(XMM1));284CMPSS(fpr.RX(fd), R(XMM1), CMP_LT);285286// At this point, -inf = 0xffffffff, inf/nan = 0x00000000.287// We want -inf to be 0x80000000 inf/nan to be 0x7fffffff, so we flip those bits.288MOVD_xmm(R(TEMPREG), fpr.RX(fd));289XOR(32, R(TEMPREG), Imm32(0x7fffffff));290291SetJumpTarget(skip);292MOVD_xmm(fpr.RX(fd), R(TEMPREG));293294if (setMXCSR != -1) {295LDMXCSR(MIPSSTATE_VAR(mxcsrTemp));296}297};298299switch (op & 0x3f) {300case 5: //F(fd) = fabsf(F(fs)); break; //abs301fpr.SpillLock(fd, fs);302fpr.MapReg(fd, fd == fs, true);303MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssNoSignMask[0]));304if (fd != fs && fpr.IsMapped(fs)) {305MOVAPS(fpr.RX(fd), MatR(TEMPREG));306ANDPS(fpr.RX(fd), fpr.R(fs));307} else {308if (fd != fs) {309MOVSS(fpr.RX(fd), fpr.R(fs));310}311ANDPS(fpr.RX(fd), MatR(TEMPREG));312}313break;314315case 6: //F(fd) = F(fs); break; //mov316if (fd != fs) {317fpr.SpillLock(fd, fs);318fpr.MapReg(fd, fd == fs, true);319CopyFPReg(fpr.RX(fd), fpr.R(fs));320}321break;322323case 7: //F(fd) = -F(fs); break; //neg324fpr.SpillLock(fd, fs);325fpr.MapReg(fd, fd == fs, true);326MOV(PTRBITS, R(TEMPREG), ImmPtr(&ssSignBits2[0]));327if (fd != fs && fpr.IsMapped(fs)) {328MOVAPS(fpr.RX(fd), MatR(TEMPREG));329XORPS(fpr.RX(fd), fpr.R(fs));330} else {331if (fd != fs) {332MOVSS(fpr.RX(fd), fpr.R(fs));333}334XORPS(fpr.RX(fd), MatR(TEMPREG));335}336break;337338case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt339fpr.SpillLock(fd, fs);340fpr.MapReg(fd, fd == fs, true);341SQRTSS(fpr.RX(fd), fpr.R(fs));342break;343344case 13: //FsI(fd) = F(fs)>=0 ? 
(int)floorf(F(fs)) : (int)ceilf(F(fs)); break; //trunc.w.s345execRounding(&XEmitter::CVTTSS2SI, -1);346break;347348case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w349fpr.SpillLock(fd, fs);350fpr.MapReg(fd, fs == fd, true);351if (fpr.IsMapped(fs)) {352CVTDQ2PS(fpr.RX(fd), fpr.R(fs));353} else {354// If fs was fd, we'd be in the case above since we mapped fd.355MOVSS(fpr.RX(fd), fpr.R(fs));356CVTDQ2PS(fpr.RX(fd), fpr.R(fd));357}358break;359360case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s361// Uses the current rounding mode.362execRounding(&XEmitter::CVTSS2SI, -1);363break;364365case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s366execRounding(&XEmitter::CVTSS2SI, 0);367break;368case 14: //FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s369execRounding(&XEmitter::CVTSS2SI, 2);370break;371case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s372execRounding(&XEmitter::CVTSS2SI, 1);373break;374default:375DISABLE;376return;377}378fpr.ReleaseSpillLocks();379}380381void Jit::Comp_mxc1(MIPSOpcode op) {382CONDITIONAL_DISABLE(FPU_XFER);383384int fs = _FS;385MIPSGPReg rt = _RT;386387switch ((op >> 21) & 0x1f) {388case 0: // R(rt) = FI(fs); break; //mfc1389if (rt == MIPS_REG_ZERO)390return;391gpr.MapReg(rt, false, true);392// If fs is not mapped, most likely it's being abandoned.393// Just load from memory in that case.394if (fpr.R(fs).IsSimpleReg()) {395MOVD_xmm(gpr.R(rt), fpr.RX(fs));396} else {397MOV(32, gpr.R(rt), fpr.R(fs));398}399break;400401case 2: // R(rt) = currentMIPS->ReadFCR(fs); break; //cfc1402if (rt == MIPS_REG_ZERO)403return;404if (fs == 31) {405bool wasImm = gpr.IsImm(MIPS_REG_FPCOND);406if (!wasImm) {407gpr.Lock(rt, MIPS_REG_FPCOND);408gpr.MapReg(MIPS_REG_FPCOND, true, false);409}410gpr.MapReg(rt, false, true);411MOV(32, gpr.R(rt), MIPSSTATE_VAR(fcr31));412if (wasImm) {413if (gpr.GetImm(MIPS_REG_FPCOND) & 1) {414OR(32, gpr.R(rt), Imm32(1 << 23));415} else {416AND(32, gpr.R(rt), Imm32(~(1 << 23)));417}418} else {419AND(32, gpr.R(rt), 
Imm32(~(1 << 23)));420MOV(32, R(TEMPREG), gpr.R(MIPS_REG_FPCOND));421AND(32, R(TEMPREG), Imm32(1));422SHL(32, R(TEMPREG), Imm8(23));423OR(32, gpr.R(rt), R(TEMPREG));424}425gpr.UnlockAll();426} else if (fs == 0) {427gpr.SetImm(rt, MIPSState::FCR0_VALUE);428} else {429Comp_Generic(op);430}431return;432433case 4: //FI(fs) = R(rt); break; //mtc1434fpr.MapReg(fs, false, true);435if (gpr.IsImm(rt) && gpr.GetImm(rt) == 0) {436XORPS(fpr.RX(fs), fpr.R(fs));437} else {438gpr.KillImmediate(rt, true, false);439MOVD_xmm(fpr.RX(fs), gpr.R(rt));440}441return;442443case 6: //currentMIPS->WriteFCR(fs, R(rt)); break; //ctc1444if (fs == 31) {445// Must clear before setting, since ApplyRoundingMode() assumes it was cleared.446RestoreRoundingMode();447if (gpr.IsImm(rt)) {448gpr.SetImm(MIPS_REG_FPCOND, (gpr.GetImm(rt) >> 23) & 1);449MOV(32, MIPSSTATE_VAR(fcr31), Imm32(gpr.GetImm(rt) & 0x0181FFFF));450if ((gpr.GetImm(rt) & 0x1000003) == 0) {451// Default nearest / no-flush mode, just leave it cleared.452} else {453UpdateRoundingMode(gpr.GetImm(rt));454ApplyRoundingMode();455}456} else {457gpr.Lock(rt, MIPS_REG_FPCOND);458gpr.MapReg(rt, true, false);459gpr.MapReg(MIPS_REG_FPCOND, false, true);460MOV(32, gpr.R(MIPS_REG_FPCOND), gpr.R(rt));461SHR(32, gpr.R(MIPS_REG_FPCOND), Imm8(23));462AND(32, gpr.R(MIPS_REG_FPCOND), Imm32(1));463MOV(32, MIPSSTATE_VAR(fcr31), gpr.R(rt));464AND(32, MIPSSTATE_VAR(fcr31), Imm32(0x0181FFFF));465gpr.UnlockAll();466UpdateRoundingMode();467ApplyRoundingMode();468}469} else {470Comp_Generic(op);471}472return;473}474}475476} // namespace MIPSComp477478#endif // PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)479480481