CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/ARM64/Arm64CompFPU.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "ppsspp_config.h"18#if PPSSPP_ARCH(ARM64)1920#include "Core/Config.h"21#include "Core/MemMap.h"22#include "Core/MIPS/MIPS.h"23#include "Core/MIPS/MIPSCodeUtils.h"24#include "Core/MIPS/MIPSTables.h"2526#include "Core/MIPS/ARM64/Arm64Jit.h"27#include "Core/MIPS/ARM64/Arm64RegCache.h"28#include "Common/CPUDetect.h"2930#define _RS MIPS_GET_RS(op)31#define _RT MIPS_GET_RT(op)32#define _RD MIPS_GET_RD(op)33#define _FS MIPS_GET_FS(op)34#define _FT MIPS_GET_FT(op)35#define _FD MIPS_GET_FD(op)36#define _SA MIPS_GET_SA(op)37#define _POS ((op>> 6) & 0x1F)38#define _SIZE ((op>>11) & 0x1F)39#define _IMM16 (signed short)(op & 0xFFFF)40#define _IMM26 (op & 0x03FFFFFF)414243// FPCR interesting bits:44// 24: FZ (flush-to-zero)45// 23:22: RMode (0 = nearest, 1 = +inf, 2 = -inf, 3 = zero)46// not much else is interesting for us, but should be preserved.47// To access: MRS Xt, FPCR ; MSR FPCR, Xt484950// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.51// Currently known non working ones should have DISABLE.5253// #define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }54#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }55#define DISABLE { Comp_Generic(op); return; }5657namespace MIPSComp {58using namespace Arm64Gen;59using namespace Arm64JitConstants;6061void Arm64Jit::Comp_FPU3op(MIPSOpcode op) {62CONDITIONAL_DISABLE(FPU);6364int ft = _FT;65int fs = _FS;66int fd = _FD;6768fpr.MapDirtyInIn(fd, fs, ft);69switch (op & 0x3f) {70case 0: fp.FADD(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) + F(ft); //add71case 1: fp.FSUB(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) - F(ft); //sub72case 2: fp.FMUL(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) * F(ft); //mul73case 3: fp.FDIV(fpr.R(fd), fpr.R(fs), fpr.R(ft)); break; //F(fd) = F(fs) / F(ft); //div74default:75DISABLE;76return;77}78}7980void Arm64Jit::Comp_FPULS(MIPSOpcode op)81{82CONDITIONAL_DISABLE(LSU_FPU);83CheckMemoryBreakpoint();8485s32 offset = SignExtend16ToS32(op & 0xFFFF);86int ft = _FT;87MIPSGPReg rs = _RS;88// u32 addr = R(rs) + offset;89std::vector<FixupBranch> skips;90switch (op >> 26) {91case 49: //FI(ft) = Memory::Read_U32(addr); break; //lwc192if (!gpr.IsImm(rs) && jo.cachePointers && g_Config.bFastMemory && (offset & 3) == 0 && offset <= 16380 && offset >= 0) {93gpr.MapRegAsPointer(rs);94fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);95fp.LDR(32, INDEX_UNSIGNED, fpr.R(ft), gpr.RPtr(rs), offset);96break;97}9899fpr.SpillLock(ft);100fpr.MapReg(ft, MAP_NOINIT | MAP_DIRTY);101if (gpr.IsImm(rs)) {102#ifdef MASKED_PSP_MEMORY103u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;104#else105u32 addr = offset + gpr.GetImm(rs);106#endif107gpr.SetRegImm(SCRATCH1, addr);108} else {109gpr.MapReg(rs);110if (g_Config.bFastMemory) {111SetScratch1ToEffectiveAddress(rs, offset);112} else {113skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);114}115}116fp.LDR(32, fpr.R(ft), SCRATCH1_64, ArithOption(MEMBASEREG));117for (auto skip : skips) {118SetJumpTarget(skip);119}120fpr.ReleaseSpillLocksAndDiscardTemps();121break;122123case 57: //Memory::Write_U32(FI(ft), addr); break; //swc1124if (!gpr.IsImm(rs) && jo.cachePointers && g_Config.bFastMemory && (offset & 3) == 0 && offset <= 16380 && offset >= 0) {125gpr.MapRegAsPointer(rs);126fpr.MapReg(ft, 0);127fp.STR(32, INDEX_UNSIGNED, fpr.R(ft), gpr.RPtr(rs), offset);128break;129}130131fpr.MapReg(ft);132if (gpr.IsImm(rs)) {133#ifdef MASKED_PSP_MEMORY134u32 addr = (offset + gpr.GetImm(rs)) & 0x3FFFFFFF;135#else136u32 addr = offset + gpr.GetImm(rs);137#endif138gpr.SetRegImm(SCRATCH1, addr);139} else {140gpr.MapReg(rs);141if (g_Config.bFastMemory) {142SetScratch1ToEffectiveAddress(rs, offset);143} else {144skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);145}146}147fp.STR(32, fpr.R(ft), SCRATCH1_64, ArithOption(MEMBASEREG));148for (auto skip : skips) {149SetJumpTarget(skip);150}151break;152153default:154Comp_Generic(op);155return;156}157}158159void Arm64Jit::Comp_FPUComp(MIPSOpcode op) {160CONDITIONAL_DISABLE(FPU_COMP);161162int opc = op & 0xF;163if (opc >= 8) opc -= 8; // alias164if (opc == 0) { // f, sf (signalling false)165gpr.SetImm(MIPS_REG_FPCOND, 0);166return;167}168169int fs = _FS;170int ft = _FT;171gpr.MapReg(MIPS_REG_FPCOND, MAP_DIRTY | MAP_NOINIT);172fpr.MapInIn(fs, ft);173fp.FCMP(fpr.R(fs), fpr.R(ft));174175switch (opc) {176case 1: // un, ngle (unordered)177CSET(gpr.R(MIPS_REG_FPCOND), CC_VS);178break;179case 2: // eq, seq (equal, ordered)180CSET(gpr.R(MIPS_REG_FPCOND), CC_EQ);181break;182case 3: // ueq, ngl (equal, unordered)183CSET(gpr.R(MIPS_REG_FPCOND), CC_EQ);184// If ordered, use the above result. If unordered, use ZR+1 (being 1.)185CSINC(gpr.R(MIPS_REG_FPCOND), gpr.R(MIPS_REG_FPCOND), WZR, CC_VC);186return;187case 4: // olt, lt (less than, ordered)188CSET(gpr.R(MIPS_REG_FPCOND), CC_LO);189break;190case 5: // ult, nge (less than, unordered)191CSET(gpr.R(MIPS_REG_FPCOND), CC_LT);192break;193case 6: // ole, le (less equal, ordered)194CSET(gpr.R(MIPS_REG_FPCOND), CC_LS);195break;196case 7: // ule, ngt (less equal, unordered)197CSET(gpr.R(MIPS_REG_FPCOND), CC_LE);198break;199default:200Comp_Generic(op);201return;202}203}204205void Arm64Jit::Comp_FPU2op(MIPSOpcode op) {206CONDITIONAL_DISABLE(FPU);207int fs = _FS;208int fd = _FD;209210switch (op & 0x3f) {211case 4: //F(fd) = sqrtf(F(fs)); break; //sqrt212fpr.MapDirtyIn(fd, fs);213fp.FSQRT(fpr.R(fd), fpr.R(fs));214break;215case 5: //F(fd) = fabsf(F(fs)); break; //abs216fpr.MapDirtyIn(fd, fs);217fp.FABS(fpr.R(fd), fpr.R(fs));218break;219case 6: //F(fd) = F(fs); break; //mov220fpr.MapDirtyIn(fd, fs);221fp.FMOV(fpr.R(fd), fpr.R(fs));222break;223case 7: //F(fd) = -F(fs); break; //neg224fpr.MapDirtyIn(fd, fs);225fp.FNEG(fpr.R(fd), fpr.R(fs));226break;227228case 12: //FsI(fd) = (int)floorf(F(fs)+0.5f); break; //round.w.s229{230fpr.MapDirtyIn(fd, fs);231fp.FCMP(fpr.R(fs), fpr.R(fs)); // Detect NaN232fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_N); // to nearest, ties to even233FixupBranch skip = B(CC_VC);234MOVI2R(SCRATCH1, 0x7FFFFFFF);235fp.FMOV(fpr.R(fd), SCRATCH1);236SetJumpTarget(skip);237break;238}239240case 13: //FsI(fd) = Rto0(F(fs))); break; //trunc.w.s241{242fpr.MapDirtyIn(fd, fs);243fp.FCMP(fpr.R(fs), fpr.R(fs));244fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_Z);245FixupBranch skip = B(CC_VC);246MOVI2R(SCRATCH1, 0x7FFFFFFF);247fp.FMOV(fpr.R(fd), SCRATCH1);248SetJumpTarget(skip);249break;250}251252case 14://FsI(fd) = (int)ceilf (F(fs)); break; //ceil.w.s253{254fpr.MapDirtyIn(fd, fs);255fp.FCMP(fpr.R(fs), fpr.R(fs));256fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_P); // towards +inf257FixupBranch skip = B(CC_VC);258MOVI2R(SCRATCH1, 0x7FFFFFFF);259fp.FMOV(fpr.R(fd), SCRATCH1);260SetJumpTarget(skip);261break;262}263case 15: //FsI(fd) = (int)floorf(F(fs)); break; //floor.w.s264{265fpr.MapDirtyIn(fd, fs);266fp.FCMP(fpr.R(fs), fpr.R(fs));267fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_M); // towards -inf268FixupBranch skip = B(CC_VC);269MOVI2R(SCRATCH1, 0x7FFFFFFF);270fp.FMOV(fpr.R(fd), SCRATCH1);271SetJumpTarget(skip);272break;273}274275case 32: //F(fd) = (float)FsI(fs); break; //cvt.s.w276fpr.MapDirtyIn(fd, fs);277fp.SCVTF(fpr.R(fd), fpr.R(fs));278break;279280case 36: //FsI(fd) = (int) F(fs); break; //cvt.w.s281fpr.MapDirtyIn(fd, fs);282if (js.hasSetRounding) {283// We're just going to defer to our cached func. Here's the arg.284fp.FMOV(S0, fpr.R(fs));285286MOVP2R(SCRATCH1_64, &js.currentRoundingFunc);287LDR(INDEX_UNSIGNED, SCRATCH1_64, SCRATCH1_64, 0);288289BLR(SCRATCH1_64);290291fp.FMOV(fpr.R(fd), S0);292} else {293fp.FCMP(fpr.R(fs), fpr.R(fs));294fp.FCVTS(fpr.R(fd), fpr.R(fs), ROUND_N);295FixupBranch skip_nan = B(CC_VC);296MOVI2R(SCRATCH1, 0x7FFFFFFF);297fp.FMOV(fpr.R(fd), SCRATCH1);298SetJumpTarget(skip_nan);299}300break;301302default:303DISABLE;304}305}306307void Arm64Jit::Comp_mxc1(MIPSOpcode op)308{309CONDITIONAL_DISABLE(FPU_XFER);310311int fs = _FS;312MIPSGPReg rt = _RT;313314switch ((op >> 21) & 0x1f) {315case 0: // R(rt) = FI(fs); break; //mfc1316if (rt == MIPS_REG_ZERO) {317return;318}319gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT);320if (fpr.IsMapped(fs)) {321fp.FMOV(gpr.R(rt), fpr.R(fs));322} else {323LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, fpr.GetMipsRegOffset(fs));324}325return;326327case 2: //cfc1328if (rt == MIPS_REG_ZERO) {329return;330}331if (fs == 31) {332if (gpr.IsImm(MIPS_REG_FPCOND)) {333gpr.MapReg(rt, MAP_DIRTY | MAP_NOINIT);334LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));335if (gpr.GetImm(MIPS_REG_FPCOND) & 1) {336ORRI2R(gpr.R(rt), gpr.R(rt), 0x1 << 23, SCRATCH2);337} else {338ANDI2R(gpr.R(rt), gpr.R(rt), ~(0x1 << 23), SCRATCH2);339}340} else {341gpr.MapDirtyIn(rt, MIPS_REG_FPCOND);342LDR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));343BFI(gpr.R(rt), gpr.R(MIPS_REG_FPCOND), 23, 1);344}345} else if (fs == 0) {346gpr.SetImm(rt, MIPSState::FCR0_VALUE);347} else {348// Unsupported regs are always 0.349gpr.SetImm(rt, 0);350}351return;352353case 4: //FI(fs) = R(rt); break; //mtc1354if (gpr.IsImm(rt)) {355// This can't be run on LO/HI.356uint32_t ival = (uint32_t)gpr.GetImm(rt);357float floatval;358memcpy(&floatval, &ival, sizeof(floatval));359uint8_t imm8;360// If zero, just zero it.361fpr.MapReg(fs, MAP_NOINIT | MAP_DIRTY);362if (ival == 0) {363fp.FMOV(fpr.R(fs), WZR); // This is supposedly special cased in hardware to be fast.364} else if (FPImm8FromFloat(floatval, &imm8)) {365fp.FMOV(fpr.R(fs), imm8);366} else {367// Materialize the register and do a cross move.368gpr.MapReg(rt);369fp.FMOV(fpr.R(fs), gpr.R(rt));370}371} else {372gpr.MapReg(rt);373fpr.MapReg(fs, MAP_NOINIT | MAP_DIRTY);374fp.FMOV(fpr.R(fs), gpr.R(rt));375}376return;377378case 6: //ctc1379if (fs == 31) {380// Must clear before setting, since ApplyRoundingMode() assumes it was cleared.381RestoreRoundingMode();382bool wasImm = gpr.IsImm(rt);383u32 immVal = -1;384if (wasImm) {385immVal = gpr.GetImm(rt);386gpr.SetImm(MIPS_REG_FPCOND, (immVal >> 23) & 1);387gpr.MapReg(rt);388} else {389gpr.MapDirtyIn(MIPS_REG_FPCOND, rt);390}391392// Update MIPS state393// TODO: Technically, should mask by 0x0181FFFF. Maybe just put all of FCR31 in the reg?394STR(INDEX_UNSIGNED, gpr.R(rt), CTXREG, offsetof(MIPSState, fcr31));395if (!wasImm) {396UBFX(gpr.R(MIPS_REG_FPCOND), gpr.R(rt), 23, 1);397// TODO: We do have the fcr31 value in a register here, could use that in UpdateRoundingMode to avoid reloading it.398UpdateRoundingMode();399} else {400UpdateRoundingMode(immVal);401}402ApplyRoundingMode();403} else {404Comp_Generic(op);405}406return;407default:408DISABLE;409break;410}411}412413} // namespace MIPSComp414415#endif // PPSSPP_ARCH(ARM64)416417418