CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/x86/X64IRAsm.cpp
Views: 1401
// Copyright (c) 2023- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "ppsspp_config.h"18#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)1920#include "Common/Log.h"21#include "Core/CoreTiming.h"22#include "Core/MemMap.h"23#include "Core/MIPS/x86/X64IRJit.h"24#include "Core/MIPS/x86/X64IRRegCache.h"25#include "Core/MIPS/JitCommon/JitCommon.h"26#include "Core/MIPS/JitCommon/JitState.h"27#include "Core/System.h"2829namespace MIPSComp {3031using namespace Gen;32using namespace X64IRJitConstants;3334static const bool enableDebug = false;35static const bool enableDisasm = false;3637static void ShowPC(void *membase, void *jitbase) {38static int count = 0;39if (currentMIPS) {40u32 downcount = currentMIPS->downcount;41ERROR_LOG(Log::JIT, "[%08x] ShowPC Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase);42} else {43ERROR_LOG(Log::JIT, "Universe corrupt?");44}45//if (count > 2000)46// exit(0);47count++;48}4950void X64JitBackend::GenerateFixedCode(MIPSState *mipsState) {51// This will be used as a writable scratch area, always 32-bit accessible.52const u8 *start = AlignCodePage();53if (DebugProfilerEnabled()) {54ProtectMemoryPages(start, GetMemoryProtectPageSize(), MEM_PROT_READ | MEM_PROT_WRITE);55hooks_.profilerPC = (uint32_t *)GetWritableCodePtr();56Write32(0);57hooks_.profilerStatus = (IRProfilerStatus *)GetWritableCodePtr();58Write32(0);59}6061EmitFPUConstants();62EmitVecConstants();6364const u8 *disasmStart = AlignCodePage();65BeginWrite(GetMemoryProtectPageSize());6667jo.downcountInRegister = false;68#if PPSSPP_ARCH(AMD64)69bool jitbaseInR15 = false;70int jitbaseCtxDisp = 0;71// We pre-bake the MIPS_EMUHACK_OPCODE subtraction into our jitbase value.72intptr_t jitbase = (intptr_t)GetBasePtr() - MIPS_EMUHACK_OPCODE;73if ((jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) && !Accessible((const u8 *)&mipsState->f[0], (const u8 *)jitbase)) {74jo.reserveR15ForAsm = true;75jitbaseInR15 = true;76} else {77jo.downcountInRegister = true;78jo.reserveR15ForAsm = true;79if (jitbase < -0x80000000LL || jitbase > 0x7FFFFFFFLL) {80jitbaseCtxDisp = (int)(jitbase - (intptr_t)&mipsState->f[0]);81}82}83#endif8485if (jo.useStaticAlloc && false) {86saveStaticRegisters_ = AlignCode16();87if (jo.downcountInRegister)88MOV(32, MDisp(CTXREG, downcountOffset), R(DOWNCOUNTREG));89//regs_.EmitSaveStaticRegisters();90RET();9192// Note: needs to not modify EAX, or to save it if it does.93loadStaticRegisters_ = AlignCode16();94//regs_.EmitLoadStaticRegisters();95if (jo.downcountInRegister)96MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));97RET();98} else {99saveStaticRegisters_ = nullptr;100loadStaticRegisters_ = nullptr;101}102103restoreRoundingMode_ = AlignCode16();104{105STMXCSR(MDisp(CTXREG, tempOffset));106// Clear the rounding mode and flush-to-zero bits back to 0.107AND(32, MDisp(CTXREG, tempOffset), Imm32(~(7 << 13)));108LDMXCSR(MDisp(CTXREG, tempOffset));109RET();110}111112applyRoundingMode_ = AlignCode16();113{114MOV(32, R(SCRATCH1), MDisp(CTXREG, fcr31Offset));115AND(32, R(SCRATCH1), Imm32(0x01000003));116117// If it's 0 (nearest + no flush0), we don't actually bother setting - we cleared the rounding118// mode out in restoreRoundingMode anyway. This is the most common.119FixupBranch skip = J_CC(CC_Z);120STMXCSR(MDisp(CTXREG, tempOffset));121122// The MIPS bits don't correspond exactly, so we have to adjust.123// 0 -> 0 (skip2), 1 -> 3, 2 -> 2 (skip2), 3 -> 1124TEST(8, R(AL), Imm8(1));125FixupBranch skip2 = J_CC(CC_Z);126XOR(32, R(SCRATCH1), Imm8(2));127SetJumpTarget(skip2);128129// Adjustment complete, now reconstruct MXCSR130SHL(32, R(SCRATCH1), Imm8(13));131// Before setting new bits, we must clear the old ones.132// Clearing bits 13-14 (rounding mode) and 15 (flush to zero.)133AND(32, MDisp(CTXREG, tempOffset), Imm32(~(7 << 13)));134OR(32, MDisp(CTXREG, tempOffset), R(SCRATCH1));135136TEST(32, MDisp(CTXREG, fcr31Offset), Imm32(1 << 24));137FixupBranch skip3 = J_CC(CC_Z);138OR(32, MDisp(CTXREG, tempOffset), Imm32(1 << 15));139SetJumpTarget(skip3);140141LDMXCSR(MDisp(CTXREG, tempOffset));142SetJumpTarget(skip);143RET();144}145146hooks_.enterDispatcher = (IRNativeFuncNoArg)AlignCode16();147148ABI_PushAllCalleeSavedRegsAndAdjustStack();149#if PPSSPP_ARCH(AMD64)150// Two x64-specific statically allocated registers.151MOV(64, R(MEMBASEREG), ImmPtr(Memory::base));152if (jitbaseInR15)153MOV(64, R(JITBASEREG), ImmPtr((const void *)jitbase));154#endif155// From the start of the FP reg, a single byte offset can reach all GPR + all FPR (but not VFPR.)156MOV(PTRBITS, R(CTXREG), ImmPtr(&mipsState->f[0]));157158LoadStaticRegisters();159WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);160MovFromPC(SCRATCH1);161WriteDebugPC(SCRATCH1);162outerLoopPCInSCRATCH1_ = GetCodePtr();163MovToPC(SCRATCH1);164outerLoop_ = GetCodePtr();165// Advance can change the downcount (or thread), so must save/restore around it.166SaveStaticRegisters();167RestoreRoundingMode(true);168WriteDebugProfilerStatus(IRProfilerStatus::TIMER_ADVANCE);169ABI_CallFunction(reinterpret_cast<void *>(&CoreTiming::Advance));170WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);171ApplyRoundingMode(true);172LoadStaticRegisters();173174dispatcherCheckCoreState_ = GetCodePtr();175// TODO: See if we can get the slice decrement to line up with IR.176177if (RipAccessible((const void *)&coreState)) {178CMP(32, M(&coreState), Imm8(0)); // rip accessible179} else {180MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));181CMP(32, MatR(RAX), Imm8(0));182}183FixupBranch badCoreState = J_CC(CC_NZ, true);184185if (jo.downcountInRegister) {186TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));187} else {188CMP(32, MDisp(CTXREG, downcountOffset), Imm8(0));189}190J_CC(CC_S, outerLoop_);191FixupBranch skipToRealDispatch = J();192193dispatcherPCInSCRATCH1_ = GetCodePtr();194MovToPC(SCRATCH1);195196hooks_.dispatcher = GetCodePtr();197198// TODO: See if we can get the slice decrement to line up with IR.199if (jo.downcountInRegister) {200TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));201} else {202CMP(32, MDisp(CTXREG, downcountOffset), Imm8(0));203}204FixupBranch bail = J_CC(CC_S, true);205SetJumpTarget(skipToRealDispatch);206207dispatcherNoCheck_ = GetCodePtr();208209// Debug210if (enableDebug) {211#if PPSSPP_ARCH(AMD64)212if (jitbaseInR15) {213ABI_CallFunctionAA(reinterpret_cast<void *>(&ShowPC), R(MEMBASEREG), R(JITBASEREG));214} else if (jitbaseCtxDisp != 0) {215LEA(64, SCRATCH1, MDisp(CTXREG, jitbaseCtxDisp));216ABI_CallFunctionAA(reinterpret_cast<void *>(&ShowPC), R(MEMBASEREG), R(SCRATCH1));217} else {218ABI_CallFunctionAC(reinterpret_cast<void *>(&ShowPC), R(MEMBASEREG), (u32)jitbase);219}220#else221ABI_CallFunctionCC(reinterpret_cast<void *>(&ShowPC), (u32)Memory::base, (u32)GetBasePtr());222#endif223}224225MovFromPC(SCRATCH1);226WriteDebugPC(SCRATCH1);227#ifdef MASKED_PSP_MEMORY228AND(32, R(SCRATCH1), Imm32(Memory::MEMVIEW32_MASK));229#endif230hooks_.dispatchFetch = GetCodePtr();231#if PPSSPP_ARCH(X86)232_assert_msg_( Memory::base != 0, "Memory base bogus");233MOV(32, R(SCRATCH1), MDisp(SCRATCH1, (u32)Memory::base));234#elif PPSSPP_ARCH(AMD64)235MOV(32, R(SCRATCH1), MComplex(MEMBASEREG, SCRATCH1, SCALE_1, 0));236#endif237_assert_msg_(MIPS_JITBLOCK_MASK == 0xFF000000, "Hardcoded assumption of emuhack mask");238if (cpu_info.bBMI2) {239RORX(32, EDX, R(SCRATCH1), 24);240CMP(8, R(EDX), Imm8(MIPS_EMUHACK_OPCODE >> 24));241} else {242MOV(32, R(EDX), R(SCRATCH1));243SHR(32, R(EDX), Imm8(24));244CMP(32, R(EDX), Imm8(MIPS_EMUHACK_OPCODE >> 24));245}246FixupBranch needsCompile = J_CC(CC_NE);247// We don't mask here - that's baked into jitbase.248#if PPSSPP_ARCH(X86)249LEA(32, SCRATCH1, MDisp(SCRATCH1, (u32)GetBasePtr() - MIPS_EMUHACK_OPCODE));250#elif PPSSPP_ARCH(AMD64)251if (jitbaseInR15) {252ADD(64, R(SCRATCH1), R(JITBASEREG));253} else if (jitbaseCtxDisp) {254LEA(64, SCRATCH1, MComplex(CTXREG, SCRATCH1, SCALE_1, jitbaseCtxDisp));255} else {256// See above, reserveR15ForAsm is used when above 0x7FFFFFFF.257LEA(64, SCRATCH1, MDisp(SCRATCH1, (s32)jitbase));258}259#endif260JMPptr(R(SCRATCH1));261SetJumpTarget(needsCompile);262263// No block found, let's jit. We don't need to save static regs, they're all callee saved.264RestoreRoundingMode(true);265WriteDebugProfilerStatus(IRProfilerStatus::COMPILING);266ABI_CallFunction(&MIPSComp::JitAt);267WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);268ApplyRoundingMode(true);269// Let's just dispatch again, we'll enter the block since we know it's there.270JMP(dispatcherNoCheck_, true);271272SetJumpTarget(bail);273274if (RipAccessible((const void *)&coreState)) {275CMP(32, M(&coreState), Imm8(0)); // rip accessible276} else {277MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));278CMP(32, MatR(RAX), Imm8(0));279}280J_CC(CC_Z, outerLoop_, true);281282const uint8_t *quitLoop = GetCodePtr();283SetJumpTarget(badCoreState);284285WriteDebugProfilerStatus(IRProfilerStatus::NOT_RUNNING);286SaveStaticRegisters();287RestoreRoundingMode(true);288ABI_PopAllCalleeSavedRegsAndAdjustStack();289RET();290291hooks_.crashHandler = GetCodePtr();292if (RipAccessible((const void *)&coreState)) {293MOV(32, M(&coreState), Imm32(CORE_RUNTIME_ERROR));294} else {295MOV(PTRBITS, R(RAX), ImmPtr((const void *)&coreState));296MOV(32, MatR(RAX), Imm32(CORE_RUNTIME_ERROR));297}298JMP(quitLoop, true);299300301// Leave this at the end, add more stuff above.302if (enableDisasm) {303#if PPSSPP_ARCH(AMD64)304std::vector<std::string> lines = DisassembleX86(disasmStart, (int)(GetCodePtr() - disasmStart));305for (auto s : lines) {306INFO_LOG(Log::JIT, "%s", s.c_str());307}308#endif309}310311// Let's spare the pre-generated code from unprotect-reprotect.312AlignCodePage();313jitStartOffset_ = (int)(GetCodePtr() - start);314EndWrite();315}316317} // namespace MIPSComp318319#endif320321322