Path: blob/master/Core/MIPS/x86/X64IRJit.cpp
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)

#include <cstddef>
#include "Common/StringUtils.h"
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/x86/X64IRJit.h"
#include "Core/MIPS/x86/X64IRRegCache.h"

namespace MIPSComp {

using namespace Gen;
using namespace X64IRJitConstants;

// Invalidations just need a MOV and JMP.
static constexpr int MIN_BLOCK_NORMAL_LEN = 10;
// As long as we can fit a JMP, we should be fine.
static constexpr int MIN_BLOCK_EXIT_LEN = 5;

X64JitBackend::X64JitBackend(JitOptions &jitopt, IRBlockCache &blocks)
	: IRNativeBackend(blocks), jo(jitopt), regs_(&jo) {
	// Automatically disable incompatible options.
	if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
		jo.enablePointerify = false;
	}
	jo.optimizeForInterpreter = false;

	// Since we store the offset, this is as big as it can be.
	AllocCodeSpace(1024 * 1024 * 16);

	regs_.Init(this);
}

X64JitBackend::~X64JitBackend() {}

static void NoBlockExits() {
	_assert_msg_(false, "Never exited block, invalid IR?");
}

bool X64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
	if (GetSpaceLeft() < 0x800)
		return false;

	IRBlock *block = irBlockCache->GetBlock(block_num);
	u32 startPC = block->GetOriginalStart();
	bool wroteCheckedOffset = false;
	if (jo.enableBlocklink && !jo.useBackJump) {
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
		wroteCheckedOffset = true;

		WriteDebugPC(startPC);

		// TODO: See if we can get flags to always have the downcount compare.
		if (jo.downcountInRegister) {
			TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
		} else {
			CMP(32, MDisp(CTXREG, downcountOffset), Imm32(0));
		}
		FixupBranch normalEntry = J_CC(CC_NS);
		MOV(32, R(SCRATCH1), Imm32(startPC));
		JMP(outerLoopPCInSCRATCH1_, true);
		SetJumpTarget(normalEntry);
	}
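
	// For reference, the checked entry emitted above boils down to roughly this
	// sequence (mnemonics are illustrative; the actual registers are whatever
	// DOWNCOUNTREG and SCRATCH1 map to on this ABI):
	//   test downcount, downcount    ; sign flag set once the downcount goes negative
	//   jns  normalEntry             ; cycles remain: fall through into the block body
	//   mov  scratch1, startPC       ; out of cycles: report our PC...
	//   jmp  outerLoopPCInSCRATCH1_  ; ...and bounce out to the scheduler loop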

	// Don't worry, the codespace isn't large enough to overflow offsets.
	const u8 *blockStart = GetCodePointer();
	block->SetNativeOffset((int)GetOffset(blockStart));
	compilingBlockNum_ = block_num;
	lastConstPC_ = 0;

	regs_.Start(irBlockCache, block_num);

	std::vector<const u8 *> addresses;
	addresses.reserve(block->GetNumIRInstructions());
	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
	for (int i = 0; i < block->GetNumIRInstructions(); ++i) {
		const IRInst &inst = instructions[i];
		regs_.SetIRIndex(i);
		addresses.push_back(GetCodePtr());

		CompileIRInst(inst);

		if (jo.Disabled(JitDisable::REGALLOC_GPR) || jo.Disabled(JitDisable::REGALLOC_FPR))
			regs_.FlushAll(jo.Disabled(JitDisable::REGALLOC_GPR), jo.Disabled(JitDisable::REGALLOC_FPR));

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800) {
			compilingBlockNum_ = -1;
			return false;
		}
	}
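
	// A note on the padding below: MIN_BLOCK_NORMAL_LEN is 10 because
	// InvalidateBlock() overwrites a block in place with MOV r32, imm32
	// (5 bytes) plus JMP rel32 (5 bytes), so even a tiny block must leave
	// room for that patch.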

	// We should've written an exit above. If we didn't, bad things will happen.
	// Only check if debug stats are enabled - needlessly wastes jit space.
	if (DebugStatsEnabled()) {
		ABI_CallFunction((const void *)&NoBlockExits);
		JMP(hooks_.crashHandler, true);
	}

	int len = (int)GetOffset(GetCodePointer()) - block->GetNativeOffset();
	if (len < MIN_BLOCK_NORMAL_LEN) {
		// We need at least 10 bytes to invalidate blocks with.
		ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - len);
	}

	if (!wroteCheckedOffset) {
		// Always record this, even if block link disabled - it's used for size calc.
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
	}

	if (jo.enableBlocklink && jo.useBackJump) {
		WriteDebugPC(startPC);

		if (jo.downcountInRegister) {
			TEST(32, R(DOWNCOUNTREG), R(DOWNCOUNTREG));
		} else {
			CMP(32, MDisp(CTXREG, downcountOffset), Imm32(0));
		}
		J_CC(CC_NS, blockStart, true);

		MOV(32, R(SCRATCH1), Imm32(startPC));
		JMP(outerLoopPCInSCRATCH1_, true);
	}

	if (logBlocks_ > 0) {
		--logBlocks_;

		std::map<const u8 *, int> addressesLookup;
		for (int i = 0; i < (int)addresses.size(); ++i)
			addressesLookup[addresses[i]] = i;

		INFO_LOG(Log::JIT, "=============== x86 (%08x, %d bytes) ===============", startPC, len);
		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
			auto it = addressesLookup.find(p);
			if (it != addressesLookup.end()) {
				const IRInst &inst = instructions[it->second];

				char temp[512];
				DisassembleIR(temp, sizeof(temp), inst);
				INFO_LOG(Log::JIT, "IR: #%d %s", it->second, temp);
			}

			auto next = std::next(it);
			const u8 *nextp = next == addressesLookup.end() ? GetCodePointer() : next->first;

			auto lines = DisassembleX86(p, (int)(nextp - p));
			for (const auto &line : lines)
				INFO_LOG(Log::JIT, " X: %s", line.c_str());
			p = nextp;
		}
	}

	compilingBlockNum_ = -1;

	return true;
}

void X64JitBackend::WriteConstExit(uint32_t pc) {
	int block_num = blocks_.GetBlockNumberFromStartAddress(pc);
	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);

	int exitStart = (int)GetOffset(GetCodePointer());
	if (block_num >= 0 && jo.enableBlocklink && nativeBlock && nativeBlock->checkedOffset != 0) {
		JMP(GetBasePtr() + nativeBlock->checkedOffset, true);
	} else {
		MOV(32, R(SCRATCH1), Imm32(pc));
		JMP(dispatcherPCInSCRATCH1_, true);
	}

	if (jo.enableBlocklink) {
		// In case of compression or early link, make sure it's large enough.
		int len = (int)GetOffset(GetCodePointer()) - exitStart;
		if (len < MIN_BLOCK_EXIT_LEN) {
			ReserveCodeSpace(MIN_BLOCK_EXIT_LEN - len);
			len = MIN_BLOCK_EXIT_LEN;
		}

		AddLinkableExit(compilingBlockNum_, pc, exitStart, len);
	}
}
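
// OverwriteExit() patches an exit recorded by AddLinkableExit() above once the
// target block becomes available: under W^X the page is briefly made writable,
// the jump is redirected to the target's checked entry, and padding keeps the
// patch within the reserved exit footprint before the page goes back to RX.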
void X64JitBackend::OverwriteExit(int srcOffset, int len, int block_num) {
	_dbg_assert_(len >= MIN_BLOCK_EXIT_LEN);

	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);
	if (nativeBlock) {
		u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + srcOffset;
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		XEmitter emitter(writable);
		emitter.JMP(GetBasePtr() + nativeBlock->checkedOffset, true);
		int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
		_dbg_assert_(bytesWritten <= MIN_BLOCK_EXIT_LEN);
		if (bytesWritten < len)
			emitter.ReserveCodeSpace(len - bytesWritten);

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, 16, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}
}

void X64JitBackend::CompIR_Generic(IRInst inst) {
	// If we got here, we're going the slow way.
	uint64_t value;
	memcpy(&value, &inst, sizeof(inst));

	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
#if PPSSPP_ARCH(AMD64)
	ABI_CallFunctionP((const void *)&DoIRInst, (void *)value);
#else
	ABI_CallFunctionCC((const void *)&DoIRInst, (u32)(value & 0xFFFFFFFF), (u32)(value >> 32));
#endif
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();

	// We only need to check the return value if it's a potential exit.
	if ((GetIRMeta(inst.op)->flags & IRFLAG_EXIT) != 0) {
		// Result in RAX aka SCRATCH1.
		_assert_(RAX == SCRATCH1);
		CMP(32, R(SCRATCH1), Imm32(0));
		J_CC(CC_NE, dispatcherPCInSCRATCH1_);
	}
}

void X64JitBackend::CompIR_Interpret(IRInst inst) {
	MIPSOpcode op(inst.constant);

	// IR protects us against this being a branching instruction (well, hopefully.)
	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
	if (DebugStatsEnabled()) {
		ABI_CallFunctionP((const void *)&NotifyMIPSInterpret, (void *)MIPSGetName(op));
	}
	ABI_CallFunctionC((const void *)MIPSGetInterpretFunc(op), inst.constant);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();
}

void X64JitBackend::FlushAll() {
	regs_.FlushAll();
}

bool X64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
	// Used in disassembly viewer and profiling tools.
	// Don't use spaces; profilers get confused or truncate them.
	if (ptr == dispatcherPCInSCRATCH1_) {
		name = "dispatcherPCInSCRATCH1";
	} else if (ptr == outerLoopPCInSCRATCH1_) {
		name = "outerLoopPCInSCRATCH1";
	} else if (ptr == dispatcherNoCheck_) {
		name = "dispatcherNoCheck";
	} else if (ptr == saveStaticRegisters_) {
		name = "saveStaticRegisters";
	} else if (ptr == loadStaticRegisters_) {
		name = "loadStaticRegisters";
	} else if (ptr == restoreRoundingMode_) {
		name = "restoreRoundingMode";
	} else if (ptr == applyRoundingMode_) {
		name = "applyRoundingMode";
	} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
		if (ptr == constants.noSignMask) {
			name = "constants.noSignMask";
		} else if (ptr == constants.signBitAll) {
			name = "constants.signBitAll";
		} else if (ptr == constants.positiveZeroes) {
			name = "constants.positiveZeroes";
		} else if (ptr == constants.positiveInfinity) {
			name = "constants.positiveInfinity";
		} else if (ptr == constants.positiveOnes) {
			name = "constants.positiveOnes";
		} else if (ptr == constants.negativeOnes) {
			name = "constants.negativeOnes";
		} else if (ptr == constants.qNAN) {
			name = "constants.qNAN";
		} else if (ptr == constants.maxIntBelowAsFloat) {
			name = "constants.maxIntBelowAsFloat";
		} else if ((const float *)ptr >= constants.mulTableVi2f && (const float *)ptr < constants.mulTableVi2f + 32) {
			name = StringFromFormat("constants.mulTableVi2f[%d]", (int)((const float *)ptr - constants.mulTableVi2f));
		} else if ((const float *)ptr >= constants.mulTableVf2i && (const float *)ptr < constants.mulTableVf2i + 32) {
			name = StringFromFormat("constants.mulTableVf2i[%d]", (int)((const float *)ptr - constants.mulTableVf2i));
		} else if ((const Float4Constant *)ptr >= constants.vec4InitValues && (const Float4Constant *)ptr < constants.vec4InitValues + 8) {
			name = StringFromFormat("constants.vec4InitValues[%d]", (int)((const Float4Constant *)ptr - constants.vec4InitValues));
		} else {
			name = "fixedCode";
		}
	} else {
		return IRNativeBackend::DescribeCodePtr(ptr, name);
	}
	return true;
}

void X64JitBackend::ClearAllBlocks() {
	ClearCodeSpace(jitStartOffset_);
	EraseAllLinks(-1);
}

void X64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
	IRBlock *block = irBlockCache->GetBlock(block_num);
	int offset = block->GetNativeOffset();
	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;

	// Overwrite the block with a jump to compile it again.
	u32 pc = block->GetOriginalStart();
	if (pc != 0) {
		// Hopefully we always have at least 16 bytes, which should be all we need.
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		XEmitter emitter(writable);
		emitter.MOV(32, R(SCRATCH1), Imm32(pc));
		emitter.JMP(dispatcherPCInSCRATCH1_, true);
		int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
		if (bytesWritten < MIN_BLOCK_NORMAL_LEN)
			emitter.ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - bytesWritten);

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}

	EraseAllLinks(block_num);
}

void X64JitBackend::RestoreRoundingMode(bool force) {
	CALL(restoreRoundingMode_);
}

void X64JitBackend::ApplyRoundingMode(bool force) {
	CALL(applyRoundingMode_);
}

void X64JitBackend::MovFromPC(X64Reg r) {
	MOV(32, R(r), MDisp(CTXREG, pcOffset));
}

void X64JitBackend::MovToPC(X64Reg r) {
	MOV(32, MDisp(CTXREG, pcOffset), R(r));
}
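
// The debug/profiler helpers below only emit a store when the corresponding
// hook pointer is non-null; presumably hooks_ is only populated when profiling
// is enabled, so ordinary builds emit no extra code for these.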
void X64JitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC)
		MOV(32, M(hooks_.profilerPC), Imm32(pc));
}

void X64JitBackend::WriteDebugPC(Gen::X64Reg r) {
	if (hooks_.profilerPC)
		MOV(32, M(hooks_.profilerPC), R(r));
}

void X64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerPC)
		MOV(32, M(hooks_.profilerStatus), Imm32((int32_t)status));
}

void X64JitBackend::SaveStaticRegisters() {
	if (jo.useStaticAlloc) {
		//CALL(saveStaticRegisters_);
	} else if (jo.downcountInRegister) {
		// Inline the single operation
		MOV(32, MDisp(CTXREG, downcountOffset), R(DOWNCOUNTREG));
	}
}

void X64JitBackend::LoadStaticRegisters() {
	if (jo.useStaticAlloc) {
		//CALL(loadStaticRegisters_);
	} else if (jo.downcountInRegister) {
		MOV(32, R(DOWNCOUNTREG), MDisp(CTXREG, downcountOffset));
	}
}

void X64JitBackend::EmitConst4x32(const void **c, uint32_t v) {
	*c = AlignCode16();
	for (int i = 0; i < 4; ++i)
		Write32(v);
}

} // namespace MIPSComp

#endif