Path: blob/master/Core/MIPS/ARM64/Arm64IRJit.cpp
// Copyright (c) 2023- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
// In other words, PPSSPP_ARCH(ARM64) || DISASM_ALL.
#if PPSSPP_ARCH(ARM64) || (PPSSPP_PLATFORM(WINDOWS) && !defined(__LIBRETRO__))

#include <cstddef>
#include <cstring>
#include <map>
#include <vector>
#include "Core/MemMap.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/MIPS/ARM64/Arm64IRJit.h"
#include "Core/MIPS/ARM64/Arm64IRRegCache.h"

#include <algorithm>  // for std::min

namespace MIPSComp {

using namespace Arm64Gen;
using namespace Arm64IRJitConstants;

// Invalidations just need at most two MOVs and a B.
static constexpr int MIN_BLOCK_NORMAL_LEN = 12;
// As long as we can fit a B, we should be fine.
static constexpr int MIN_BLOCK_EXIT_LEN = 4;

Arm64JitBackend::Arm64JitBackend(JitOptions &jitopt, IRBlockCache &blocks)
	: IRNativeBackend(blocks), jo(jitopt), regs_(&jo), fp_(this) {
	// Automatically disable incompatible options.
	if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
		jo.enablePointerify = false;
	}
	jo.optimizeForInterpreter = false;
#ifdef MASKED_PSP_MEMORY
	jo.enablePointerify = false;
#endif

	// Since we store the offset, this is as big as it can be.
	AllocCodeSpace(1024 * 1024 * 16);

	regs_.Init(this, &fp_);
}

Arm64JitBackend::~Arm64JitBackend() {}

void Arm64JitBackend::UpdateFCR31(MIPSState *mipsState) {
	currentRoundingFunc_ = convertS0ToSCRATCH1_[mipsState->fcr31 & 3];
}

static void NoBlockExits() {
	_assert_msg_(false, "Never exited block, invalid IR?");
}

bool Arm64JitBackend::CompileBlock(IRBlockCache *irBlockCache, int block_num, bool preload) {
	if (GetSpaceLeft() < 0x800)
		return false;

	IRBlock *block = irBlockCache->GetBlock(block_num);
	BeginWrite(std::min(GetSpaceLeft(), (size_t)block->GetNumIRInstructions() * 32));

	u32 startPC = block->GetOriginalStart();
	bool wroteCheckedOffset = false;
	if (jo.enableBlocklink && !jo.useBackJump) {
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
		wroteCheckedOffset = true;

		WriteDebugPC(startPC);

		// Check the sign bit to check if negative.
		FixupBranch normalEntry = TBZ(DOWNCOUNTREG, 31);
		MOVI2R(SCRATCH1, startPC);
		B(outerLoopPCInSCRATCH1_);
		SetJumpTarget(normalEntry);
	}
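	// The TBZ above tests bit 31 (the sign bit) of DOWNCOUNTREG: while the
	// downcount is still non-negative, execution skips straight to the block
	// body. Once it goes negative, the block's start PC is loaded into
	// SCRATCH1 and we exit to the outer loop, which can run pending events
	// before re-entering jitted code. This checked entry is also the address
	// other blocks link to directly.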
	// Don't worry, the codespace isn't large enough to overflow offsets.
	const u8 *blockStart = GetCodePointer();
	block->SetNativeOffset((int)GetOffset(blockStart));
	compilingBlockNum_ = block_num;
	lastConstPC_ = 0;

	regs_.Start(irBlockCache, block_num);

	std::vector<const u8 *> addresses;
	addresses.reserve(block->GetNumIRInstructions());
	const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
	for (int i = 0; i < block->GetNumIRInstructions(); ++i) {
		const IRInst &inst = instructions[i];
		regs_.SetIRIndex(i);
		addresses.push_back(GetCodePtr());

		CompileIRInst(inst);

		if (jo.Disabled(JitDisable::REGALLOC_GPR) || jo.Disabled(JitDisable::REGALLOC_FPR))
			regs_.FlushAll(jo.Disabled(JitDisable::REGALLOC_GPR), jo.Disabled(JitDisable::REGALLOC_FPR));

		// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
		if (GetSpaceLeft() < 0x800) {
			compilingBlockNum_ = -1;
			return false;
		}
	}

	// We should've written an exit above. If we didn't, bad things will happen.
	// Only check if debug stats are enabled - needlessly wastes jit space.
	if (DebugStatsEnabled()) {
		QuickCallFunction(SCRATCH2_64, &NoBlockExits);
		B(hooks_.crashHandler);
	}

	int len = (int)GetOffset(GetCodePointer()) - block->GetNativeOffset();
	if (len < MIN_BLOCK_NORMAL_LEN) {
		// We need at least MIN_BLOCK_NORMAL_LEN bytes to invalidate blocks with.
		ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - len);
	}

	if (!wroteCheckedOffset) {
		// Always record this, even if block linking is disabled - it's used for size calc.
		SetBlockCheckedOffset(block_num, (int)GetOffset(GetCodePointer()));
	}

	if (jo.enableBlocklink && jo.useBackJump) {
		WriteDebugPC(startPC);

		// Small blocks are common, check if the backward branch fits TBZ's +/-32KB range.
		ptrdiff_t distance = blockStart - GetCodePointer();
		if (distance >= -0x8000 && distance < 0x8000) {
			TBZ(DOWNCOUNTREG, 31, blockStart);
		} else {
			FixupBranch toDispatch = TBNZ(DOWNCOUNTREG, 31);
			B(blockStart);
			SetJumpTarget(toDispatch);
		}

		MOVI2R(SCRATCH1, startPC);
		B(outerLoopPCInSCRATCH1_);
	}

	if (logBlocks_ > 0) {
		--logBlocks_;

		std::map<const u8 *, int> addressesLookup;
		for (int i = 0; i < (int)addresses.size(); ++i)
			addressesLookup[addresses[i]] = i;

		INFO_LOG(Log::JIT, "=============== ARM64 (%08x, %d bytes) ===============", startPC, len);
		const IRInst *instructions = irBlockCache->GetBlockInstructionPtr(*block);
		for (const u8 *p = blockStart; p < GetCodePointer(); ) {
			auto it = addressesLookup.find(p);
			if (it != addressesLookup.end()) {
				const IRInst &inst = instructions[it->second];

				char temp[512];
				DisassembleIR(temp, sizeof(temp), inst);
				INFO_LOG(Log::JIT, "IR: #%d %s", it->second, temp);
			}

			auto next = std::next(it);
			const u8 *nextp = next == addressesLookup.end() ? GetCodePointer() : next->first;

			auto lines = DisassembleArm64(p, (int)(nextp - p));
			for (const auto &line : lines)
				INFO_LOG(Log::JIT, " A: %s", line.c_str());
			p = nextp;
		}
	}

	EndWrite();
	FlushIcache();
	compilingBlockNum_ = -1;

	return true;
}
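// Block linking happens in two steps. WriteConstExit below emits either a
// direct branch to the target block's checked entry, when that block is
// already compiled, or a fallback through the dispatcher with the target PC
// in SCRATCH1. Every linkable exit is padded to MIN_BLOCK_EXIT_LEN bytes so
// that OverwriteExit can later patch a direct branch over the fallback in
// place.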
void Arm64JitBackend::WriteConstExit(uint32_t pc) {
	int block_num = blocks_.GetBlockNumberFromStartAddress(pc);
	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);

	int exitStart = (int)GetOffset(GetCodePointer());
	if (block_num >= 0 && jo.enableBlocklink && nativeBlock && nativeBlock->checkedOffset != 0) {
		B(GetBasePtr() + nativeBlock->checkedOffset);
	} else {
		MOVI2R(SCRATCH1, pc);
		B(dispatcherPCInSCRATCH1_);
	}

	if (jo.enableBlocklink) {
		// In case of compression or early link, make sure it's large enough.
		int len = (int)GetOffset(GetCodePointer()) - exitStart;
		if (len < MIN_BLOCK_EXIT_LEN) {
			ReserveCodeSpace(MIN_BLOCK_EXIT_LEN - len);
			len = MIN_BLOCK_EXIT_LEN;
		}

		AddLinkableExit(compilingBlockNum_, pc, exitStart, len);
	}
}

void Arm64JitBackend::OverwriteExit(int srcOffset, int len, int block_num) {
	_dbg_assert_(len >= MIN_BLOCK_EXIT_LEN);

	const IRNativeBlock *nativeBlock = GetNativeBlock(block_num);
	if (nativeBlock) {
		u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + srcOffset;
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		ARM64XEmitter emitter(GetBasePtr() + srcOffset, writable);
		emitter.B(GetBasePtr() + nativeBlock->checkedOffset);
		int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
		_dbg_assert_(bytesWritten <= MIN_BLOCK_EXIT_LEN);
		if (bytesWritten < len)
			emitter.ReserveCodeSpace(len - bytesWritten);
		emitter.FlushIcache();

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, len, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}
}

void Arm64JitBackend::CompIR_Generic(IRInst inst) {
	// If we got here, we're going the slow way.
	uint64_t value;
	memcpy(&value, &inst, sizeof(inst));

	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::IR_INTERPRET);
	MOVI2R(X0, value);
	QuickCallFunction(SCRATCH2_64, &DoIRInst);
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();

	// We only need to check the return value if it's a potential exit.
	if ((GetIRMeta(inst.op)->flags & IRFLAG_EXIT) != 0) {
		MOV(SCRATCH1, X0);

		ptrdiff_t distance = dispatcherPCInSCRATCH1_ - GetCodePointer();
		if (distance >= -0x100000 && distance < 0x100000) {
			// Convenient, we can do a simple branch if within 1MB.
			CBNZ(W0, dispatcherPCInSCRATCH1_);
		} else {
			// That's a shame, we need a long branch.
			FixupBranch keepOnKeepingOn = CBZ(W0);
			B(dispatcherPCInSCRATCH1_);
			SetJumpTarget(keepOnKeepingOn);
		}
	}
}

void Arm64JitBackend::CompIR_Interpret(IRInst inst) {
	MIPSOpcode op(inst.constant);

	// IR protects us against this being a branching instruction (well, hopefully.)
	FlushAll();
	SaveStaticRegisters();
	WriteDebugProfilerStatus(IRProfilerStatus::INTERPRET);
	if (DebugStatsEnabled()) {
		MOVP2R(X0, MIPSGetName(op));
		QuickCallFunction(SCRATCH2_64, &NotifyMIPSInterpret);
	}
	MOVI2R(X0, inst.constant);
	QuickCallFunction(SCRATCH2_64, MIPSGetInterpretFunc(op));
	WriteDebugProfilerStatus(IRProfilerStatus::IN_JIT);
	LoadStaticRegisters();
}

void Arm64JitBackend::FlushAll() {
	regs_.FlushAll();
}
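// CompIR_Generic above is the slow-path fallback: the whole IRInst is packed
// into a 64-bit value and handed to the IR interpreter via DoIRInst. For ops
// flagged IRFLAG_EXIT, the interpreter's return value in X0 is a destination
// PC; if it's nonzero, we leave the block through the dispatcher with that PC
// in SCRATCH1.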
bool Arm64JitBackend::DescribeCodePtr(const u8 *ptr, std::string &name) const {
	// Used in disassembly viewer and profiling tools.
	// Don't use spaces; profilers get confused or truncate them.
	if (ptr == dispatcherPCInSCRATCH1_) {
		name = "dispatcherPCInSCRATCH1";
	} else if (ptr == outerLoopPCInSCRATCH1_) {
		name = "outerLoopPCInSCRATCH1";
	} else if (ptr == dispatcherNoCheck_) {
		name = "dispatcherNoCheck";
	} else if (ptr == saveStaticRegisters_) {
		name = "saveStaticRegisters";
	} else if (ptr == loadStaticRegisters_) {
		name = "loadStaticRegisters";
	} else if (ptr == restoreRoundingMode_) {
		name = "restoreRoundingMode";
	} else if (ptr == applyRoundingMode_) {
		name = "applyRoundingMode";
	} else if (ptr == updateRoundingMode_) {
		name = "updateRoundingMode";
	} else if (ptr == currentRoundingFunc_) {
		name = "currentRoundingFunc";
	} else if (ptr >= convertS0ToSCRATCH1_[0] && ptr <= convertS0ToSCRATCH1_[7]) {
		name = "convertS0ToSCRATCH1";
	} else if (ptr >= GetBasePtr() && ptr < GetBasePtr() + jitStartOffset_) {
		name = "fixedCode";
	} else {
		return IRNativeBackend::DescribeCodePtr(ptr, name);
	}
	return true;
}

void Arm64JitBackend::ClearAllBlocks() {
	ClearCodeSpace(jitStartOffset_);
	FlushIcacheSection(region + jitStartOffset_, region + region_size - jitStartOffset_);
	EraseAllLinks(-1);
}

void Arm64JitBackend::InvalidateBlock(IRBlockCache *irBlockCache, int block_num) {
	IRBlock *block = irBlockCache->GetBlock(block_num);
	int offset = block->GetNativeOffset();
	u8 *writable = GetWritablePtrFromCodePtr(GetBasePtr()) + offset;

	// Overwrite the block with a jump to compile it again.
	u32 pc = block->GetOriginalStart();
	if (pc != 0) {
		// Hopefully we always have at least MIN_BLOCK_NORMAL_LEN bytes, which should be all we need.
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		ARM64XEmitter emitter(GetBasePtr() + offset, writable);
		emitter.MOVI2R(SCRATCH1, pc);
		emitter.B(dispatcherPCInSCRATCH1_);
		int bytesWritten = (int)(emitter.GetWritableCodePtr() - writable);
		if (bytesWritten < MIN_BLOCK_NORMAL_LEN)
			emitter.ReserveCodeSpace(MIN_BLOCK_NORMAL_LEN - bytesWritten);
		emitter.FlushIcache();

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(writable, MIN_BLOCK_NORMAL_LEN, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}

	EraseAllLinks(block_num);
}
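// Invalidation never moves or frees code: the first MIN_BLOCK_NORMAL_LEN
// bytes of the stale block are overwritten with "MOVI2R SCRATCH1, pc;
// B dispatcherPCInSCRATCH1_", so any path that still reaches the old entry
// falls back into the dispatcher and triggers a recompile, while
// EraseAllLinks detaches exits that were linked to this block.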
void Arm64JitBackend::RestoreRoundingMode(bool force) {
	QuickCallFunction(SCRATCH2_64, restoreRoundingMode_);
}

void Arm64JitBackend::ApplyRoundingMode(bool force) {
	QuickCallFunction(SCRATCH2_64, applyRoundingMode_);
}

void Arm64JitBackend::UpdateRoundingMode(bool force) {
	QuickCallFunction(SCRATCH2_64, updateRoundingMode_);
}

void Arm64JitBackend::MovFromPC(ARM64Reg r) {
	LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}

void Arm64JitBackend::MovToPC(ARM64Reg r) {
	STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
}

void Arm64JitBackend::WriteDebugPC(uint32_t pc) {
	if (hooks_.profilerPC) {
		int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		MOVI2R(SCRATCH1, pc);
		STR(SCRATCH1, JITBASEREG, SCRATCH2);
	}
}

void Arm64JitBackend::WriteDebugPC(ARM64Reg r) {
	if (hooks_.profilerPC) {
		int offset = (int)((const u8 *)hooks_.profilerPC - GetBasePtr());
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		STR(r, JITBASEREG, SCRATCH2);
	}
}

void Arm64JitBackend::WriteDebugProfilerStatus(IRProfilerStatus status) {
	if (hooks_.profilerStatus) {
		int offset = (int)((const u8 *)hooks_.profilerStatus - GetBasePtr());
		MOVI2R(SCRATCH2, MIPS_EMUHACK_OPCODE + offset);
		MOVI2R(SCRATCH1, (int)status);
		STR(SCRATCH1, JITBASEREG, SCRATCH2);
	}
}

void Arm64JitBackend::SaveStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(SCRATCH2_64, saveStaticRegisters_);
	} else {
		// Inline the single operation.
		STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
	}
}

void Arm64JitBackend::LoadStaticRegisters() {
	if (jo.useStaticAlloc) {
		QuickCallFunction(SCRATCH2_64, loadStaticRegisters_);
	} else {
		LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
	}
}

} // namespace MIPSComp

#endif