CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/ARM64/Arm64Jit.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"

#if PPSSPP_ARCH(ARM64)

#include "Common/Profiler/Profiler.h"
#include "Common/Log.h"
#include "Common/Serialize/Serializer.h"
#include "Common/Serialize/SerializeFuncs.h"
#include "Common/CPUDetect.h"
#include "Common/StringUtils.h"

#include "Core/Reporting.h"
#include "Core/Config.h"
#include "Core/Core.h"
#include "Core/CoreTiming.h"
#include "Core/Debugger/Breakpoints.h"
#include "Core/Debugger/SymbolMap.h"
#include "Core/MemMap.h"

#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/MIPSInt.h"
#include "Core/MIPS/MIPSTables.h"
#include "Core/HLE/ReplaceTables.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"
#include "Core/MIPS/ARM64/Arm64RegCacheFPU.h"

#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/JitCommon/JitCommon.h"

using namespace Arm64JitConstants;

// Logs a disassembly of the given emitted ARM64 code, one INFO_LOG line per
// disassembled instruction. Debug/inspection helper only.
static void DisassembleArm64Print(const u8 *data, int size) {
	std::vector<std::string> lines = DisassembleArm64(data, size);
	for (auto s : lines) {
		INFO_LOG(Log::JIT, "%s", s.c_str());
	}
	/*
	INFO_LOG(Log::JIT, "+++");
	// A format friendly to Online Disassembler which gets endianness wrong
	for (size_t i = 0; i < lines.size(); i++) {
		uint32_t opcode = ((const uint32_t *)data)[i];
		INFO_LOG(Log::JIT, "%d/%d: %08x", (int)(i+1), (int)lines.size(), swap32(opcode));
	}
	INFO_LOG(Log::JIT, "===");
	INFO_LOG(Log::JIT, "===");*/
}

// Called from emitted code when a jit breakpoint is hit.
// Returns 1 if execution should pause (BREAK_ACTION_PAUSE), 0 to continue.
static u32 JitBreakpoint(uint32_t addr) {
	// Should we skip this breakpoint?
	if (CBreakPoints::CheckSkipFirst() == currentMIPS->pc || CBreakPoints::CheckSkipFirst() == addr)
		return 0;

	BreakAction result = CBreakPoints::ExecBreakPoint(addr);
	if ((result & BREAK_ACTION_PAUSE) == 0)
		return 0;

	return 1;
}

// Called from emitted code for memory-access checks. Re-decodes the memory
// instruction at pc to recover the effective address, runs the memcheck, and
// returns nonzero if the core left the running state (i.e. should bail out).
static u32 JitMemCheck(u32 pc) {
	if (CBreakPoints::CheckSkipFirst() == currentMIPS->pc)
		return 0;

	// Note: pc may be the delay slot.
	const auto op = Memory::Read_Instruction(pc, true);
	s32 offset = SignExtend16ToS32(op & 0xFFFF);
	if (MIPSGetInfo(op) & IS_VFPU)
		offset &= 0xFFFC;  // VFPU loads/stores ignore the low offset bits.
	u32 addr = currentMIPS->r[MIPS_GET_RS(op)] + offset;

	CBreakPoints::ExecOpMemCheck(addr, pc);
	return coreState == CORE_RUNNING || coreState == CORE_NEXTFRAME ? 0 : 1;
}

namespace MIPSComp
{
	using namespace Arm64Gen;
	using namespace Arm64JitConstants;

	Arm64Jit::Arm64Jit(MIPSState *mipsState) : blocks(mipsState, this), gpr(mipsState, &js, &jo), fpr(mipsState, &js, &jo), mips_(mipsState), fp(this) {
		// Automatically disable incompatible options.
		if (((intptr_t)Memory::base & 0x00000000FFFFFFFFUL) != 0) {
			jo.enablePointerify = false;
		}

#ifdef MASKED_PSP_MEMORY
		jo.enablePointerify = false;
#endif

		logBlocks = 0;
		dontLogBlocks = 0;
		blocks.Init();
		gpr.SetEmitter(this);
		fpr.SetEmitter(this, &fp);
		// Allocates 16MB of code space. 32MB is the absolute max because that's what an ARM
		// branch instruction can reach, backwards and forwards.
		// NOTE(review): the 32MB figure matches the ARMv7 +/-32MB branch range; AArch64 B
		// reaches +/-128MB — this comment was likely inherited from the ARM32 jit. Verify.
		AllocCodeSpace(1024 * 1024 * 16);
		GenerateFixedCode(jo);
		js.startDefaultPrefix = mips_->HasDefaultPrefix();
		js.currentRoundingFunc = convertS0ToSCRATCH1[mips_->fcr31 & 3];

		// The debugger sets this so that "go" on a breakpoint will actually... go.
		// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
		CBreakPoints::SetSkipFirst(0);
	}

	Arm64Jit::~Arm64Jit() {
	}

	// Savestate serialization for jit-related state (prefix/rounding flags).
	void Arm64Jit::DoState(PointerWrap &p) {
		auto s = p.Section("Jit", 1, 2);
		if (!s)
			return;

		Do(p, js.startDefaultPrefix);
		if (p.mode == PointerWrap::MODE_READ && !js.startDefaultPrefix) {
			WARN_LOG(Log::CPU, "Jit: An uneaten prefix was previously detected. Jitting in unknown-prefix mode.");
		}
		if (s >= 2) {
			Do(p, js.hasSetRounding);
			if (p.mode == PointerWrap::MODE_READ) {
				js.lastSetRounding = 0;
			}
		} else {
			// Old savestate version: assume rounding was used, to be safe.
			js.hasSetRounding = 1;
		}

		// Note: we can't update the currentRoundingFunc here because fcr31 wasn't loaded yet.

		// The debugger sets this so that "go" on a breakpoint will actually... go.
		// But if they reset, we can end up hitting it by mistake, since it's based on PC and ticks.
		CBreakPoints::SetSkipFirst(0);
	}

	// Re-selects the S0 conversion routine after fcr31 (rounding mode bits) changed.
	void Arm64Jit::UpdateFCR31() {
		js.currentRoundingFunc = convertS0ToSCRATCH1[mips_->fcr31 & 3];
	}

	// Flushes both register caches and any dirty VFPU prefix state to memory.
	void Arm64Jit::FlushAll() {
		gpr.FlushAll();
		fpr.FlushAll();
		FlushPrefixV();
	}

	// Writes dirty VFPU prefix registers back to the MIPS context, or just clears
	// the dirty flags when the prefixes are known to still hold default values.
	void Arm64Jit::FlushPrefixV() {
		if (js.startDefaultPrefix && !js.blockWrotePrefixes && js.HasNoPrefix()) {
			// They started default, we never modified in memory, and they're default now.
			// No reason to modify memory. This is common at end of blocks. Just clear dirty.
			js.prefixSFlag = (JitState::PrefixState)(js.prefixSFlag & ~JitState::PREFIX_DIRTY);
			js.prefixTFlag = (JitState::PrefixState)(js.prefixTFlag & ~JitState::PREFIX_DIRTY);
			js.prefixDFlag = (JitState::PrefixState)(js.prefixDFlag & ~JitState::PREFIX_DIRTY);
			return;
		}

		if ((js.prefixSFlag & JitState::PREFIX_DIRTY) != 0) {
			gpr.SetRegImm(SCRATCH1, js.prefixS);
			STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_SPREFIX]));
			js.prefixSFlag = (JitState::PrefixState) (js.prefixSFlag & ~JitState::PREFIX_DIRTY);
		}

		if ((js.prefixTFlag & JitState::PREFIX_DIRTY) != 0) {
			gpr.SetRegImm(SCRATCH1, js.prefixT);
			STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_TPREFIX]));
			js.prefixTFlag = (JitState::PrefixState) (js.prefixTFlag & ~JitState::PREFIX_DIRTY);
		}

		if ((js.prefixDFlag & JitState::PREFIX_DIRTY) != 0) {
			gpr.SetRegImm(SCRATCH1, js.prefixD);
			STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, vfpuCtrl[VFPU_CTRL_DPREFIX]));
			js.prefixDFlag = (JitState::PrefixState) (js.prefixDFlag & ~JitState::PREFIX_DIRTY);
		}

		// If we got here, we must've written prefixes to memory in this block.
		js.blockWrotePrefixes = true;
	}

	// Throws away all compiled blocks and resets the code space (past the fixed code).
	void Arm64Jit::ClearCache() {
		INFO_LOG(Log::JIT, "ARM64Jit: Clearing the cache!");
		blocks.Clear();
		ClearCodeSpace(jitStartOffset);
		FlushIcacheSection(region + jitStartOffset, region + region_size - jitStartOffset);
	}

	void Arm64Jit::InvalidateCacheAt(u32 em_address, int length) {
		// Only bother invalidating if the range can actually contain emuhacks.
		if (blocks.RangeMayHaveEmuHacks(em_address, em_address + length)) {
			blocks.InvalidateICache(em_address, length);
		}
	}

	// Skips over the next instruction without emitting code for it, while still
	// accounting for its cycles and any breakpoint placed on it.
	void Arm64Jit::EatInstruction(MIPSOpcode op) {
		MIPSInfo info = MIPSGetInfo(op);
		if (info & DELAYSLOT) {
			ERROR_LOG_REPORT_ONCE(ateDelaySlot, Log::JIT, "Ate a branch op.");
		}
		if (js.inDelaySlot) {
			ERROR_LOG_REPORT_ONCE(ateInDelaySlot, Log::JIT, "Ate an instruction inside a delay slot.");
		}

		CheckJitBreakpoint(GetCompilerPC() + 4, 0);
		js.numInstructions++;
		js.compilerPC += 4;
		js.downcountAmount += MIPSGetInstructionCycleEstimate(op);
	}

	// Compiles the instruction in the branch delay slot. flags control whether
	// NZCV must be preserved (DELAYSLOT_SAFE) and whether to flush after (DELAYSLOT_FLUSH).
	void Arm64Jit::CompileDelaySlot(int flags) {
		// Need to offset the downcount which was already incremented for the branch + delay slot.
		CheckJitBreakpoint(GetCompilerPC() + 4, -2);

		// preserve flag around the delay slot! Maybe this is not always necessary on ARM where
		// we can (mostly) control whether we set the flag or not. Of course, if someone puts an slt in to the
		// delay slot, we're screwed.
		if (flags & DELAYSLOT_SAFE)
			MRS(FLAGTEMPREG, FIELD_NZCV);  // Save flags register. FLAGTEMPREG is preserved through function calls and is not allocated.

		js.inDelaySlot = true;
		MIPSOpcode op = GetOffsetInstruction(1);
		MIPSCompileOp(op, this);
		js.inDelaySlot = false;

		if (flags & DELAYSLOT_FLUSH)
			FlushAll();
		if (flags & DELAYSLOT_SAFE)
			_MSR(FIELD_NZCV, FLAGTEMPREG);  // Restore flags register
	}

	// Top-level entry: compiles (or recompiles) the block starting at em_address.
	void Arm64Jit::Compile(u32 em_address) {
		PROFILE_THIS_SCOPE("jitc");
		if (GetSpaceLeft() < 0x10000 || blocks.IsFull()) {
			INFO_LOG(Log::JIT, "Space left: %d", (int)GetSpaceLeft());
			ClearCache();
		}

		BeginWrite(JitBlockCache::MAX_BLOCK_INSTRUCTIONS * 16);

		int block_num = blocks.AllocateBlock(em_address);
		JitBlock *b = blocks.GetBlock(block_num);
		DoJit(em_address, b);
		_assert_msg_(b->originalAddress == em_address, "original %08x != em_address %08x (block %d)", b->originalAddress, em_address, b->blockNum);
		blocks.FinalizeBlock(block_num, jo.enableBlocklink);
		EndWrite();

		// Don't forget to zap the newly written instructions in the instruction cache!
		FlushIcache();

		bool cleanSlate = false;

		if (js.hasSetRounding && !js.lastSetRounding) {
			WARN_LOG(Log::JIT, "Detected rounding mode usage, rebuilding jit with checks");
			// Won't loop, since hasSetRounding is only ever set to 1.
			js.lastSetRounding = js.hasSetRounding;
			cleanSlate = true;
		}

		// Drat. The VFPU hit an uneaten prefix at the end of a block.
		if (js.startDefaultPrefix && js.MayHavePrefix()) {
			WARN_LOG_REPORT(Log::JIT, "An uneaten prefix at end of block: %08x", GetCompilerPC() - 4);
			js.LogPrefix();

			// Let's try that one more time. We won't get back here because we toggled the value.
			js.startDefaultPrefix = false;
			// TODO ARM64: This crashes.
			//cleanSlate = true;
		}

		if (cleanSlate) {
			// Our assumptions are all wrong so it's clean-slate time.
			ClearCache();
			Compile(em_address);
		}
	}

	// Enters the dispatcher loop. globalticks is unused here; the dispatcher's own
	// downcount checks govern when it returns.
	void Arm64Jit::RunLoopUntil(u64 globalticks) {
		PROFILE_THIS_SCOPE("jit");
		((void (*)())enterDispatcher)();
	}

	u32 Arm64Jit::GetCompilerPC() {
		return js.compilerPC;
	}

	// Reads the MIPS instruction at compilerPC + 4*offset (offset in instructions).
	MIPSOpcode Arm64Jit::GetOffsetInstruction(int offset) {
		return Memory::Read_Instruction(GetCompilerPC() + 4 * offset);
	}

	// The actual block compiler: translates MIPS starting at em_address until an
	// exit is written, and returns the block's normal (unchecked) entry point.
	const u8 *Arm64Jit::DoJit(u32 em_address, JitBlock *b) {
		js.cancel = false;
		js.blockStart = em_address;
		js.compilerPC = em_address;
		js.lastContinuedPC = 0;
		js.initialBlockSize = 0;
		js.nextExit = 0;
		js.downcountAmount = 0;
		js.curBlock = b;
		js.compiling = true;
		js.inDelaySlot = false;
		js.blockWrotePrefixes = false;
		js.PrefixStart();

		// We add a downcount flag check before the block, used when entering from a linked block.
		// The last block decremented downcounter, and the flag should still be available.
		// Got three variants here of where we position the code, needs detailed benchmarking.

		FixupBranch bail;
		if (jo.useBackJump) {
			// Moves the MOVI2R and B *before* checkedEntry, and just branch backwards there.
			// Speedup seems to be zero unfortunately but I guess it may vary from device to device.
			// Not intrusive so keeping it around here to experiment with, may help on ARMv6 due to
			// large/slow construction of 32-bit immediates?
			const u8 *backJump = GetCodePtr();
			MOVI2R(SCRATCH1, js.blockStart);
			B((const void *)outerLoopPCInSCRATCH1);
			b->checkedEntry = GetCodePtr();
			B(CC_LT, backJump);
		} else if (jo.useForwardJump) {
			b->checkedEntry = GetCodePtr();
			bail = B(CC_LT);
		} else if (jo.enableBlocklink) {
			b->checkedEntry = GetCodePtr();
			MOVI2R(SCRATCH1, js.blockStart);
			FixupBranch skip = B(CC_GE);
			B((const void *)outerLoopPCInSCRATCH1);
			SetJumpTarget(skip);
		} else {
			// No block linking, no need to add headers to blocks.
		}

		b->normalEntry = GetCodePtr();
		// TODO: this needs work
		MIPSAnalyst::AnalysisResults analysis; // = MIPSAnalyst::Analyze(em_address);

		gpr.Start(analysis);
		fpr.Start(analysis);

		js.numInstructions = 0;
		while (js.compiling) {
			gpr.SetCompilerPC(GetCompilerPC());  // Let it know for log messages
			// Jit breakpoints are quite fast, so let's do them in release too.
			CheckJitBreakpoint(GetCompilerPC(), 0);

			MIPSOpcode inst = Memory::Read_Opcode_JIT(GetCompilerPC());
			js.downcountAmount += MIPSGetInstructionCycleEstimate(inst);

			MIPSCompileOp(inst, this);

			js.compilerPC += 4;
			js.numInstructions++;

			if (jo.Disabled(JitDisable::REGALLOC_GPR)) {
				gpr.FlushAll();
			}
			if (jo.Disabled(JitDisable::REGALLOC_FPR)) {
				fpr.FlushAll();
				FlushPrefixV();
			}

			// Safety check, in case we get a bunch of really large jit ops without a lot of branching.
			if (GetSpaceLeft() < 0x800 || js.numInstructions >= JitBlockCache::MAX_BLOCK_INSTRUCTIONS) {
				FlushAll();
				WriteExit(GetCompilerPC(), js.nextExit++);
				js.compiling = false;
			}
		}

		if (jo.useForwardJump) {
			// bail was set above in the matching jo.useForwardJump branch.
			SetJumpTarget(bail);
			gpr.SetRegImm(SCRATCH1, js.blockStart);
			B((const void *)outerLoopPCInSCRATCH1);
		}

		char temp[256];
		if (logBlocks > 0 && dontLogBlocks == 0) {
			INFO_LOG(Log::JIT, "=============== mips %d ===============", blocks.GetNumBlocks());
			for (u32 cpc = em_address; cpc != GetCompilerPC() + 4; cpc += 4) {
				MIPSDisAsm(Memory::Read_Opcode_JIT(cpc), cpc, temp, sizeof(temp), true);
				INFO_LOG(Log::JIT, "M: %08x %s", cpc, temp);
			}
		}

		b->codeSize = GetCodePtr() - b->normalEntry;
		if (logBlocks > 0 && dontLogBlocks == 0) {
			INFO_LOG(Log::JIT, "=============== ARM (%d instructions -> %d bytes) ===============", js.numInstructions, b->codeSize);
			DisassembleArm64Print(b->normalEntry, GetCodePtr() - b->normalEntry);
		}
		if (logBlocks > 0)
			logBlocks--;
		if (dontLogBlocks > 0)
			dontLogBlocks--;

		if (js.lastContinuedPC == 0) {
			b->originalSize = js.numInstructions;
		} else {
			// We continued at least once. Add the last proxy and set the originalSize correctly.
			blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
			b->originalSize = js.initialBlockSize;
		}

		return b->normalEntry;
	}

	void Arm64Jit::AddContinuedBlock(u32 dest) {
		// The first block is the root block. When we continue, we create proxy blocks after that.
		if (js.lastContinuedPC == 0)
			js.initialBlockSize = js.numInstructions;
		else
			blocks.ProxyBlock(js.blockStart, js.lastContinuedPC, (GetCompilerPC() - js.lastContinuedPC) / sizeof(u32), GetCodePtr());
		js.lastContinuedPC = dest;
	}

	// Maps a code pointer back to a human-readable name for the disassembly viewer.
	bool Arm64Jit::DescribeCodePtr(const u8 *ptr, std::string &name) {
		// Used in disassembly viewer.
		if (ptr == applyRoundingMode)
			name = "applyRoundingMode";
		else if (ptr == updateRoundingMode)
			name = "updateRoundingMode";
		else if (ptr == dispatcher)
			name = "dispatcher";
		else if (ptr == dispatcherPCInSCRATCH1)
			name = "dispatcher (PC in SCRATCH1)";
		else if (ptr == dispatcherNoCheck)
			name = "dispatcherNoCheck";
		else if (ptr == enterDispatcher)
			name = "enterDispatcher";
		else if (ptr == restoreRoundingMode)
			name = "restoreRoundingMode";
		else if (ptr == saveStaticRegisters)
			name = "saveStaticRegisters";
		else if (ptr == loadStaticRegisters)
			name = "loadStaticRegisters";
		else {
			u32 addr = blocks.GetAddressFromBlockPtr(ptr);
			// Returns 0 when it's valid, but unknown.
			if (addr == 0) {
				name = "(unknown or deleted block)";
				return true;
			} else if (addr != (u32)-1) {
				// NOTE(review): this labels every valid non-zero address "(outside space)"
				// and makes the block-number lookup below reachable only for addr == -1,
				// where it can never match. The two conditions look swapped — verify
				// against GetAddressFromBlockPtr's contract.
				name = "(outside space)";
				return true;
			}

			int number = blocks.GetBlockNumberFromAddress(addr);
			if (number != -1) {
				const JitBlock *block = blocks.GetBlock(number);
				if (block) {
					name = StringFromFormat("(block %d at %08x)", number, block->originalAddress);
					return true;
				}
			}
			return false;
		}
		return true;
	}

	void Arm64Jit::Comp_RunBlock(MIPSOpcode op) {
		// This shouldn't be necessary, the dispatcher should catch us before we get here.
		ERROR_LOG(Log::JIT, "Comp_RunBlock should never be reached!");
	}

	// Patches a block's exit to branch directly into another block's checked entry.
	void Arm64Jit::LinkBlock(u8 *exitPoint, const u8 *checkedEntry) {
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_WRITE);
		}
		ARM64XEmitter emit(GetCodePtrFromWritablePtr(exitPoint), exitPoint);
		emit.B(checkedEntry);
		// TODO: Write stuff after, covering up the now-unused instructions.
		emit.FlushIcache();
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(exitPoint, 32, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}

	// Replaces a block's checked entry with a stub that sets pc and returns to the
	// dispatcher, so stale links can no longer run the block's body.
	void Arm64Jit::UnlinkBlock(u8 *checkedEntry, u32 originalAddress) {
		// Send anyone who tries to run this block back to the dispatcher.
		// Not entirely ideal, but .. works.
		// Spurious entrances from previously linked blocks can only come through checkedEntry
		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_WRITE);
		}

		ARM64XEmitter emit(GetCodePtrFromWritablePtr(checkedEntry), checkedEntry);
		emit.MOVI2R(SCRATCH1, originalAddress);
		emit.STR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));
		emit.B(MIPSComp::jit->GetDispatcher());
		emit.FlushIcache();

		if (PlatformIsWXExclusive()) {
			ProtectMemoryPages(checkedEntry, 16, MEM_PROT_READ | MEM_PROT_EXEC);
		}
	}

	// Tries to replace a jal to dest with an inlined or called replacement function.
	// Returns false if the target can't be replaced (caller compiles the jal normally).
	bool Arm64Jit::ReplaceJalTo(u32 dest) {
#if PPSSPP_ARCH(ARM64)
		const ReplacementTableEntry *entry = nullptr;
		u32 funcSize = 0;
		if (!CanReplaceJalTo(dest, &entry, &funcSize)) {
			return false;
		}

		// Warning - this might be bad if the code at the destination changes...
		if (entry->flags & REPFLAG_ALLOWINLINE) {
			// Jackpot! Just do it, no flushing. The code will be entirely inlined.
			// First, compile the delay slot. It's unconditional so no issues.
			CompileDelaySlot(DELAYSLOT_NICE);
			// Technically, we should write the unused return address to RA, but meh.
			MIPSReplaceFunc repl = entry->jitReplaceFunc;
			int cycles = (this->*repl)();
			js.downcountAmount += cycles;
		} else {
			gpr.SetImm(MIPS_REG_RA, GetCompilerPC() + 8);
			CompileDelaySlot(DELAYSLOT_NICE);
			FlushAll();
			SaveStaticRegisters();
			RestoreRoundingMode();
			QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc));
			ApplyRoundingMode();
			LoadStaticRegisters();
			WriteDownCountR(W0);  // W0 is the return value from entry->replaceFunc. Neither LoadStaticRegisters nor ApplyRoundingMode can trash it.
		}

		js.compilerPC += 4;
		// No writing exits, keep going!

		if (CBreakPoints::HasMemChecks()) {
			// We could modify coreState, so we need to write PC and check.
			// Otherwise, PC may end up on the jal. We add 4 to skip the delay slot.
			FlushAll();
			WriteExit(GetCompilerPC() + 4, js.nextExit++);
			js.compiling = false;
		}

		// Add a trigger so that if the inlined code changes, we invalidate this block.
		blocks.ProxyBlock(js.blockStart, dest, funcSize / sizeof(u32), GetCodePtr());
#endif
		return true;
	}

	// Compiles an emuhack op marking the first instruction of a replaced function.
	void Arm64Jit::Comp_ReplacementFunc(MIPSOpcode op)
	{
		// We get here if we execute the first instruction of a replaced function. This means
		// that we do need to return to RA.

		// Inlined function calls (caught in jal) are handled differently.

		int index = op.encoding & MIPS_EMUHACK_VALUE_MASK;

		const ReplacementTableEntry *entry = GetReplacementFunc(index);
		if (!entry) {
			ERROR_LOG_REPORT_ONCE(replFunc, Log::HLE, "Invalid replacement op %08x at %08x", op.encoding, js.compilerPC);
			// TODO: What should we do here? We're way off in the weeds probably.
			return;
		}

		u32 funcSize = g_symbolMap->GetFunctionSize(GetCompilerPC());
		bool disabled = (entry->flags & REPFLAG_DISABLED) != 0;
		if (!disabled && funcSize != SymbolMap::INVALID_ADDRESS && funcSize > sizeof(u32)) {
			// We don't need to disable hooks, the code will still run.
			if ((entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) == 0) {
				// Any breakpoint at the func entry was already tripped, so we can still run the replacement.
				// That's a common case - just to see how often the replacement hits.
				disabled = CBreakPoints::RangeContainsBreakPoint(GetCompilerPC() + sizeof(u32), funcSize - sizeof(u32));
			}
		}

		if (disabled) {
			MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
		} else if (entry->jitReplaceFunc) {
			MIPSReplaceFunc repl = entry->jitReplaceFunc;
			int cycles = (this->*repl)();

			if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
				// Compile the original instruction at this address. We ignore cycles for hooks.
				MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
			} else {
				FlushAll();
				// Flushed, so R1 is safe.
				LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, MIPS_REG_RA * 4);
				js.downcountAmount += cycles;
				WriteExitDestInR(SCRATCH1);
				js.compiling = false;
			}
		} else if (entry->replaceFunc) {
			FlushAll();
			SaveStaticRegisters();
			RestoreRoundingMode();
			gpr.SetRegImm(SCRATCH1, GetCompilerPC());
			MovToPC(SCRATCH1);

			// Standard function call, nothing fancy.
			// The function returns the number of cycles it took in EAX.
			QuickCallFunction(SCRATCH1_64, (const void *)(entry->replaceFunc));

			if (entry->flags & (REPFLAG_HOOKENTER | REPFLAG_HOOKEXIT)) {
				// Compile the original instruction at this address. We ignore cycles for hooks.
				ApplyRoundingMode();
				LoadStaticRegisters();
				MIPSCompileOp(Memory::Read_Instruction(GetCompilerPC(), true), this);
			} else {
				ApplyRoundingMode();
				LoadStaticRegisters();

				// Negative return value means "consumed -W0 cycles, continue at current PC";
				// non-negative means "consumed W0 cycles, return to RA".
				CMPI2R(W0, 0);
				FixupBranch positive = B(CC_GE);

				NEG(W0, W0);
				MovFromPC(W1);
				FixupBranch done = B();

				SetJumpTarget(positive);
				LDR(INDEX_UNSIGNED, W1, CTXREG, MIPS_REG_RA * 4);

				SetJumpTarget(done);
				WriteDownCountR(W0);
				WriteExitDestInR(W1);
				js.compiling = false;
			}
		} else {
			ERROR_LOG(Log::HLE, "Replacement function %s has neither jit nor regular impl", entry->name);
		}
	}

	// Fallback: calls the interpreter for an op the jit doesn't handle natively.
	void Arm64Jit::Comp_Generic(MIPSOpcode op) {
		FlushAll();
		MIPSInterpretFunc func = MIPSGetInterpretFunc(op);
		if (func) {
			SaveStaticRegisters();
			// TODO: Perhaps keep the rounding mode for interp? Should probably, right?
			RestoreRoundingMode();
			MOVI2R(SCRATCH1, GetCompilerPC());
			MovToPC(SCRATCH1);
			MOVI2R(W0, op.encoding);
			QuickCallFunction(SCRATCH2_64, (void *)func);
			ApplyRoundingMode();
			LoadStaticRegisters();
		}

		const MIPSInfo info = MIPSGetInfo(op);
		if ((info & IS_VFPU) != 0 && (info & VFPU_NO_PREFIX) == 0) {
			// If it does eat them, it'll happen in MIPSCompileOp().
			if ((info & OUT_EAT_PREFIX) == 0)
				js.PrefixUnknown();

			// Even if DISABLE'd, we want to set this flag so we overwrite.
			if ((info & OUT_VFPU_PREFIX) != 0)
				js.blockWrotePrefixes = true;
		}
	}

	void Arm64Jit::MovFromPC(ARM64Reg r) {
		LDR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
	}

	void Arm64Jit::MovToPC(ARM64Reg r) {
		STR(INDEX_UNSIGNED, r, CTXREG, offsetof(MIPSState, pc));
	}

	// Should not really be necessary except when entering Advance
	void Arm64Jit::SaveStaticRegisters() {
		if (jo.useStaticAlloc) {
			QuickCallFunction(SCRATCH2_64, saveStaticRegisters);
		} else {
			// Inline the single operation
			STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
		}
	}

	void Arm64Jit::LoadStaticRegisters() {
		if (jo.useStaticAlloc) {
			QuickCallFunction(SCRATCH2_64, loadStaticRegisters);
		} else {
			LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));
		}
	}

	// Emits a subtraction of the block's accumulated cycle count (plus offset)
	// from the downcount register, optionally setting NZCV for a following branch.
	void Arm64Jit::WriteDownCount(int offset, bool updateFlags) {
		int theDowncount = js.downcountAmount + offset;
		if (updateFlags) {
			SUBSI2R(DOWNCOUNTREG, DOWNCOUNTREG, theDowncount, SCRATCH1);
		} else {
			SUBI2R(DOWNCOUNTREG, DOWNCOUNTREG, theDowncount, SCRATCH1);
		}
	}

	// Like WriteDownCount but the cycle count comes from a register.
	void Arm64Jit::WriteDownCountR(ARM64Reg reg, bool updateFlags) {
		if (updateFlags) {
			SUBS(DOWNCOUNTREG, DOWNCOUNTREG, reg);
		} else {
			SUB(DOWNCOUNTREG, DOWNCOUNTREG, reg);
		}
	}

	// Destroys SCRATCH2
	void Arm64Jit::RestoreRoundingMode(bool force) {
		// If the game has never set an interesting rounding mode, we can safely skip this.
		if (force || js.hasSetRounding) {
			QuickCallFunction(SCRATCH2_64, restoreRoundingMode);
		}
	}

	// Destroys SCRATCH1 and SCRATCH2
	void Arm64Jit::ApplyRoundingMode(bool force) {
		// If the game has never set an interesting rounding mode, we can safely skip this.
		if (force || js.hasSetRounding) {
			QuickCallFunction(SCRATCH2_64, applyRoundingMode);
		}
	}

	// Destroys SCRATCH1 and SCRATCH2
	void Arm64Jit::UpdateRoundingMode(u32 fcr31) {
		// We must set js.hasSetRounding at compile time, or this block will use the wrong rounding mode.
		// The fcr31 parameter is -1 when not known at compile time, so we just assume it was changed.
		if (fcr31 & 0x01000003) {
			js.hasSetRounding = true;
		}
		QuickCallFunction(SCRATCH2_64, updateRoundingMode);
	}

	// IDEA - could have a WriteDualExit that takes two destinations and two condition flags,
	// and just have conditional that set PC "twice". This only works when we fall back to dispatcher
	// though, as we need to have the SUBS flag set in the end. So with block linking in the mix,
	// I don't think this gives us that much benefit.
	void Arm64Jit::WriteExit(u32 destination, int exit_num) {
		// NOTE: Can't blindly check for bad destination addresses here, sometimes exits with bad destinations are written intentionally (like breaks).
		_assert_msg_(exit_num < MAX_JIT_BLOCK_EXITS, "Expected a valid exit_num. dest=%08x", destination);

		WriteDownCount();
		//If nobody has taken care of this yet (this can be removed when all branches are done)
		JitBlock *b = js.curBlock;
		b->exitAddress[exit_num] = destination;
		b->exitPtrs[exit_num] = GetWritableCodePtr();

		// Link opportunity!
		int block = blocks.GetBlockNumberFromStartAddress(destination);
		if (block >= 0 && jo.enableBlocklink) {
			// The target block exists! Directly link to its checked entrypoint.
			B(blocks.GetBlock(block)->checkedEntry);
			b->linkStatus[exit_num] = true;
		} else {
			MOVI2R(SCRATCH1, destination);
			B((const void *)dispatcherPCInSCRATCH1);
		}
	}

	void Arm64Jit::WriteExitDestInR(ARM64Reg Reg) {
		// TODO: If not fast memory, check for invalid address in reg and trigger exception.
		MovToPC(Reg);
		WriteDownCount();
		// TODO: shouldn't need an indirect branch here...
		B((const void *)dispatcher);
	}

	void Arm64Jit::WriteSyscallExit() {
		WriteDownCount();
		B((const void *)dispatcherCheckCoreState);
	}

	// If a breakpoint exists at addr, emits code that calls JitBreakpoint and
	// bails to the dispatcher when it reports a pause. Preserves NZCV around the call.
	bool Arm64Jit::CheckJitBreakpoint(u32 addr, int downcountOffset) {
		if (CBreakPoints::IsAddressBreakPoint(addr)) {
			MRS(FLAGTEMPREG, FIELD_NZCV);
			FlushAll();
			MOVI2R(SCRATCH1, GetCompilerPC());
			MovToPC(SCRATCH1);
			SaveStaticRegisters();
			RestoreRoundingMode();
			MOVI2R(W0, addr);
			QuickCallFunction(SCRATCH1_64, &JitBreakpoint);

			// If 0, the conditional breakpoint wasn't taken.
			CMPI2R(W0, 0);
			FixupBranch skip = B(CC_EQ);
			WriteDownCount(downcountOffset);
			ApplyRoundingMode();
			LoadStaticRegisters();
			B((const void *)dispatcherCheckCoreState);
			SetJumpTarget(skip);

			ApplyRoundingMode();
			LoadStaticRegisters();
			_MSR(FIELD_NZCV, FLAGTEMPREG);
			return true;
		}

		return false;
	}

	// If any memchecks exist, emits code that calls JitMemCheck for the memory op
	// at instructionOffset and bails to the dispatcher when it trips.
	bool Arm64Jit::CheckMemoryBreakpoint(int instructionOffset) {
		if (CBreakPoints::HasMemChecks()) {
			int off = instructionOffset + (js.inDelaySlot ? 1 : 0);

			MRS(FLAGTEMPREG, FIELD_NZCV);
			FlushAll();
			RestoreRoundingMode();
			MOVI2R(W0, GetCompilerPC());
			MovToPC(W0);
			if (off != 0)
				ADDI2R(W0, W0, off * 4);
			QuickCallFunction(SCRATCH2_64, &JitMemCheck);

			// If 0, the breakpoint wasn't tripped.
			CMPI2R(W0, 0);
			FixupBranch skip = B(CC_EQ);
			WriteDownCount(-1 - off);
			ApplyRoundingMode();
			B((const void *)dispatcherCheckCoreState);
			SetJumpTarget(skip);

			ApplyRoundingMode();
			_MSR(FIELD_NZCV, FLAGTEMPREG);
			return true;
		}

		return false;
	}

	void Arm64Jit::Comp_DoNothing(MIPSOpcode op) { }

	// Recovers the original first op of a block from an emuhack opcode.
	MIPSOpcode Arm64Jit::GetOriginalOp(MIPSOpcode op) {
		JitBlockCache *bc = GetBlockCache();
		int block_num = bc->GetBlockNumberFromEmuHackOp(op, true);
		if (block_num >= 0) {
			return bc->GetOriginalFirstOp(block_num);
		} else {
			return op;
		}
	}

}  // namespace MIPSComp

#endif // PPSSPP_ARCH(ARM64)