Path: blob/master/src/core/cpu_recompiler_riscv64.cpp
4214 views
// SPDX-FileCopyrightText: 2019-2024 Connor McLaughlin <[email protected]>1// SPDX-License-Identifier: CC-BY-NC-ND-4.023#include "cpu_recompiler_riscv64.h"4#include "cpu_code_cache_private.h"5#include "cpu_core_private.h"6#include "cpu_pgxp.h"7#include "gte.h"8#include "settings.h"9#include "timing_event.h"1011#include "common/align.h"12#include "common/assert.h"13#include "common/log.h"14#include "common/memmap.h"15#include "common/string_util.h"1617#include <limits>1819#ifdef CPU_ARCH_RISCV642021LOG_CHANNEL(Recompiler);2223#ifdef ENABLE_HOST_DISASSEMBLY24extern "C" {25#include "riscv-disas.h"26}27#endif2829// For LW/SW/etc.30#define PTR(x) ((u32)(((u8*)(x)) - ((u8*)&g_state))), RSTATE3132static constexpr u32 BLOCK_LINK_SIZE = 8; // auipc+jr3334#define RRET biscuit::a035#define RARG1 biscuit::a036#define RARG2 biscuit::a137#define RARG3 biscuit::a238#define RSCRATCH biscuit::t639#define RSTATE biscuit::s1040#define RMEMBASE biscuit::s114142static bool rvIsCallerSavedRegister(u32 id);43static bool rvIsValidSExtITypeImm(u32 imm);44static std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target);45static void rvMoveAddressToReg(biscuit::Assembler* armAsm, const biscuit::GPR& reg, const void* addr);46static void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm);47static void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm);48static u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg = biscuit::zero);49static u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr);50static void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,51bool sign_extend_word = false);52static void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,53const biscuit::GPR& tempreg = RSCRATCH);54static void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> 
word55static void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word56static void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word57static void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> word58static void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword59static void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs); // -> doubleword6061namespace CPU {6263using namespace biscuit;6465RISCV64Recompiler s_instance;66Recompiler* g_compiler = &s_instance;6768} // namespace CPU6970bool rvIsCallerSavedRegister(u32 id)71{72return (id == 1 || (id >= 3 && id < 8) || (id >= 10 && id <= 17) || (id >= 28 && id <= 31));73}7475bool rvIsValidSExtITypeImm(u32 imm)76{77return (static_cast<u32>((static_cast<s32>(imm) << 20) >> 20) == imm);78}7980std::pair<s32, s32> rvGetAddressImmediates(const void* cur, const void* target)81{82const s64 disp = static_cast<s64>(reinterpret_cast<intptr_t>(target) - reinterpret_cast<intptr_t>(cur));83Assert(disp >= static_cast<s64>(std::numeric_limits<s32>::min()) &&84disp <= static_cast<s64>(std::numeric_limits<s32>::max()));8586const s64 hi = disp + 0x800;87const s64 lo = disp - (hi & 0xFFFFF000);88return std::make_pair(static_cast<s32>(hi >> 12), static_cast<s32>((lo << 52) >> 52));89}9091void rvMoveAddressToReg(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr)92{93const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);94rvAsm->AUIPC(reg, hi);95rvAsm->ADDI(reg, reg, lo);96}9798void rvEmitMov(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, u32 imm)99{100// Borrowed from biscuit, but doesn't emit an ADDI if the lower 12 bits are zero.101const u32 lower = imm & 0xFFF;102const u32 upper = (imm & 0xFFFFF000) >> 12;103const s32 simm = static_cast<s32>(imm);104if 
// Loads a 64-bit constant into rd, using scratch as a temporary.
void rvEmitMov64(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& scratch, u64 imm)
{
  // TODO: Make better..
  // Builds the high and low 32-bit halves separately, then merges them:
  // rd = (hi << 32) + zero-extended(lo).
  rvEmitMov(rvAsm, rd, static_cast<u32>(imm >> 32));
  rvEmitMov(rvAsm, scratch, static_cast<u32>(imm));
  rvAsm->SLLI64(rd, rd, 32);
  rvAsm->SLLI64(scratch, scratch, 32);
  rvAsm->SRLI64(scratch, scratch, 32);
  rvAsm->ADD(rd, rd, scratch);
}

// Emits a PC-relative jump to ptr (AUIPC+JALR), optionally linking link_reg.
// Returns the number of bytes emitted (always 8 so block links can be patched).
u32 rvEmitJmp(biscuit::Assembler* rvAsm, const void* ptr, const biscuit::GPR& link_reg)
{
  // TODO: use J if displacement is <1MB, needs a bool because backpatch must be 8 bytes
  const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), ptr);
  rvAsm->AUIPC(RSCRATCH, hi);
  rvAsm->JALR(link_reg, lo, RSCRATCH);
  return 8;
}

// Emits a call to ptr, linking the return address in ra.
u32 rvEmitCall(biscuit::Assembler* rvAsm, const void* ptr)
{
  return rvEmitJmp(rvAsm, ptr, biscuit::ra);
}

// Loads a 32-bit value from an arbitrary (PC-relative reachable) address into reg.
void rvEmitFarLoad(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr, bool sign_extend_word)
{
  const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(reg, hi);
  if (sign_extend_word)
    rvAsm->LW(reg, lo, reg);
  else
    rvAsm->LWU(reg, lo, reg);
}

// Stores reg to an arbitrary (PC-relative reachable) address, using tempreg to
// form the address.
[[maybe_unused]] void rvEmitFarStore(biscuit::Assembler* rvAsm, const biscuit::GPR& reg, const void* addr,
                                     const biscuit::GPR& tempreg)
{
  const auto [hi, lo] = rvGetAddressImmediates(rvAsm->GetCursorPointer(), addr);
  rvAsm->AUIPC(tempreg, hi);
  rvAsm->SW(reg, lo, tempreg);
}

// Sign-extends the low byte of rs into a word in rd.
void rvEmitSExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI(rd, rs, 24);
  rvAsm->SRAIW(rd, rd, 24);
}

// Zero-extends the low byte of rs into rd.
void rvEmitUExtB(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->ANDI(rd, rs, 0xFF);
}

// Sign-extends the low halfword of rs into a word in rd.
void rvEmitSExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI(rd, rs, 16);
  rvAsm->SRAIW(rd, rd, 16);
}

// Zero-extends the low halfword of rs into rd.
void rvEmitUExtH(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI(rd, rs, 16);
  rvAsm->SRLI(rd, rd, 16);
}

// Sign-extends the low word of rs to a doubleword in rd (ADDIW with 0).
void rvEmitDSExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->ADDIW(rd, rs, 0);
}

// Zero-extends the low word of rs to a doubleword in rd.
void rvEmitDUExtW(biscuit::Assembler* rvAsm, const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvAsm->SLLI64(rd, rs, 32);
  rvAsm->SRLI64(rd, rd, 32);
}

// Disassembles size bytes of generated host code starting at start and writes
// each instruction to the debug log (no-op without ENABLE_HOST_DISASSEMBLY).
void CPU::CodeCache::DisassembleAndLogHostCode(const void* start, u32 size)
{
#ifdef ENABLE_HOST_DISASSEMBLY
  const u8* cur = static_cast<const u8*>(start);
  const u8* end = cur + size;
  char buf[256];
  while (cur < end)
  {
    rv_inst inst;
    size_t instlen;
    inst_fetch(cur, &inst, &instlen);
    disasm_inst(buf, std::size(buf), rv64, static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), inst);
    DEBUG_LOG("\t0x{:016X}\t{}", static_cast<u64>(reinterpret_cast<uintptr_t>(cur)), buf);
    cur += instlen;
  }
#else
  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
#endif
}

// Counts host instructions (which may be 2 or 4 bytes with RVC) in the given range.
u32 CPU::CodeCache::GetHostInstructionCount(const void* start, u32 size)
{
#ifdef ENABLE_HOST_DISASSEMBLY
  const u8* cur = static_cast<const u8*>(start);
  const u8* end = cur + size;
  u32 icount = 0;
  while (cur < end)
  {
    rv_inst inst;
    size_t instlen;
    inst_fetch(cur, &inst, &instlen);
    cur += instlen;
    icount++;
  }
  return icount;
#else
  ERROR_LOG("Not compiled with ENABLE_HOST_DISASSEMBLY.");
  return 0;
#endif
}
// Emits the shared assembly stubs (enter, dispatcher, event check, compile /
// discard / interpret trampolines) into the given buffer and publishes their
// addresses via the g_* code-cache pointers. Returns the emitted size in bytes.
u32 CPU::CodeCache::EmitASMFunctions(void* code, u32 code_size)
{
  using namespace biscuit;

  Assembler actual_asm(static_cast<u8*>(code), code_size);
  Assembler* rvAsm = &actual_asm;

  Label dispatch;
  Label run_events_and_dispatch;

  g_enter_recompiler = reinterpret_cast<decltype(g_enter_recompiler)>(rvAsm->GetCursorPointer());
  {
    // TODO: reserve some space for saving caller-saved registers

    // Need the CPU state for basically everything :-)
    rvMoveAddressToReg(rvAsm, RSTATE, &g_state);

    // Fastmem setup
    if (IsUsingFastmem())
      rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));

    // Fall through to event dispatcher
  }

  // check events then for frame done
  {
    Label skip_event_check;
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
    rvAsm->LW(RARG2, PTR(&g_state.downcount));
    rvAsm->BLTU(RARG1, RARG2, &skip_event_check);

    rvAsm->Bind(&run_events_and_dispatch);
    g_run_events_and_dispatch = rvAsm->GetCursorPointer();
    rvEmitCall(rvAsm, reinterpret_cast<const void*>(&TimingEvents::RunEvents));

    rvAsm->Bind(&skip_event_check);
  }

  // TODO: align?
  g_dispatcher = rvAsm->GetCursorPointer();
  {
    rvAsm->Bind(&dispatch);

    // x9 <- s_fast_map[pc >> 16]
    rvAsm->LW(RARG1, PTR(&g_state.pc));
    rvMoveAddressToReg(rvAsm, RARG3, g_code_lut.data());
    rvAsm->SRLIW(RARG2, RARG1, 16);
    rvAsm->SLLI(RARG2, RARG2, 3);
    rvAsm->ADD(RARG2, RARG2, RARG3);
    rvAsm->LD(RARG2, 0, RARG2);
    rvAsm->SLLI64(RARG1, RARG1, 48); // idx = (pc & 0xFFFF) >> 2
    rvAsm->SRLI64(RARG1, RARG1, 50);
    rvAsm->SLLI(RARG1, RARG1, 3);

    // blr(x9[pc * 2]) (fast_map[idx])
    rvAsm->ADD(RARG1, RARG1, RARG2);
    rvAsm->LD(RARG1, 0, RARG1);
    rvAsm->JR(RARG1);
  }

  // Stub: compile (or revalidate) the block at g_state.pc, then re-dispatch.
  g_compile_or_revalidate_block = rvAsm->GetCursorPointer();
  {
    rvAsm->LW(RARG1, PTR(&g_state.pc));
    rvEmitCall(rvAsm, reinterpret_cast<const void*>(&CompileOrRevalidateBlock));
    rvAsm->J(&dispatch);
  }

  // Stub: throw away the block at g_state.pc and recompile it, then re-dispatch.
  g_discard_and_recompile_block = rvAsm->GetCursorPointer();
  {
    rvAsm->LW(RARG1, PTR(&g_state.pc));
    rvEmitCall(rvAsm, reinterpret_cast<const void*>(&DiscardAndRecompileBlock));
    rvAsm->J(&dispatch);
  }

  // Stub: interpret an uncached block, then run events if the downcount expired.
  g_interpret_block = rvAsm->GetCursorPointer();
  {
    rvEmitCall(rvAsm, CodeCache::GetInterpretUncachedBlockFunction());
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
    rvAsm->LW(RARG2, PTR(&g_state.downcount));
    rvAsm->BGE(RARG1, RARG2, &run_events_and_dispatch);
    rvAsm->J(&dispatch);
  }

  // TODO: align?

  return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());
}

// Fills alignment gaps between blocks with zero bytes.
void CPU::CodeCache::EmitAlignmentPadding(void* dst, size_t size)
{
  constexpr u8 padding_value = 0x00;
  std::memset(dst, padding_value, size);
}

// Writes a BLOCK_LINK_SIZE-byte jump from code to dst (used for block linking),
// optionally flushing the instruction cache. Returns the link size.
u32 CPU::CodeCache::EmitJump(void* code, const void* dst, bool flush_icache)
{
  // TODO: get rid of assembler construction here
  {
    biscuit::Assembler assembler(static_cast<u8*>(code), BLOCK_LINK_SIZE);
    rvEmitCall(&assembler, dst);

    DebugAssert(assembler.GetCodeBuffer().GetSizeInBytes() <= BLOCK_LINK_SIZE);
    if (assembler.GetCodeBuffer().GetRemainingBytes() > 0)
      assembler.NOP();
  }

  if (flush_icache)
    MemMap::FlushInstructionCache(code, BLOCK_LINK_SIZE);

  return BLOCK_LINK_SIZE;
}

CPU::RISCV64Recompiler::RISCV64Recompiler() = default;

CPU::RISCV64Recompiler::~RISCV64Recompiler() = default;

// Returns the current emission cursor of the active (near or far) emitter.
const void* CPU::RISCV64Recompiler::GetCurrentCodePointer()
{
  return rvAsm->GetCursorPointer();
}
// Prepares the recompiler for a new block: creates near/far emitters over the
// supplied buffers and rebuilds the host register availability table.
void CPU::RISCV64Recompiler::Reset(CodeCache::Block* block, u8* code_buffer, u32 code_buffer_space, u8* far_code_buffer,
                                   u32 far_code_space)
{
  Recompiler::Reset(block, code_buffer, code_buffer_space, far_code_buffer, far_code_space);

  // TODO: don't recreate this every time..
  DebugAssert(!m_emitter && !m_far_emitter && !rvAsm);
  m_emitter = std::make_unique<Assembler>(code_buffer, code_buffer_space);
  m_far_emitter = std::make_unique<Assembler>(far_code_buffer, far_code_space);
  rvAsm = m_emitter.get();

  // Need to wipe it out so it's correct when toggling fastmem.
  m_host_regs = {};

  // RMEMBASE is only reserved when fastmem is active; otherwise it is allocatable.
  const u32 membase_idx = CodeCache::IsUsingFastmem() ? RMEMBASE.Index() : NUM_HOST_REGS;
  for (u32 i = 0; i < NUM_HOST_REGS; i++)
  {
    HostRegAlloc& hra = m_host_regs[i];

    // Skip registers with fixed roles (args, scratch, state, membase) and the
    // first five architectural registers, which are never general-purpose here.
    if (i == RARG1.Index() || i == RARG2.Index() || i == RARG3.Index() || i == RSCRATCH.Index() ||
        i == RSTATE.Index() || i == membase_idx || i < 5 /* zero, ra, sp, gp, tp */)
    {
      continue;
    }

    hra.flags = HR_USABLE | (rvIsCallerSavedRegister(i) ? 0 : HR_CALLEE_SAVED);
  }
}

// Switches emission to the far-code buffer. If emit_jump is set, emits either an
// unconditional transfer, or (when inverted_cond is given) a conditional one that
// skips the far jump when the inverted condition holds.
void CPU::RISCV64Recompiler::SwitchToFarCode(bool emit_jump,
                                             void (biscuit::Assembler::*inverted_cond)(biscuit::GPR, biscuit::GPR,
                                                                                       biscuit::Label*) /* = nullptr */,
                                             const biscuit::GPR& rs1 /* = biscuit::zero */,
                                             const biscuit::GPR& rs2 /* = biscuit::zero */)
{
  DebugAssert(rvAsm == m_emitter.get());
  if (emit_jump)
  {
    const void* target = m_far_emitter->GetCursorPointer();
    if (inverted_cond)
    {
      Label skip;
      (rvAsm->*inverted_cond)(rs1, rs2, &skip);
      rvEmitJmp(rvAsm, target);
      rvAsm->Bind(&skip);
    }
    else
    {
      // NOTE(review): this links ra (call) rather than emitting a plain jump like
      // the conditional path above; ra is not live in generated code, so this
      // appears harmless — confirm intent.
      rvEmitCall(rvAsm, target);
    }
  }
  rvAsm = m_far_emitter.get();
}

// Switches emission back to the near-code buffer, optionally jumping from the
// far code to the current near cursor.
void CPU::RISCV64Recompiler::SwitchToNearCode(bool emit_jump)
{
  DebugAssert(rvAsm == m_far_emitter.get());
  if (emit_jump)
    rvEmitJmp(rvAsm, m_emitter->GetCursorPointer());
  rvAsm = m_emitter.get();
}

// Loads a 32-bit constant into dst using the active emitter.
void CPU::RISCV64Recompiler::EmitMov(const biscuit::GPR& dst, u32 val)
{
  rvEmitMov(rvAsm, dst, val);
}

// Emits a call to ptr using the active emitter.
void CPU::RISCV64Recompiler::EmitCall(const void* ptr)
{
  rvEmitCall(rvAsm, ptr);
}

// Emits iop(rd, rs, imm) when imm fits in a signed 12-bit I-type immediate,
// otherwise materializes imm in RSCRATCH and emits the register form rop.
void CPU::RISCV64Recompiler::SafeImmSExtIType(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm,
                                              void (biscuit::Assembler::*iop)(GPR, GPR, u32),
                                              void (biscuit::Assembler::*rop)(GPR, GPR, GPR))
{
  DebugAssert(rd != RSCRATCH && rs != RSCRATCH);

  if (rvIsValidSExtITypeImm(imm))
  {
    (rvAsm->*iop)(rd, rs, imm);
    return;
  }

  rvEmitMov(rvAsm, RSCRATCH, imm);
  (rvAsm->*rop)(rd, rs, RSCRATCH);
}

// ADDI with automatic fallback to ADD for out-of-range immediates.
void CPU::RISCV64Recompiler::SafeADDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDI),
                   &Assembler::ADD);
}

// ADDIW with automatic fallback to ADDW for out-of-range immediates.
void CPU::RISCV64Recompiler::SafeADDIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
                   &Assembler::ADDW);
}

// Word subtract-immediate implemented as an ADDIW/ADDW of the negated immediate.
void CPU::RISCV64Recompiler::SafeSUBIW(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  const u32 nimm = static_cast<u32>(-static_cast<s32>(imm));
  SafeImmSExtIType(rd, rs, nimm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::ADDIW),
                   &Assembler::ADDW);
}

// ANDI with automatic fallback to AND for out-of-range immediates.
void CPU::RISCV64Recompiler::SafeANDI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, &Assembler::ANDI, &Assembler::AND);
}

// ORI with automatic fallback to OR for out-of-range immediates.
void CPU::RISCV64Recompiler::SafeORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, &Assembler::ORI, &Assembler::OR);
}

// XORI with automatic fallback to XOR for out-of-range immediates.
void CPU::RISCV64Recompiler::SafeXORI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, &Assembler::XORI, &Assembler::XOR);
}

// SLTI with automatic fallback to SLT for out-of-range immediates.
void CPU::RISCV64Recompiler::SafeSLTI(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTI),
                   &Assembler::SLT);
}

// SLTIU with automatic fallback to SLTU for out-of-range immediates.
void CPU::RISCV64Recompiler::SafeSLTIU(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm)
{
  SafeImmSExtIType(rd, rs, imm, reinterpret_cast<void (biscuit::Assembler::*)(GPR, GPR, u32)>(&Assembler::SLTIU),
                   &Assembler::SLTU);
}

// Sign-extends the low byte of rs into rd.
void CPU::RISCV64Recompiler::EmitSExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitSExtB(rvAsm, rd, rs);
}

// Zero-extends the low byte of rs into rd.
void CPU::RISCV64Recompiler::EmitUExtB(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitUExtB(rvAsm, rd, rs);
}
// Sign-extends the low halfword of rs into rd.
void CPU::RISCV64Recompiler::EmitSExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitSExtH(rvAsm, rd, rs);
}

// Zero-extends the low halfword of rs into rd.
void CPU::RISCV64Recompiler::EmitUExtH(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitUExtH(rvAsm, rd, rs);
}

// Sign-extends the low word of rs to a doubleword in rd.
void CPU::RISCV64Recompiler::EmitDSExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitDSExtW(rvAsm, rd, rs);
}

// Zero-extends the low word of rs to a doubleword in rd.
void CPU::RISCV64Recompiler::EmitDUExtW(const biscuit::GPR& rd, const biscuit::GPR& rs)
{
  rvEmitDUExtW(rvAsm, rd, rs);
}

// Emits a compare of the block's RAM contents against its shadow copy, in 8-
// then 4-byte chunks; jumps to the discard-and-recompile stub on mismatch.
void CPU::RISCV64Recompiler::GenerateBlockProtectCheck(const u8* ram_ptr, const u8* shadow_ptr, u32 size)
{
  // store it first to reduce code size, because we can offset
  // TODO: 64-bit displacement is needed :/
  // rvMoveAddressToReg(rvAsm, RARG1, ram_ptr);
  // rvMoveAddressToReg(rvAsm, RARG2, shadow_ptr);
  rvEmitMov64(rvAsm, RARG1, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(ram_ptr)));
  rvEmitMov64(rvAsm, RARG2, RSCRATCH, static_cast<u64>(reinterpret_cast<uintptr_t>(shadow_ptr)));

  u32 offset = 0;
  Label block_changed;

  while (size >= 8)
  {
    rvAsm->LD(RARG3, offset, RARG1);
    rvAsm->LD(RSCRATCH, offset, RARG2);
    rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
    offset += 8;
    size -= 8;
  }

  while (size >= 4)
  {
    rvAsm->LW(RARG3, offset, RARG1);
    rvAsm->LW(RSCRATCH, offset, RARG2);
    rvAsm->BNE(RARG3, RSCRATCH, &block_changed);
    offset += 4;
    size -= 4;
  }

  DebugAssert(size == 0);

  Label block_unchanged;
  rvAsm->J(&block_unchanged);
  rvAsm->Bind(&block_changed);
  rvEmitJmp(rvAsm, CodeCache::g_discard_and_recompile_block);
  rvAsm->Bind(&block_unchanged);
}

// Emits the per-block instruction fetch cost: either a flat/dynamic tick add for
// uncached blocks, or per-cache-line tag compare-and-fill accounting when the
// block executes from the emulated I-cache.
void CPU::RISCV64Recompiler::GenerateICacheCheckAndUpdate()
{
  if (!m_block->HasFlag(CodeCache::BlockFlags::IsUsingICache))
  {
    if (m_block->HasFlag(CodeCache::BlockFlags::NeedsDynamicFetchTicks))
    {
      // pending_ticks += memory_access_time * block_size
      rvEmitFarLoad(rvAsm, RARG2, GetFetchMemoryAccessTimePtr());
      rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
      rvEmitMov(rvAsm, RARG3, m_block->size);
      rvAsm->MULW(RARG2, RARG2, RARG3);
      rvAsm->ADD(RARG1, RARG1, RARG2);
      rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    }
    else
    {
      rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
      SafeADDIW(RARG1, RARG1, static_cast<u32>(m_block->uncached_fetch_ticks));
      rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    }
  }
  else if (m_block->icache_line_count > 0)
  {
    const auto& ticks_reg = RARG1;
    const auto& current_tag_reg = RARG2;
    const auto& existing_tag_reg = RARG3;

    // start of block, nothing should be using this
    const auto& maddr_reg = biscuit::t0;
    DebugAssert(!IsHostRegAllocated(maddr_reg.Index()));

    VirtualMemoryAddress current_pc = m_block->pc & ICACHE_TAG_ADDRESS_MASK;
    rvAsm->LW(ticks_reg, PTR(&g_state.pending_ticks));
    rvEmitMov(rvAsm, current_tag_reg, current_pc);

    for (u32 i = 0; i < m_block->icache_line_count; i++, current_pc += ICACHE_LINE_SIZE)
    {
      const TickCount fill_ticks = GetICacheFillTicks(current_pc);
      if (fill_ticks <= 0)
        continue;

      const u32 line = GetICacheLine(current_pc);
      const u32 offset = OFFSETOF(State, icache_tags) + (line * sizeof(u32));

      // Offsets must fit in signed 12 bits.
      Label cache_hit;
      if (offset >= 2048)
      {
        SafeADDI(maddr_reg, RSTATE, offset);
        rvAsm->LW(existing_tag_reg, 0, maddr_reg);
        rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);
        rvAsm->SW(current_tag_reg, 0, maddr_reg);
      }
      else
      {
        rvAsm->LW(existing_tag_reg, offset, RSTATE);
        rvAsm->BEQ(existing_tag_reg, current_tag_reg, &cache_hit);
        rvAsm->SW(current_tag_reg, offset, RSTATE);
      }

      // Tag mismatch: charge the line fill time.
      SafeADDIW(ticks_reg, ticks_reg, static_cast<u32>(fill_ticks));
      rvAsm->Bind(&cache_hit);

      if (i != (m_block->icache_line_count - 1))
        SafeADDIW(current_tag_reg, current_tag_reg, ICACHE_LINE_SIZE);
    }

    rvAsm->SW(ticks_reg, PTR(&g_state.pending_ticks));
  }
}
static_cast<s32>(RARG1.Index()))633rvAsm->MV(RARG1, GPR(arg1reg));634if (arg2reg >= 0 && arg2reg != static_cast<s32>(RARG2.Index()))635rvAsm->MV(RARG2, GPR(arg2reg));636if (arg3reg >= 0 && arg3reg != static_cast<s32>(RARG3.Index()))637rvAsm->MV(RARG3, GPR(arg3reg));638EmitCall(func);639}640641void CPU::RISCV64Recompiler::EndBlock(const std::optional<u32>& newpc, bool do_event_test)642{643if (newpc.has_value())644{645if (m_dirty_pc || m_compiler_pc != newpc)646{647EmitMov(RSCRATCH, newpc.value());648rvAsm->SW(RSCRATCH, PTR(&g_state.pc));649}650}651m_dirty_pc = false;652653// flush regs654Flush(FLUSH_END_BLOCK);655EndAndLinkBlock(newpc, do_event_test, false);656}657658void CPU::RISCV64Recompiler::EndBlockWithException(Exception excode)659{660// flush regs, but not pc, it's going to get overwritten661// flush cycles because of the GTE instruction stuff...662Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);663664// TODO: flush load delay665666EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(excode, m_current_instruction_branch_delay_slot, false,667inst->cop.cop_n));668EmitMov(RARG2, m_current_instruction_pc);669if (excode != Exception::BP)670{671EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));672}673else674{675EmitMov(RARG3, inst->bits);676EmitCall(reinterpret_cast<const void*>(&CPU::RaiseBreakException));677}678m_dirty_pc = false;679680EndAndLinkBlock(std::nullopt, true, false);681}682683void CPU::RISCV64Recompiler::EndAndLinkBlock(const std::optional<u32>& newpc, bool do_event_test, bool force_run_events)684{685// event test686// pc should've been flushed687DebugAssert(!m_dirty_pc && !m_block_ended);688m_block_ended = true;689690// TODO: try extracting this to a function691// TODO: move the cycle flush in here..692693// save cycles for event test694const TickCount cycles = std::exchange(m_cycles, 0);695696// pending_ticks += cycles697// if (pending_ticks >= downcount) { dispatch_event(); }698if 
(do_event_test || m_gte_done_cycle > cycles || cycles > 0)699rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));700if (do_event_test)701rvAsm->LW(RARG2, PTR(&g_state.downcount));702if (cycles > 0)703{704SafeADDIW(RARG1, RARG1, cycles);705rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));706}707if (m_gte_done_cycle > cycles)708{709SafeADDIW(RARG2, RARG1, m_gte_done_cycle - cycles);710rvAsm->SW(RARG1, PTR(&g_state.gte_completion_tick));711}712713if (do_event_test)714{715// TODO: see if we can do a far jump somehow with this..716Label cont;717rvAsm->BLT(RARG1, RARG2, &cont);718rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);719rvAsm->Bind(&cont);720}721722// jump to dispatcher or next block723if (force_run_events)724{725rvEmitJmp(rvAsm, CodeCache::g_run_events_and_dispatch);726}727else if (!newpc.has_value())728{729rvEmitJmp(rvAsm, CodeCache::g_dispatcher);730}731else732{733const void* target =734(newpc.value() == m_block->pc) ?735CodeCache::CreateSelfBlockLink(m_block, rvAsm->GetCursorPointer(), rvAsm->GetBufferPointer(0)) :736CodeCache::CreateBlockLink(m_block, rvAsm->GetCursorPointer(), newpc.value());737rvEmitJmp(rvAsm, target);738}739}740741const void* CPU::RISCV64Recompiler::EndCompile(u32* code_size, u32* far_code_size)742{743u8* const code = m_emitter->GetBufferPointer(0);744*code_size = static_cast<u32>(m_emitter->GetCodeBuffer().GetSizeInBytes());745*far_code_size = static_cast<u32>(m_far_emitter->GetCodeBuffer().GetSizeInBytes());746rvAsm = nullptr;747m_far_emitter.reset();748m_emitter.reset();749return code;750}751752const char* CPU::RISCV64Recompiler::GetHostRegName(u32 reg) const753{754static constexpr std::array<const char*, 32> reg64_names = {755{"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",756"a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}};757return (reg < reg64_names.size()) ? 
// Returns the ABI name for a host register index, or "UNKNOWN" if out of range.
const char* CPU::RISCV64Recompiler::GetHostRegName(u32 reg) const
{
  static constexpr std::array<const char*, 32> reg64_names = {
    {"zero", "ra", "sp", "gp", "tp", "t0", "t1", "t2", "s0", "s1", "a0", "a1", "a2", "a3", "a4", "a5",
     "a6", "a7", "s2", "s3", "s4", "s5", "s6", "s7", "s8", "s9", "s10", "s11", "t3", "t4", "t5", "t6"}};
  return (reg < reg64_names.size()) ? reg64_names[reg] : "UNKNOWN";
}

// Loads a 32-bit constant into the given host register.
void CPU::RISCV64Recompiler::LoadHostRegWithConstant(u32 reg, u32 val)
{
  EmitMov(GPR(reg), val);
}

// Loads a word from a CPU-state field into the given host register.
void CPU::RISCV64Recompiler::LoadHostRegFromCPUPointer(u32 reg, const void* ptr)
{
  rvAsm->LW(GPR(reg), PTR(ptr));
}

// Stores the given host register to a CPU-state field.
void CPU::RISCV64Recompiler::StoreHostRegToCPUPointer(u32 reg, const void* ptr)
{
  rvAsm->SW(GPR(reg), PTR(ptr));
}

// Stores a constant to a CPU-state field, using the zero register when possible.
void CPU::RISCV64Recompiler::StoreConstantToCPUPointer(u32 val, const void* ptr)
{
  if (val == 0)
  {
    rvAsm->SW(zero, PTR(ptr));
    return;
  }

  EmitMov(RSCRATCH, val);
  rvAsm->SW(RSCRATCH, PTR(ptr));
}

// Copies one host register to another, skipping self-moves.
void CPU::RISCV64Recompiler::CopyHostReg(u32 dst, u32 src)
{
  if (src != dst)
    rvAsm->MV(GPR(dst), GPR(src));
}

// Asserts that rs is either in a host register or a known constant.
void CPU::RISCV64Recompiler::AssertRegOrConstS(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_s || cf.const_s);
}

// Asserts that rt is either in a host register or a known constant.
void CPU::RISCV64Recompiler::AssertRegOrConstT(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_t || cf.const_t);
}

// Returns a register holding MIPS rs: the allocated host register if any, the
// zero register for a known-zero constant, or temp_reg after loading/moving.
biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegS(CompileFlags cf, const biscuit::GPR& temp_reg)
{
  if (cf.valid_host_s)
  {
    return GPR(cf.host_s);
  }
  else if (cf.const_s)
  {
    if (HasConstantRegValue(cf.MipsS(), 0))
      return zero;

    EmitMov(temp_reg, GetConstantRegU32(cf.MipsS()));
    return temp_reg;
  }
  else
  {
    WARNING_LOG("Hit memory path in CFGetSafeRegS() for {}", GetRegName(cf.MipsS()));
    rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_s]));
    return temp_reg;
  }
}

// Returns a register holding MIPS rt; see CFGetSafeRegS() for the strategy.
biscuit::GPR CPU::RISCV64Recompiler::CFGetSafeRegT(CompileFlags cf, const biscuit::GPR& temp_reg)
{
  if (cf.valid_host_t)
  {
    return GPR(cf.host_t);
  }
  else if (cf.const_t)
  {
    if (HasConstantRegValue(cf.MipsT(), 0))
      return zero;

    EmitMov(temp_reg, GetConstantRegU32(cf.MipsT()));
    return temp_reg;
  }
  else
  {
    WARNING_LOG("Hit memory path in CFGetSafeRegT() for {}", GetRegName(cf.MipsT()));
    rvAsm->LW(temp_reg, PTR(&g_state.regs.r[cf.mips_t]));
    return temp_reg;
  }
}

// Returns the host register allocated for MIPS rd (must be valid).
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegD(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_d);
  return GPR(cf.host_d);
}

// Returns the host register allocated for MIPS rs (must be valid).
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegS(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_s);
  return GPR(cf.host_s);
}

// Returns the host register allocated for MIPS rt (must be valid).
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegT(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_t);
  return GPR(cf.host_t);
}

// Returns the host register allocated for LO (must be valid).
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegLO(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_lo);
  return GPR(cf.host_lo);
}

// Returns the host register allocated for HI (must be valid).
biscuit::GPR CPU::RISCV64Recompiler::CFGetRegHI(CompileFlags cf) const
{
  DebugAssert(cf.valid_host_hi);
  return GPR(cf.host_hi);
}

// Copies the value of MIPS rs into dst, from host register, constant, or memory.
void CPU::RISCV64Recompiler::MoveSToReg(const biscuit::GPR& dst, CompileFlags cf)
{
  if (cf.valid_host_s)
  {
    if (cf.host_s != dst.Index())
      rvAsm->MV(dst, GPR(cf.host_s));
  }
  else if (cf.const_s)
  {
    EmitMov(dst, GetConstantRegU32(cf.MipsS()));
  }
  else
  {
    WARNING_LOG("Hit memory path in MoveSToReg() for {}", GetRegName(cf.MipsS()));
    rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));
  }
}

// Copies the value of MIPS rt into dst, from host register, constant, or memory.
void CPU::RISCV64Recompiler::MoveTToReg(const biscuit::GPR& dst, CompileFlags cf)
{
  if (cf.valid_host_t)
  {
    if (cf.host_t != dst.Index())
      rvAsm->MV(dst, GPR(cf.host_t));
  }
  else if (cf.const_t)
  {
    EmitMov(dst, GetConstantRegU32(cf.MipsT()));
  }
  else
  {
    WARNING_LOG("Hit memory path in MoveTToReg() for {}", GetRegName(cf.MipsT()));
    rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_t]));
  }
}

// Copies an arbitrary MIPS register into dst, preferring an allocated host
// register, then a known constant, then a load from CPU state.
void CPU::RISCV64Recompiler::MoveMIPSRegToReg(const biscuit::GPR& dst, Reg reg)
{
  DebugAssert(reg < Reg::count);
  if (const std::optional<u32> hreg = CheckHostReg(0, Recompiler::HR_TYPE_CPU_REG, reg))
    rvAsm->MV(dst, GPR(hreg.value()));
  else if (HasConstantReg(reg))
    EmitMov(dst, GetConstantRegU32(reg));
  else
    rvAsm->LW(dst, PTR(&g_state.regs.r[static_cast<u8>(reg)]));
}
// Flushes state and calls a PGXP helper with arg1val plus up to two MIPS
// register values in the argument registers.
void CPU::RISCV64Recompiler::GeneratePGXPCallWithMIPSRegs(const void* func, u32 arg1val, Reg arg2reg /* = Reg::count */,
                                                          Reg arg3reg /* = Reg::count */)
{
  DebugAssert(g_settings.gpu_pgxp_enable);

  Flush(FLUSH_FOR_C_CALL);

  if (arg2reg != Reg::count)
    MoveMIPSRegToReg(RARG2, arg2reg);
  if (arg3reg != Reg::count)
    MoveMIPSRegToReg(RARG3, arg3reg);

  EmitMov(RARG1, arg1val);
  EmitCall(func);
}

// Writes back whichever parts of the compiler's lazily-tracked state the flag
// mask requests (PC, load delay, GTE stall/done cycle, accumulated cycles).
// Order of the checks matters: e.g. GTE done-cycle consumes m_cycles before the
// plain cycle flush runs.
void CPU::RISCV64Recompiler::Flush(u32 flags)
{
  Recompiler::Flush(flags);

  if (flags & FLUSH_PC && m_dirty_pc)
  {
    StoreConstantToCPUPointer(m_compiler_pc, &g_state.pc);
    m_dirty_pc = false;
  }

  if (flags & FLUSH_INSTRUCTION_BITS)
  {
    // This sucks, but it's only used for fallbacks.
    Panic("Not implemented");
  }

  if (flags & FLUSH_LOAD_DELAY_FROM_STATE && m_load_delay_dirty)
  {
    // This sucks :(
    // TODO: make it a function?
    // Applies the in-memory load delay: regs.r[load_delay_reg] = load_delay_value,
    // then clears load_delay_reg back to Reg::count.
    rvAsm->LBU(RARG1, PTR(&g_state.load_delay_reg));
    rvAsm->LW(RARG2, PTR(&g_state.load_delay_value));
    rvAsm->SLLI(RARG1, RARG1, 2); // *4
    rvAsm->ADD(RARG1, RARG1, RSTATE);
    rvAsm->SW(RARG2, OFFSETOF(CPU::State, regs.r[0]), RARG1);
    rvAsm->LI(RSCRATCH, static_cast<u8>(Reg::count));
    rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
    m_load_delay_dirty = false;
  }

  if (flags & FLUSH_LOAD_DELAY && m_load_delay_register != Reg::count)
  {
    // Writes the compile-time-tracked load delay register out to state; the
    // value itself was already stored by the register allocator.
    if (m_load_delay_value_register != NUM_HOST_REGS)
      FreeHostReg(m_load_delay_value_register);

    EmitMov(RSCRATCH, static_cast<u8>(m_load_delay_register));
    rvAsm->SB(RSCRATCH, PTR(&g_state.load_delay_reg));
    m_load_delay_register = Reg::count;
    m_load_delay_dirty = true;
  }

  if (flags & FLUSH_GTE_STALL_FROM_STATE && m_dirty_gte_done_cycle)
  {
    // May as well flush cycles while we're here.
    // GTE spanning blocks is very rare, we _could_ disable this for speed.
    // pending_ticks = max(pending_ticks + cycles, gte_completion_tick)
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
    rvAsm->LW(RARG2, PTR(&g_state.gte_completion_tick));
    if (m_cycles > 0)
    {
      SafeADDIW(RARG1, RARG1, m_cycles);
      m_cycles = 0;
    }
    Label no_stall;
    rvAsm->BGE(RARG1, RARG2, &no_stall);
    rvAsm->MV(RARG1, RARG2);
    rvAsm->Bind(&no_stall);
    rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    m_dirty_gte_done_cycle = false;
  }

  if (flags & FLUSH_GTE_DONE_CYCLE && m_gte_done_cycle > m_cycles)
  {
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));

    // update cycles at the same time
    if (flags & FLUSH_CYCLES && m_cycles > 0)
    {
      SafeADDIW(RARG1, RARG1, m_cycles);
      rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
      m_gte_done_cycle -= m_cycles;
      m_cycles = 0;
    }

    SafeADDIW(RARG1, RARG1, m_gte_done_cycle);
    rvAsm->SW(RARG1, PTR(&g_state.gte_completion_tick));
    m_gte_done_cycle = 0;
    m_dirty_gte_done_cycle = true;
  }

  if (flags & FLUSH_CYCLES && m_cycles > 0)
  {
    rvAsm->LW(RARG1, PTR(&g_state.pending_ticks));
    SafeADDIW(RARG1, RARG1, m_cycles);
    rvAsm->SW(RARG1, PTR(&g_state.pending_ticks));
    m_gte_done_cycle = std::max<TickCount>(m_gte_done_cycle - m_cycles, 0);
    m_cycles = 0;
  }
}
// Fallback path for instructions without a dedicated compiler. The x64-style
// implementation is disabled; reaching this currently panics.
void CPU::RISCV64Recompiler::Compile_Fallback()
{
  WARNING_LOG("Compiling instruction fallback at PC=0x{:08X}, instruction=0x{:08X}", m_current_instruction_pc,
              inst->bits);

  Flush(FLUSH_FOR_INTERPRETER);

#if 0
  cg->call(&CPU::RecompilerThunks::InterpretInstruction);

  // TODO: make me less garbage
  // TODO: this is wrong, it flushes the load delay on the same cycle when we return.
  // but nothing should be going through here..
  Label no_load_delay;
  cg->movzx(RWARG1, cg->byte[PTR(&g_state.next_load_delay_reg)]);
  cg->cmp(RWARG1, static_cast<u8>(Reg::count));
  cg->je(no_load_delay, CodeGenerator::T_SHORT);
  cg->mov(RWARG2, cg->dword[PTR(&g_state.next_load_delay_value)]);
  cg->mov(cg->byte[PTR(&g_state.load_delay_reg)], RWARG1);
  cg->mov(cg->dword[PTR(&g_state.load_delay_value)], RWARG2);
  cg->mov(cg->byte[PTR(&g_state.next_load_delay_reg)], static_cast<u32>(Reg::count));
  cg->L(no_load_delay);

  m_load_delay_dirty = EMULATE_LOAD_DELAYS;
#else
  Panic("Fixme");
#endif
}

// When memory exceptions are enabled, verifies that the branch target in pcreg
// is word-aligned, raising AdEL in far code otherwise.
void CPU::RISCV64Recompiler::CheckBranchTarget(const biscuit::GPR& pcreg)
{
  if (!g_settings.cpu_recompiler_memory_exceptions)
    return;

  DebugAssert(pcreg != RSCRATCH);
  rvAsm->ANDI(RSCRATCH, pcreg, 0x3);
  SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);

  BackupHostState();
  EndBlockWithException(Exception::AdEL);

  RestoreHostState();
  SwitchToNearCode(false);
}

// JR: jump to the address in rs.
void CPU::RISCV64Recompiler::Compile_jr(CompileFlags cf)
{
  const GPR pcreg = CFGetRegS(cf);
  CheckBranchTarget(pcreg);

  rvAsm->SW(pcreg, PTR(&g_state.pc));

  CompileBranchDelaySlot(false);
  EndBlock(std::nullopt, true);
}

// JALR: jump to the address in rs, storing the return address in rd.
void CPU::RISCV64Recompiler::Compile_jalr(CompileFlags cf)
{
  const GPR pcreg = CFGetRegS(cf);
  if (MipsD() != Reg::zero)
    SetConstantReg(MipsD(), GetBranchReturnAddress(cf));

  CheckBranchTarget(pcreg);
  rvAsm->SW(pcreg, PTR(&g_state.pc));

  CompileBranchDelaySlot(false);
  EndBlock(std::nullopt, true);
}

// Conditional branches (BEQ/BNE/BGTZ/BGEZ/BLTZ/BLEZ): emits the host-side
// compare, compiles the delay slot on both paths, and ends the block twice
// (fall-through and taken).
void CPU::RISCV64Recompiler::Compile_bxx(CompileFlags cf, BranchCondition cond)
{
  AssertRegOrConstS(cf);

  const u32 taken_pc = GetConditionalBranchTarget(cf);

  Flush(FLUSH_FOR_BRANCH);

  DebugAssert(cf.valid_host_s);

  // MipsT() here should equal zero for zero branches.
  DebugAssert(cond == BranchCondition::Equal || cond == BranchCondition::NotEqual || cf.MipsT() == Reg::zero);

  Label taken;
  const GPR rs = CFGetRegS(cf);
  switch (cond)
  {
    case BranchCondition::Equal:
    case BranchCondition::NotEqual:
    {
      AssertRegOrConstT(cf);
      if (cf.const_t && HasConstantRegValue(cf.MipsT(), 0))
      {
        (cond == BranchCondition::Equal) ? rvAsm->BEQZ(rs, &taken) : rvAsm->BNEZ(rs, &taken);
      }
      else
      {
        const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG1;
        if (!cf.valid_host_t)
          MoveTToReg(RARG1, cf);
        if (cond == Recompiler::BranchCondition::Equal)
          rvAsm->BEQ(rs, rt, &taken);
        else
          rvAsm->BNE(rs, rt, &taken);
      }
    }
    break;

    case BranchCondition::GreaterThanZero:
    {
      rvAsm->BGTZ(rs, &taken);
    }
    break;

    case BranchCondition::GreaterEqualZero:
    {
      rvAsm->BGEZ(rs, &taken);
    }
    break;

    case BranchCondition::LessThanZero:
    {
      rvAsm->BLTZ(rs, &taken);
    }
    break;

    case BranchCondition::LessEqualZero:
    {
      rvAsm->BLEZ(rs, &taken);
    }
    break;
  }

  // Fall-through path.
  BackupHostState();
  if (!cf.delay_slot_swapped)
    CompileBranchDelaySlot();

  EndBlock(m_compiler_pc, true);

  // Taken path (state restored to the pre-branch snapshot).
  rvAsm->Bind(&taken);

  RestoreHostState();
  if (!cf.delay_slot_swapped)
    CompileBranchDelaySlot();

  EndBlock(taken_pc, true);
}

// Shared ADDI/ADDIU implementation; when overflow is set, the 64-bit and 32-bit
// sums are compared to detect signed overflow.
void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf, bool overflow)
{
  const GPR rs = CFGetRegS(cf);
  const GPR rt = CFGetRegT(cf);
  if (const u32 imm = inst->i.imm_sext32(); imm != 0)
  {
    if (!overflow)
    {
      SafeADDIW(rt, rs, imm);
    }
    else
    {
      SafeADDI(RARG1, rs, imm);
      SafeADDIW(rt, rs, imm);
      TestOverflow(RARG1, rt, rt);
    }
  }
  else if (rt.Index() != rs.Index())
  {
    rvAsm->MV(rt, rs);
  }
}

// ADDI: overflow checking depends on the memory-exceptions setting.
void CPU::RISCV64Recompiler::Compile_addi(CompileFlags cf)
{
  Compile_addi(cf, g_settings.cpu_recompiler_memory_exceptions);
}

// ADDIU: never raises overflow.
void CPU::RISCV64Recompiler::Compile_addiu(CompileFlags cf)
{
  Compile_addi(cf, false);
}

// SLTI: signed set-less-than-immediate.
void CPU::RISCV64Recompiler::Compile_slti(CompileFlags cf)
{
  Compile_slti(cf, true);
}

// SLTIU: unsigned set-less-than-immediate.
void CPU::RISCV64Recompiler::Compile_sltiu(CompileFlags cf)
{
  Compile_slti(cf, false);
}
inst->i.imm_sext32());1225}12261227void CPU::RISCV64Recompiler::Compile_andi(CompileFlags cf)1228{1229const GPR rt = CFGetRegT(cf);1230if (const u32 imm = inst->i.imm_zext32(); imm != 0)1231SafeANDI(rt, CFGetRegS(cf), imm);1232else1233EmitMov(rt, 0);1234}12351236void CPU::RISCV64Recompiler::Compile_ori(CompileFlags cf)1237{1238const GPR rt = CFGetRegT(cf);1239const GPR rs = CFGetRegS(cf);1240if (const u32 imm = inst->i.imm_zext32(); imm != 0)1241SafeORI(rt, rs, imm);1242else if (rt.Index() != rs.Index())1243rvAsm->MV(rt, rs);1244}12451246void CPU::RISCV64Recompiler::Compile_xori(CompileFlags cf)1247{1248const GPR rt = CFGetRegT(cf);1249const GPR rs = CFGetRegS(cf);1250if (const u32 imm = inst->i.imm_zext32(); imm != 0)1251SafeXORI(rt, rs, imm);1252else if (rt.Index() != rs.Index())1253rvAsm->MV(rt, rs);1254}12551256void CPU::RISCV64Recompiler::Compile_shift(CompileFlags cf,1257void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),1258void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))1259{1260const GPR rd = CFGetRegD(cf);1261const GPR rt = CFGetRegT(cf);1262if (inst->r.shamt > 0)1263(rvAsm->*op_const)(rd, rt, inst->r.shamt);1264else if (rd.Index() != rt.Index())1265rvAsm->MV(rd, rt);1266}12671268void CPU::RISCV64Recompiler::Compile_sll(CompileFlags cf)1269{1270Compile_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);1271}12721273void CPU::RISCV64Recompiler::Compile_srl(CompileFlags cf)1274{1275Compile_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);1276}12771278void CPU::RISCV64Recompiler::Compile_sra(CompileFlags cf)1279{1280Compile_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);1281}12821283void CPU::RISCV64Recompiler::Compile_variable_shift(1284CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),1285void (biscuit::Assembler::*op_const)(biscuit::GPR, biscuit::GPR, unsigned))1286{1287const GPR rd = CFGetRegD(cf);12881289AssertRegOrConstS(cf);1290AssertRegOrConstT(cf);12911292const GPR rt 
= cf.valid_host_t ? CFGetRegT(cf) : RARG2;1293if (!cf.valid_host_t)1294MoveTToReg(rt, cf);12951296if (cf.const_s)1297{1298if (const u32 shift = GetConstantRegU32(cf.MipsS()); shift != 0)1299(rvAsm->*op_const)(rd, rt, shift & 31u);1300else if (rd.Index() != rt.Index())1301rvAsm->MV(rd, rt);1302}1303else1304{1305(rvAsm->*op)(rd, rt, CFGetRegS(cf));1306}1307}13081309void CPU::RISCV64Recompiler::Compile_sllv(CompileFlags cf)1310{1311Compile_variable_shift(cf, &Assembler::SLLW, &Assembler::SLLIW);1312}13131314void CPU::RISCV64Recompiler::Compile_srlv(CompileFlags cf)1315{1316Compile_variable_shift(cf, &Assembler::SRLW, &Assembler::SRLIW);1317}13181319void CPU::RISCV64Recompiler::Compile_srav(CompileFlags cf)1320{1321Compile_variable_shift(cf, &Assembler::SRAW, &Assembler::SRAIW);1322}13231324void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf, bool sign)1325{1326const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;1327if (!cf.valid_host_s)1328MoveSToReg(rs, cf);13291330const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;1331if (!cf.valid_host_t)1332MoveTToReg(rt, cf);13331334// TODO: if lo/hi gets killed, we can use a 32-bit multiply1335const GPR lo = CFGetRegLO(cf);1336const GPR hi = CFGetRegHI(cf);13371338if (sign)1339{1340rvAsm->MUL(lo, rs, rt);1341rvAsm->SRAI64(hi, lo, 32);1342EmitDSExtW(lo, lo);1343}1344else1345{1346// Need to make it unsigned.1347EmitDUExtW(RARG1, rs);1348EmitDUExtW(RARG2, rt);1349rvAsm->MUL(lo, RARG1, RARG2);1350rvAsm->SRAI64(hi, lo, 32);1351EmitDSExtW(lo, lo);1352}1353}13541355void CPU::RISCV64Recompiler::Compile_mult(CompileFlags cf)1356{1357Compile_mult(cf, true);1358}13591360void CPU::RISCV64Recompiler::Compile_multu(CompileFlags cf)1361{1362Compile_mult(cf, false);1363}13641365void CPU::RISCV64Recompiler::Compile_div(CompileFlags cf)1366{1367// 36 Volume I: RISC-V User-Level ISA V2.21368const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;1369if (!cf.valid_host_s)1370MoveSToReg(rs, cf);13711372const GPR rt = cf.valid_host_t ? 
CFGetRegT(cf) : RARG2;1373if (!cf.valid_host_t)1374MoveTToReg(rt, cf);13751376const GPR rlo = CFGetRegLO(cf);1377const GPR rhi = CFGetRegHI(cf);13781379Label done;1380Label not_divide_by_zero;1381rvAsm->BNEZ(rt, ¬_divide_by_zero);1382rvAsm->MV(rhi, rs); // hi = num1383rvAsm->SRAI64(rlo, rs, 63);1384rvAsm->ANDI(rlo, rlo, 2);1385rvAsm->ADDI(rlo, rlo, -1); // lo = s >= 0 ? -1 : 11386rvAsm->J(&done);13871388rvAsm->Bind(¬_divide_by_zero);1389Label not_unrepresentable;1390EmitMov(RSCRATCH, static_cast<u32>(-1));1391rvAsm->BNE(rt, RSCRATCH, ¬_unrepresentable);1392EmitMov(rlo, 0x80000000u);1393rvAsm->BNE(rs, rlo, ¬_unrepresentable);1394EmitMov(rhi, 0);1395rvAsm->J(&done);13961397rvAsm->Bind(¬_unrepresentable);13981399rvAsm->DIVW(rlo, rs, rt);1400rvAsm->REMW(rhi, rs, rt);14011402rvAsm->Bind(&done);1403}14041405void CPU::RISCV64Recompiler::Compile_divu(CompileFlags cf)1406{1407const GPR rs = cf.valid_host_s ? CFGetRegS(cf) : RARG1;1408if (!cf.valid_host_s)1409MoveSToReg(rs, cf);14101411const GPR rt = cf.valid_host_t ? CFGetRegT(cf) : RARG2;1412if (!cf.valid_host_t)1413MoveTToReg(rt, cf);14141415const GPR rlo = CFGetRegLO(cf);1416const GPR rhi = CFGetRegHI(cf);14171418// Semantics match? 
:-)1419rvAsm->DIVUW(rlo, rs, rt);1420rvAsm->REMUW(rhi, rs, rt);1421}14221423void CPU::RISCV64Recompiler::TestOverflow(const biscuit::GPR& long_res, const biscuit::GPR& res,1424const biscuit::GPR& reg_to_discard)1425{1426SwitchToFarCode(true, &Assembler::BEQ, long_res, res);14271428BackupHostState();14291430// toss the result1431ClearHostReg(reg_to_discard.Index());14321433EndBlockWithException(Exception::Ov);14341435RestoreHostState();14361437SwitchToNearCode(false);1438}14391440void CPU::RISCV64Recompiler::Compile_dst_op(1441CompileFlags cf, void (biscuit::Assembler::*op)(biscuit::GPR, biscuit::GPR, biscuit::GPR),1442void (RISCV64Recompiler::*op_const)(const biscuit::GPR& rd, const biscuit::GPR& rs, u32 imm),1443void (biscuit::Assembler::*op_long)(biscuit::GPR, biscuit::GPR, biscuit::GPR), bool commutative, bool overflow)1444{1445AssertRegOrConstS(cf);1446AssertRegOrConstT(cf);14471448const GPR rd = CFGetRegD(cf);14491450if (overflow)1451{1452const GPR rs = CFGetSafeRegS(cf, RARG1);1453const GPR rt = CFGetSafeRegT(cf, RARG2);1454(rvAsm->*op)(RARG3, rs, rt);1455(rvAsm->*op_long)(rd, rs, rt);1456TestOverflow(RARG3, rd, rd);1457return;1458}14591460if (cf.valid_host_s && cf.valid_host_t)1461{1462(rvAsm->*op)(rd, CFGetRegS(cf), CFGetRegT(cf));1463}1464else if (commutative && (cf.const_s || cf.const_t))1465{1466const GPR src = cf.const_s ? CFGetRegT(cf) : CFGetRegS(cf);1467if (const u32 cv = GetConstantRegU32(cf.const_s ? cf.MipsS() : cf.MipsT()); cv != 0)1468{1469(this->*op_const)(rd, src, cv);1470}1471else1472{1473if (rd.Index() != src.Index())1474rvAsm->MV(rd, src);1475overflow = false;1476}1477}1478else if (cf.const_s)1479{1480if (HasConstantRegValue(cf.MipsS(), 0))1481{1482(rvAsm->*op)(rd, zero, CFGetRegT(cf));1483}1484else1485{1486EmitMov(RSCRATCH, GetConstantRegU32(cf.MipsS()));1487(rvAsm->*op)(rd, RSCRATCH, CFGetRegT(cf));1488}1489}1490else if (cf.const_t)1491{1492const GPR rs = CFGetRegS(cf);1493if (const u32 cv = GetConstantRegU32(cf.const_s ? 
cf.MipsS() : cf.MipsT()); cv != 0)1494{1495(this->*op_const)(rd, rs, cv);1496}1497else1498{1499if (rd.Index() != rs.Index())1500rvAsm->MV(rd, rs);1501overflow = false;1502}1503}1504}15051506void CPU::RISCV64Recompiler::Compile_add(CompileFlags cf)1507{1508Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true,1509g_settings.cpu_recompiler_memory_exceptions);1510}15111512void CPU::RISCV64Recompiler::Compile_addu(CompileFlags cf)1513{1514Compile_dst_op(cf, &Assembler::ADDW, &RISCV64Recompiler::SafeADDIW, &Assembler::ADD, true, false);1515}15161517void CPU::RISCV64Recompiler::Compile_sub(CompileFlags cf)1518{1519Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false,1520g_settings.cpu_recompiler_memory_exceptions);1521}15221523void CPU::RISCV64Recompiler::Compile_subu(CompileFlags cf)1524{1525Compile_dst_op(cf, &Assembler::SUBW, &RISCV64Recompiler::SafeSUBIW, &Assembler::SUB, false, false);1526}15271528void CPU::RISCV64Recompiler::Compile_and(CompileFlags cf)1529{1530AssertRegOrConstS(cf);1531AssertRegOrConstT(cf);15321533// special cases - and with self -> self, and with 0 -> 01534const GPR regd = CFGetRegD(cf);1535if (cf.MipsS() == cf.MipsT())1536{1537rvAsm->MV(regd, CFGetRegS(cf));1538return;1539}1540else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))1541{1542EmitMov(regd, 0);1543return;1544}15451546Compile_dst_op(cf, &Assembler::AND, &RISCV64Recompiler::SafeANDI, &Assembler::AND, true, false);1547}15481549void CPU::RISCV64Recompiler::Compile_or(CompileFlags cf)1550{1551AssertRegOrConstS(cf);1552AssertRegOrConstT(cf);15531554// or/nor with 0 -> no effect1555const GPR regd = CFGetRegD(cf);1556if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0) || cf.MipsS() == cf.MipsT())1557{1558cf.const_s ? 
MoveTToReg(regd, cf) : MoveSToReg(regd, cf);1559return;1560}15611562Compile_dst_op(cf, &Assembler::OR, &RISCV64Recompiler::SafeORI, &Assembler::OR, true, false);1563}15641565void CPU::RISCV64Recompiler::Compile_xor(CompileFlags cf)1566{1567AssertRegOrConstS(cf);1568AssertRegOrConstT(cf);15691570const GPR regd = CFGetRegD(cf);1571if (cf.MipsS() == cf.MipsT())1572{1573// xor with self -> zero1574EmitMov(regd, 0);1575return;1576}1577else if (HasConstantRegValue(cf.MipsS(), 0) || HasConstantRegValue(cf.MipsT(), 0))1578{1579// xor with zero -> no effect1580cf.const_s ? MoveTToReg(regd, cf) : MoveSToReg(regd, cf);1581return;1582}15831584Compile_dst_op(cf, &Assembler::XOR, &RISCV64Recompiler::SafeXORI, &Assembler::XOR, true, false);1585}15861587void CPU::RISCV64Recompiler::Compile_nor(CompileFlags cf)1588{1589Compile_or(cf);1590rvAsm->NOT(CFGetRegD(cf), CFGetRegD(cf));1591}15921593void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf)1594{1595Compile_slt(cf, true);1596}15971598void CPU::RISCV64Recompiler::Compile_sltu(CompileFlags cf)1599{1600Compile_slt(cf, false);1601}16021603void CPU::RISCV64Recompiler::Compile_slt(CompileFlags cf, bool sign)1604{1605AssertRegOrConstS(cf);1606AssertRegOrConstT(cf);16071608const GPR rd = CFGetRegD(cf);1609const GPR rs = CFGetSafeRegS(cf, RARG1);16101611if (cf.const_t && rvIsValidSExtITypeImm(GetConstantRegU32(cf.MipsT())))1612{1613if (sign)1614rvAsm->SLTI(rd, rs, GetConstantRegS32(cf.MipsT()));1615else1616rvAsm->SLTIU(rd, rs, GetConstantRegS32(cf.MipsT()));1617}1618else1619{1620const GPR rt = CFGetSafeRegT(cf, RARG2);1621if (sign)1622rvAsm->SLT(rd, rs, rt);1623else1624rvAsm->SLTU(rd, rs, rt);1625}1626}16271628biscuit::GPR CPU::RISCV64Recompiler::ComputeLoadStoreAddressArg(CompileFlags cf,1629const std::optional<VirtualMemoryAddress>& address,1630const std::optional<const biscuit::GPR>& reg)1631{1632const u32 imm = inst->i.imm_sext32();1633if (cf.valid_host_s && imm == 0 && !reg.has_value())1634return CFGetRegS(cf);16351636const GPR 
dst = reg.has_value() ? reg.value() : RARG1;1637if (address.has_value())1638{1639EmitMov(dst, address.value());1640}1641else if (imm == 0)1642{1643if (cf.valid_host_s)1644{1645if (const GPR src = CFGetRegS(cf); src.Index() != dst.Index())1646rvAsm->MV(dst, CFGetRegS(cf));1647}1648else1649{1650rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));1651}1652}1653else1654{1655if (cf.valid_host_s)1656{1657SafeADDIW(dst, CFGetRegS(cf), inst->i.imm_sext32());1658}1659else1660{1661rvAsm->LW(dst, PTR(&g_state.regs.r[cf.mips_s]));1662SafeADDIW(dst, dst, inst->i.imm_sext32());1663}1664}16651666return dst;1667}16681669template<typename RegAllocFn>1670biscuit::GPR CPU::RISCV64Recompiler::GenerateLoad(const biscuit::GPR& addr_reg, MemoryAccessSize size, bool sign,1671bool use_fastmem, const RegAllocFn& dst_reg_alloc)1672{1673if (use_fastmem)1674{1675m_cycles += Bus::RAM_READ_TICKS;16761677// TODO: Make this better. If we're loading the address from state, we can use LWU instead, and skip this.1678// TODO: LUT fastmem1679const GPR dst = dst_reg_alloc();1680rvAsm->SLLI64(RSCRATCH, addr_reg, 32);1681rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);16821683if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)1684{1685DebugAssert(addr_reg.Index() != RARG3.Index());1686rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);1687rvAsm->SLLI64(RARG3, RARG3, 8);1688rvAsm->ADD(RARG3, RARG3, RMEMBASE);1689rvAsm->LD(RARG3, 0, RARG3);1690rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);1691}1692else1693{1694rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);1695}16961697u8* start = m_emitter->GetCursorPointer();1698switch (size)1699{1700case MemoryAccessSize::Byte:1701sign ? rvAsm->LB(dst, 0, RSCRATCH) : rvAsm->LBU(dst, 0, RSCRATCH);1702break;17031704case MemoryAccessSize::HalfWord:1705sign ? 
rvAsm->LH(dst, 0, RSCRATCH) : rvAsm->LHU(dst, 0, RSCRATCH);1706break;17071708case MemoryAccessSize::Word:1709rvAsm->LW(dst, 0, RSCRATCH);1710break;1711}17121713// We need a nop, because the slowmem jump might be more than 1MB away.1714rvAsm->NOP();17151716AddLoadStoreInfo(start, 8, addr_reg.Index(), dst.Index(), size, sign, true);1717return dst;1718}17191720if (addr_reg.Index() != RARG1.Index())1721rvAsm->MV(RARG1, addr_reg);17221723const bool checked = g_settings.cpu_recompiler_memory_exceptions;1724switch (size)1725{1726case MemoryAccessSize::Byte:1727{1728EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryByte) :1729reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte));1730}1731break;1732case MemoryAccessSize::HalfWord:1733{1734EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryHalfWord) :1735reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord));1736}1737break;1738case MemoryAccessSize::Word:1739{1740EmitCall(checked ? 
reinterpret_cast<const void*>(&RecompilerThunks::ReadMemoryWord) :1741reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord));1742}1743break;1744}17451746// TODO: turn this into an asm function instead1747if (checked)1748{1749rvAsm->SRLI64(RSCRATCH, RRET, 63);1750SwitchToFarCode(true, &Assembler::BEQ, RSCRATCH, zero);1751BackupHostState();17521753// Need to stash this in a temp because of the flush.1754const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));1755rvAsm->NEG(temp, RRET);1756rvAsm->SLLIW(temp, temp, 2);17571758Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);17591760// cause_bits = (-result << 2) | BD | cop_n1761SafeORI(RARG1, temp,1762Cop0Registers::CAUSE::MakeValueForException(1763static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));1764EmitMov(RARG2, m_current_instruction_pc);1765EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));1766FreeHostReg(temp.Index());1767EndBlock(std::nullopt, true);17681769RestoreHostState();1770SwitchToNearCode(false);1771}17721773const GPR dst_reg = dst_reg_alloc();1774switch (size)1775{1776case MemoryAccessSize::Byte:1777{1778sign ? EmitSExtB(dst_reg, RRET) : EmitUExtB(dst_reg, RRET);1779}1780break;1781case MemoryAccessSize::HalfWord:1782{1783sign ? 
EmitSExtH(dst_reg, RRET) : EmitUExtH(dst_reg, RRET);1784}1785break;1786case MemoryAccessSize::Word:1787{1788// Need to undo the zero-extend.1789if (checked)1790rvEmitDSExtW(rvAsm, dst_reg, RRET);1791else if (dst_reg.Index() != RRET.Index())1792rvAsm->MV(dst_reg, RRET);1793}1794break;1795}17961797return dst_reg;1798}17991800void CPU::RISCV64Recompiler::GenerateStore(const biscuit::GPR& addr_reg, const biscuit::GPR& value_reg,1801MemoryAccessSize size, bool use_fastmem)1802{1803if (use_fastmem)1804{1805DebugAssert(value_reg != RSCRATCH);1806rvAsm->SLLI64(RSCRATCH, addr_reg, 32);1807rvAsm->SRLI64(RSCRATCH, RSCRATCH, 32);18081809if (g_settings.cpu_fastmem_mode == CPUFastmemMode::LUT)1810{1811DebugAssert(addr_reg.Index() != RARG3.Index());1812rvAsm->SRLI64(RARG3, RSCRATCH, Bus::FASTMEM_LUT_PAGE_SHIFT);1813rvAsm->SLLI64(RARG3, RARG3, 8);1814rvAsm->ADD(RARG3, RARG3, RMEMBASE);1815rvAsm->LD(RARG3, 0, RARG3);1816rvAsm->ADD(RSCRATCH, RSCRATCH, RARG3);1817}1818else1819{1820rvAsm->ADD(RSCRATCH, RSCRATCH, RMEMBASE);1821}18221823u8* start = m_emitter->GetCursorPointer();1824switch (size)1825{1826case MemoryAccessSize::Byte:1827rvAsm->SB(value_reg, 0, RSCRATCH);1828break;18291830case MemoryAccessSize::HalfWord:1831rvAsm->SH(value_reg, 0, RSCRATCH);1832break;18331834case MemoryAccessSize::Word:1835rvAsm->SW(value_reg, 0, RSCRATCH);1836break;1837}18381839// We need a nop, because the slowmem jump might be more than 1MB away.1840rvAsm->NOP();18411842AddLoadStoreInfo(start, 8, addr_reg.Index(), value_reg.Index(), size, false, false);1843return;1844}18451846if (addr_reg.Index() != RARG1.Index())1847rvAsm->MV(RARG1, addr_reg);1848if (value_reg.Index() != RARG2.Index())1849rvAsm->MV(RARG2, value_reg);18501851const bool checked = g_settings.cpu_recompiler_memory_exceptions;1852switch (size)1853{1854case MemoryAccessSize::Byte:1855{1856EmitCall(checked ? 
reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryByte) :1857reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));1858}1859break;1860case MemoryAccessSize::HalfWord:1861{1862EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryHalfWord) :1863reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));1864}1865break;1866case MemoryAccessSize::Word:1867{1868EmitCall(checked ? reinterpret_cast<const void*>(&RecompilerThunks::WriteMemoryWord) :1869reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));1870}1871break;1872}18731874// TODO: turn this into an asm function instead1875if (checked)1876{1877SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);1878BackupHostState();18791880// Need to stash this in a temp because of the flush.1881const GPR temp = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));1882rvAsm->SLLIW(temp, RRET, 2);18831884Flush(FLUSH_FOR_C_CALL | FLUSH_FLUSH_MIPS_REGISTERS | FLUSH_FOR_EXCEPTION);18851886// cause_bits = (result << 2) | BD | cop_n1887SafeORI(RARG1, temp,1888Cop0Registers::CAUSE::MakeValueForException(1889static_cast<Exception>(0), m_current_instruction_branch_delay_slot, false, inst->cop.cop_n));1890EmitMov(RARG2, m_current_instruction_pc);1891EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));1892FreeHostReg(temp.Index());1893EndBlock(std::nullopt, true);18941895RestoreHostState();1896SwitchToNearCode(false);1897}1898}18991900void CPU::RISCV64Recompiler::Compile_lxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,1901const std::optional<VirtualMemoryAddress>& address)1902{1903const std::optional<GPR> addr_reg = (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero) ?1904std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) :1905std::optional<GPR>();1906FlushForLoadStore(address, false, use_fastmem);1907const GPR addr = ComputeLoadStoreAddressArg(cf, address, 
addr_reg);1908const GPR data = GenerateLoad(addr, size, sign, use_fastmem, [this, cf]() {1909if (cf.MipsT() == Reg::zero)1910return RRET;19111912return GPR(AllocateHostReg(GetFlagsForNewLoadDelayedReg(),1913EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, cf.MipsT()));1914});19151916if (g_settings.gpu_pgxp_enable && cf.MipsT() != Reg::zero)1917{1918Flush(FLUSH_FOR_C_CALL);19191920EmitMov(RARG1, inst->bits);1921rvAsm->MV(RARG2, addr);1922rvAsm->MV(RARG3, data);1923EmitCall(s_pgxp_mem_load_functions[static_cast<u32>(size)][static_cast<u32>(sign)]);1924FreeHostReg(addr_reg.value().Index());1925}1926}19271928void CPU::RISCV64Recompiler::Compile_lwx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,1929const std::optional<VirtualMemoryAddress>& address)1930{1931DebugAssert(size == MemoryAccessSize::Word && !sign);19321933const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));1934FlushForLoadStore(address, false, use_fastmem);19351936// TODO: if address is constant, this can be simplified..19371938// If we're coming from another block, just flush the load delay and hope for the best..1939if (m_load_delay_dirty)1940UpdateLoadDelay();19411942// We'd need to be careful here if we weren't overwriting it..1943ComputeLoadStoreAddressArg(cf, address, addr);1944rvAsm->ANDI(RARG1, addr, ~0x3u);1945GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });19461947if (inst->r.rt == Reg::zero)1948{1949FreeHostReg(addr.Index());1950return;1951}19521953// lwl/lwr from a load-delayed value takes the new value, but it itself, is load delayed, so the original value is1954// never written back. 
NOTE: can't trust T in cf because of the flush1955const Reg rt = inst->r.rt;1956GPR value;1957if (m_load_delay_register == rt)1958{1959const u32 existing_ld_rt = (m_load_delay_value_register == NUM_HOST_REGS) ?1960AllocateHostReg(HR_MODE_READ, HR_TYPE_LOAD_DELAY_VALUE, rt) :1961m_load_delay_value_register;1962RenameHostReg(existing_ld_rt, HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt);1963value = GPR(existing_ld_rt);1964}1965else1966{1967if constexpr (EMULATE_LOAD_DELAYS)1968{1969value = GPR(AllocateHostReg(HR_MODE_WRITE, HR_TYPE_NEXT_LOAD_DELAY_VALUE, rt));1970if (const std::optional<u32> rtreg = CheckHostReg(HR_MODE_READ, HR_TYPE_CPU_REG, rt); rtreg.has_value())1971rvAsm->MV(value, GPR(rtreg.value()));1972else if (HasConstantReg(rt))1973EmitMov(value, GetConstantRegU32(rt));1974else1975rvAsm->LW(value, PTR(&g_state.regs.r[static_cast<u8>(rt)]));1976}1977else1978{1979value = GPR(AllocateHostReg(HR_MODE_READ | HR_MODE_WRITE, HR_TYPE_CPU_REG, rt));1980}1981}19821983DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());1984rvAsm->ANDI(RARG2, addr, 3);1985rvAsm->SLLIW(RARG2, RARG2, 3); // *81986EmitMov(RARG3, 24);1987rvAsm->SUBW(RARG3, RARG3, RARG2);19881989if (inst->op == InstructionOp::lwl)1990{1991// const u32 mask = UINT32_C(0x00FFFFFF) >> shift;1992// new_value = (value & mask) | (RWRET << (24 - shift));1993EmitMov(RSCRATCH, 0xFFFFFFu);1994rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG2);1995rvAsm->AND(value, value, RSCRATCH);1996rvAsm->SLLW(RRET, RRET, RARG3);1997rvAsm->OR(value, value, RRET);1998}1999else2000{2001// const u32 mask = UINT32_C(0xFFFFFF00) << (24 - shift);2002// new_value = (value & mask) | (RWRET >> shift);2003rvAsm->SRLW(RRET, RRET, RARG2);2004EmitMov(RSCRATCH, 0xFFFFFF00u);2005rvAsm->SLLW(RSCRATCH, RSCRATCH, RARG3);2006rvAsm->AND(value, value, RSCRATCH);2007rvAsm->OR(value, value, RRET);2008}20092010FreeHostReg(addr.Index());20112012if (g_settings.gpu_pgxp_enable)2013{2014Flush(FLUSH_FOR_C_CALL);2015rvAsm->MV(RARG3, 
value);2016rvAsm->ANDI(RARG2, addr, ~0x3u);2017EmitMov(RARG1, inst->bits);2018EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LW));2019}2020}20212022void CPU::RISCV64Recompiler::Compile_lwc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,2023const std::optional<VirtualMemoryAddress>& address)2024{2025const u32 index = static_cast<u32>(inst->r.rt.GetValue());2026const auto [ptr, action] = GetGTERegisterPointer(index, true);2027const std::optional<GPR> addr_reg =2028g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();2029FlushForLoadStore(address, false, use_fastmem);2030const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);2031const GPR value = GenerateLoad(addr, MemoryAccessSize::Word, false, use_fastmem, [this, action = action]() {2032return (action == GTERegisterAccessAction::CallHandler && g_settings.gpu_pgxp_enable) ?2033GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :2034RRET;2035});20362037switch (action)2038{2039case GTERegisterAccessAction::Ignore:2040{2041break;2042}20432044case GTERegisterAccessAction::Direct:2045{2046rvAsm->SW(value, PTR(ptr));2047break;2048}20492050case GTERegisterAccessAction::SignExtend16:2051{2052EmitSExtH(RARG3, value);2053rvAsm->SW(RARG3, PTR(ptr));2054break;2055}20562057case GTERegisterAccessAction::ZeroExtend16:2058{2059EmitUExtH(RARG3, value);2060rvAsm->SW(RARG3, PTR(ptr));2061break;2062}20632064case GTERegisterAccessAction::CallHandler:2065{2066Flush(FLUSH_FOR_C_CALL);2067rvAsm->MV(RARG2, value);2068EmitMov(RARG1, index);2069EmitCall(reinterpret_cast<const void*>(>E::WriteRegister));2070break;2071}20722073case GTERegisterAccessAction::PushFIFO:2074{2075// SXY0 <- SXY12076// SXY1 <- SXY22077// SXY2 <- SXYP2078DebugAssert(value.Index() != RARG2.Index() && value.Index() != RARG3.Index());2079rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));2080rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));2081rvAsm->SW(RARG2, 
PTR(&g_state.gte_regs.SXY0[0]));2082rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));2083rvAsm->SW(value, PTR(&g_state.gte_regs.SXY2[0]));2084break;2085}20862087default:2088{2089Panic("Unknown action");2090return;2091}2092}20932094if (g_settings.gpu_pgxp_enable)2095{2096Flush(FLUSH_FOR_C_CALL);2097rvAsm->MV(RARG3, value);2098if (value.Index() != RRET.Index())2099FreeHostReg(value.Index());2100rvAsm->MV(RARG2, addr);2101FreeHostReg(addr_reg.value().Index());2102EmitMov(RARG1, inst->bits);2103EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_LWC2));2104}2105}21062107void CPU::RISCV64Recompiler::Compile_sxx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,2108const std::optional<VirtualMemoryAddress>& address)2109{2110AssertRegOrConstS(cf);2111AssertRegOrConstT(cf);21122113const std::optional<GPR> addr_reg =2114g_settings.gpu_pgxp_enable ? std::optional<GPR>(GPR(AllocateTempHostReg(HR_CALLEE_SAVED))) : std::optional<GPR>();2115FlushForLoadStore(address, true, use_fastmem);2116const GPR addr = ComputeLoadStoreAddressArg(cf, address, addr_reg);2117const GPR data = cf.valid_host_t ? CFGetRegT(cf) : RARG2;2118if (!cf.valid_host_t)2119MoveTToReg(RARG2, cf);21202121GenerateStore(addr, data, size, use_fastmem);21222123if (g_settings.gpu_pgxp_enable)2124{2125Flush(FLUSH_FOR_C_CALL);2126MoveMIPSRegToReg(RARG3, cf.MipsT());2127rvAsm->MV(RARG2, addr);2128EmitMov(RARG1, inst->bits);2129EmitCall(s_pgxp_mem_store_functions[static_cast<u32>(size)]);2130FreeHostReg(addr_reg.value().Index());2131}2132}21332134void CPU::RISCV64Recompiler::Compile_swx(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,2135const std::optional<VirtualMemoryAddress>& address)2136{2137DebugAssert(size == MemoryAccessSize::Word && !sign);21382139// TODO: this can take over rt's value if it's no longer needed2140// NOTE: can't trust T in cf because of the alloc2141const GPR addr = GPR(AllocateTempHostReg(HR_CALLEE_SAVED));2142const GPR value = g_settings.gpu_pgxp_enable ? 
GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;2143if (g_settings.gpu_pgxp_enable)2144MoveMIPSRegToReg(value, inst->r.rt);21452146FlushForLoadStore(address, true, use_fastmem);21472148// TODO: if address is constant, this can be simplified..2149// We'd need to be careful here if we weren't overwriting it..2150ComputeLoadStoreAddressArg(cf, address, addr);2151rvAsm->ANDI(RARG1, addr, ~0x3u);2152GenerateLoad(RARG1, MemoryAccessSize::Word, false, use_fastmem, []() { return RRET; });21532154rvAsm->ANDI(RSCRATCH, addr, 3);2155rvAsm->SLLIW(RSCRATCH, RSCRATCH, 3); // *82156rvAsm->ANDI(addr, addr, ~0x3u);21572158// Need to load down here for PGXP-off, because it's in a volatile reg that can get overwritten by flush.2159if (!g_settings.gpu_pgxp_enable)2160MoveMIPSRegToReg(value, inst->r.rt);21612162if (inst->op == InstructionOp::swl)2163{2164// const u32 mem_mask = UINT32_C(0xFFFFFF00) << shift;2165// new_value = (RWRET & mem_mask) | (value >> (24 - shift));2166EmitMov(RARG3, 0xFFFFFF00u);2167rvAsm->SLLW(RARG3, RARG3, RSCRATCH);2168rvAsm->AND(RRET, RRET, RARG3);21692170EmitMov(RARG3, 24);2171rvAsm->SUBW(RARG3, RARG3, RSCRATCH);2172rvAsm->SRLW(value, value, RARG3);2173rvAsm->OR(value, value, RRET);2174}2175else2176{2177// const u32 mem_mask = UINT32_C(0x00FFFFFF) >> (24 - shift);2178// new_value = (RWRET & mem_mask) | (value << shift);2179rvAsm->SLLW(value, value, RSCRATCH);21802181EmitMov(RARG3, 24);2182rvAsm->SUBW(RARG3, RARG3, RSCRATCH);2183EmitMov(RSCRATCH, 0x00FFFFFFu);2184rvAsm->SRLW(RSCRATCH, RSCRATCH, RARG3);2185rvAsm->AND(RRET, RRET, RSCRATCH);2186rvAsm->OR(value, value, RRET);2187}21882189if (!g_settings.gpu_pgxp_enable)2190{2191GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);2192FreeHostReg(addr.Index());2193}2194else2195{2196GenerateStore(addr, value, MemoryAccessSize::Word, use_fastmem);21972198Flush(FLUSH_FOR_C_CALL);2199rvAsm->MV(RARG3, value);2200FreeHostReg(value.Index());2201rvAsm->MV(RARG2, 
addr);2202FreeHostReg(addr.Index());2203EmitMov(RARG1, inst->bits);2204EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SW));2205}2206}22072208void CPU::RISCV64Recompiler::Compile_swc2(CompileFlags cf, MemoryAccessSize size, bool sign, bool use_fastmem,2209const std::optional<VirtualMemoryAddress>& address)2210{2211const u32 index = static_cast<u32>(inst->r.rt.GetValue());2212const auto [ptr, action] = GetGTERegisterPointer(index, false);2213const GPR addr = (g_settings.gpu_pgxp_enable || action == GTERegisterAccessAction::CallHandler) ?2214GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) :2215RARG1;2216const GPR data = g_settings.gpu_pgxp_enable ? GPR(AllocateTempHostReg(HR_CALLEE_SAVED)) : RARG2;2217FlushForLoadStore(address, true, use_fastmem);2218ComputeLoadStoreAddressArg(cf, address, addr);22192220switch (action)2221{2222case GTERegisterAccessAction::Direct:2223{2224rvAsm->LW(data, PTR(ptr));2225}2226break;22272228case GTERegisterAccessAction::CallHandler:2229{2230// should already be flushed.. except in fastmem case2231Flush(FLUSH_FOR_C_CALL);2232EmitMov(RARG1, index);2233EmitCall(reinterpret_cast<const void*>(>E::ReadRegister));2234rvAsm->MV(data, RRET);2235}2236break;22372238default:2239{2240Panic("Unknown action");2241}2242break;2243}22442245GenerateStore(addr, data, size, use_fastmem);22462247if (!g_settings.gpu_pgxp_enable)2248{2249if (addr.Index() != RARG1.Index())2250FreeHostReg(addr.Index());2251}2252else2253{2254// TODO: This can be simplified because we don't need to validate in PGXP..2255Flush(FLUSH_FOR_C_CALL);2256rvAsm->MV(RARG3, data);2257FreeHostReg(data.Index());2258rvAsm->MV(RARG2, addr);2259FreeHostReg(addr.Index());2260EmitMov(RARG1, inst->bits);2261EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_SWC2));2262}2263}22642265void CPU::RISCV64Recompiler::Compile_mtc0(CompileFlags cf)2266{2267// TODO: we need better constant setting here.. 
// Compiles MTC0: write a GPR (or constant) into a COP0 system control
// register, applying the register's write mask and handling the side effects
// of SR/CAUSE/DCIC/BPCM writes (interrupt test, memory-pointer refresh,
// debug-dispatcher switch).
void CPU::RISCV64Recompiler::Compile_mtc0(CompileFlags cf)
{
  // TODO: we need better constant setting here.. which will need backprop
  AssertRegOrConstT(cf);

  const Cop0Reg reg = static_cast<Cop0Reg>(MipsD());
  const u32* ptr = GetCop0RegPtr(reg);
  const u32 mask = GetCop0RegWriteMask(reg);
  if (!ptr)
  {
    // Unknown/unhandled register: punt to the interpreter fallback.
    Compile_Fallback();
    return;
  }

  if (mask == 0)
  {
    // if it's a read-only register, ignore
    DEBUG_LOG("Ignoring write to read-only cop0 reg {}", static_cast<u32>(reg));
    return;
  }

  // for some registers, we need to test certain bits
  const bool needs_bit_test = (reg == Cop0Reg::SR);
  const GPR new_value = RARG1;
  const GPR old_value = RARG2;
  const GPR changed_bits = RARG3;
  const GPR mask_reg = RSCRATCH;

  // Load old value
  rvAsm->LW(old_value, PTR(ptr));

  // No way we fit this in an immediate..
  EmitMov(mask_reg, mask);

  // update value
  // TODO: This is creating pointless MV instructions.. why?
  if (cf.valid_host_t)
    rvAsm->AND(new_value, CFGetRegT(cf), mask_reg);
  else
    EmitMov(new_value, GetConstantRegU32(cf.MipsT()) & mask);

  // changed_bits = old ^ new (only needed for SR); then merge:
  // stored = (old & ~mask) | (new & mask).
  if (needs_bit_test)
    rvAsm->XOR(changed_bits, old_value, new_value);
  rvAsm->NOT(mask_reg, mask_reg);
  rvAsm->AND(old_value, old_value, mask_reg);
  rvAsm->OR(new_value, old_value, new_value);
  rvAsm->SW(new_value, PTR(ptr));

  if (reg == Cop0Reg::SR)
  {
    // TODO: replace with register backup
    // We could just inline the whole thing..
    Flush(FLUSH_FOR_C_CALL);

    // If bit 16 of SR changed (presumably Isc, cache isolation — confirm),
    // the memory map changes: refresh the pointers and the fastmem base.
    Label caches_unchanged;
    rvAsm->SRLIW(RSCRATCH, changed_bits, 16);
    rvAsm->ANDI(RSCRATCH, RSCRATCH, 1);
    rvAsm->BEQ(RSCRATCH, zero, &caches_unchanged);
    EmitCall(reinterpret_cast<const void*>(&CPU::UpdateMemoryPointers));
    rvAsm->LW(new_value, PTR(ptr));
    if (CodeCache::IsUsingFastmem())
      rvAsm->LD(RMEMBASE, PTR(&g_state.fastmem_base));
    rvAsm->Bind(&caches_unchanged);

    // SR writes can unmask interrupts.
    TestInterrupts(RARG1);
  }
  else if (reg == Cop0Reg::CAUSE)
  {
    // CAUSE writes can raise software interrupts; test against current SR.
    rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
    TestInterrupts(RARG1);
  }
  else if (reg == Cop0Reg::DCIC || reg == Cop0Reg::BPCM)
  {
    // need to check whether we're switching to debug mode
    Flush(FLUSH_FOR_C_CALL);
    EmitCall(reinterpret_cast<const void*>(&CPU::UpdateDebugDispatcherFlag));
    // If the dispatcher changed (RRET != 0), leave execution entirely.
    SwitchToFarCode(true, &Assembler::BEQ, RRET, zero);
    BackupHostState();
    Flush(FLUSH_FOR_EARLY_BLOCK_EXIT);
    EmitCall(reinterpret_cast<const void*>(&CPU::ExitExecution)); // does not return
    RestoreHostState();
    SwitchToNearCode(false);
  }
}
// Compiles RFE: restore the interrupt-enable/mode stack by shifting the low
// mode bits of SR right by two, then re-test for pending interrupts since
// this may re-enable them.
void CPU::RISCV64Recompiler::Compile_rfe(CompileFlags cf)
{
  // shift mode bits right two, preserving upper bits
  rvAsm->LW(RARG1, PTR(&g_state.cop0_regs.sr.bits));
  rvAsm->SRLIW(RSCRATCH, RARG1, 2);
  rvAsm->ANDI(RSCRATCH, RSCRATCH, 0xf);
  rvAsm->ANDI(RARG1, RARG1, ~0xfu);
  rvAsm->OR(RARG1, RARG1, RSCRATCH);
  rvAsm->SW(RARG1, PTR(&g_state.cop0_regs.sr.bits));

  TestInterrupts(RARG1);
}

// Emits an inline check for a pending, enabled interrupt.
// `sr` holds the current SR value on entry and is clobbered by the test.
// Fast path (no interrupt) falls through; the interrupt path is emitted as
// far code that dispatches the exception and ends the block.
void CPU::RISCV64Recompiler::TestInterrupts(const biscuit::GPR& sr)
{
  DebugAssert(sr != RSCRATCH);

  // if Iec == 0 then goto no_interrupt
  Label no_interrupt;
  rvAsm->ANDI(RSCRATCH, sr, 1);
  rvAsm->BEQZ(RSCRATCH, &no_interrupt);

  // sr & cause
  rvAsm->LW(RSCRATCH, PTR(&g_state.cop0_regs.cause.bits));
  rvAsm->AND(sr, sr, RSCRATCH);

  // ((sr & cause) & 0xff00) == 0 goto no_interrupt
  rvAsm->SRLIW(sr, sr, 8);
  rvAsm->ANDI(sr, sr, 0xFF);
  SwitchToFarCode(true, &Assembler::BEQ, sr, zero);

  BackupHostState();

  // Update load delay, this normally happens at the end of an instruction, but we're finishing it early.
  UpdateLoadDelay();

  Flush(FLUSH_END_BLOCK | FLUSH_FOR_EXCEPTION | FLUSH_FOR_C_CALL);

  // Can't use EndBlockWithException() here, because it'll use the wrong PC.
  // Can't use RaiseException() on the fast path if we're the last instruction, because the next PC is unknown.
  if (!iinfo->is_last_instruction)
  {
    EmitMov(RARG1, Cop0Registers::CAUSE::MakeValueForException(Exception::INT, iinfo->is_branch_instruction, false,
                                                               (inst + 1)->cop.cop_n));
    EmitMov(RARG2, m_compiler_pc);
    EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, true, false);
  }
  else
  {
    // Last instruction: force the dispatcher to run by zeroing the downcount,
    // flushing the PC first if it hasn't been written back yet.
    if (m_dirty_pc)
      EmitMov(RARG1, m_compiler_pc);
    rvAsm->SW(biscuit::zero, PTR(&g_state.downcount));
    if (m_dirty_pc)
      rvAsm->SW(RARG1, PTR(&g_state.pc));
    m_dirty_pc = false;
    EndAndLinkBlock(std::nullopt, false, true);
  }

  RestoreHostState();
  SwitchToNearCode(false);

  rvAsm->Bind(&no_interrupt);
}
false,2393(inst + 1)->cop.cop_n));2394EmitMov(RARG2, m_compiler_pc);2395EmitCall(reinterpret_cast<const void*>(static_cast<void (*)(u32, u32)>(&CPU::RaiseException)));2396m_dirty_pc = false;2397EndAndLinkBlock(std::nullopt, true, false);2398}2399else2400{2401if (m_dirty_pc)2402EmitMov(RARG1, m_compiler_pc);2403rvAsm->SW(biscuit::zero, PTR(&g_state.downcount));2404if (m_dirty_pc)2405rvAsm->SW(RARG1, PTR(&g_state.pc));2406m_dirty_pc = false;2407EndAndLinkBlock(std::nullopt, false, true);2408}24092410RestoreHostState();2411SwitchToNearCode(false);24122413rvAsm->Bind(&no_interrupt);2414}24152416void CPU::RISCV64Recompiler::Compile_mfc2(CompileFlags cf)2417{2418const u32 index = inst->cop.Cop2Index();2419const Reg rt = inst->r.rt;24202421const auto [ptr, action] = GetGTERegisterPointer(index, false);2422if (action == GTERegisterAccessAction::Ignore)2423return;24242425u32 hreg;2426if (action == GTERegisterAccessAction::Direct)2427{2428hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),2429EMULATE_LOAD_DELAYS ? HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);2430rvAsm->LW(GPR(hreg), PTR(ptr));2431}2432else if (action == GTERegisterAccessAction::CallHandler)2433{2434Flush(FLUSH_FOR_C_CALL);2435EmitMov(RARG1, index);2436EmitCall(reinterpret_cast<const void*>(>E::ReadRegister));24372438hreg = AllocateHostReg(GetFlagsForNewLoadDelayedReg(),2439EMULATE_LOAD_DELAYS ? 
HR_TYPE_NEXT_LOAD_DELAY_VALUE : HR_TYPE_CPU_REG, rt);2440rvAsm->MV(GPR(hreg), RRET);2441}2442else2443{2444Panic("Unknown action");2445}24462447if (g_settings.gpu_pgxp_enable)2448{2449Flush(FLUSH_FOR_C_CALL);2450EmitMov(RARG1, inst->bits);2451rvAsm->MV(RARG2, GPR(hreg));2452EmitCall(reinterpret_cast<const void*>(&PGXP::CPU_MFC2));2453}2454}24552456void CPU::RISCV64Recompiler::Compile_mtc2(CompileFlags cf)2457{2458const u32 index = inst->cop.Cop2Index();2459const auto [ptr, action] = GetGTERegisterPointer(index, true);2460if (action == GTERegisterAccessAction::Ignore)2461return;24622463if (action == GTERegisterAccessAction::Direct)2464{2465if (cf.const_t)2466StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), ptr);2467else2468rvAsm->SW(CFGetRegT(cf), PTR(ptr));2469}2470else if (action == GTERegisterAccessAction::SignExtend16 || action == GTERegisterAccessAction::ZeroExtend16)2471{2472const bool sign = (action == GTERegisterAccessAction::SignExtend16);2473if (cf.valid_host_t)2474{2475sign ? EmitSExtH(RARG1, CFGetRegT(cf)) : EmitUExtH(RARG1, CFGetRegT(cf));2476rvAsm->SW(RARG1, PTR(ptr));2477}2478else if (cf.const_t)2479{2480const u16 cv = Truncate16(GetConstantRegU32(cf.MipsT()));2481StoreConstantToCPUPointer(sign ? 
::SignExtend32(cv) : ::ZeroExtend32(cv), ptr);2482}2483else2484{2485Panic("Unsupported setup");2486}2487}2488else if (action == GTERegisterAccessAction::CallHandler)2489{2490Flush(FLUSH_FOR_C_CALL);2491EmitMov(RARG1, index);2492MoveTToReg(RARG2, cf);2493EmitCall(reinterpret_cast<const void*>(>E::WriteRegister));2494}2495else if (action == GTERegisterAccessAction::PushFIFO)2496{2497// SXY0 <- SXY12498// SXY1 <- SXY22499// SXY2 <- SXYP2500DebugAssert(RRET.Index() != RARG2.Index() && RRET.Index() != RARG3.Index());2501rvAsm->LW(RARG2, PTR(&g_state.gte_regs.SXY1[0]));2502rvAsm->LW(RARG3, PTR(&g_state.gte_regs.SXY2[0]));2503rvAsm->SW(RARG2, PTR(&g_state.gte_regs.SXY0[0]));2504rvAsm->SW(RARG3, PTR(&g_state.gte_regs.SXY1[0]));2505if (cf.valid_host_t)2506rvAsm->SW(CFGetRegT(cf), PTR(&g_state.gte_regs.SXY2[0]));2507else if (cf.const_t)2508StoreConstantToCPUPointer(GetConstantRegU32(cf.MipsT()), &g_state.gte_regs.SXY2[0]);2509else2510Panic("Unsupported setup");2511}2512else2513{2514Panic("Unknown action");2515}2516}25172518void CPU::RISCV64Recompiler::Compile_cop2(CompileFlags cf)2519{2520TickCount func_ticks;2521GTE::InstructionImpl func = GTE::GetInstructionImpl(inst->bits, &func_ticks);25222523Flush(FLUSH_FOR_C_CALL);2524EmitMov(RARG1, inst->bits & GTE::Instruction::REQUIRED_BITS_MASK);2525EmitCall(reinterpret_cast<const void*>(func));25262527AddGTETicks(func_ticks);2528}25292530u32 CPU::Recompiler::CompileLoadStoreThunk(void* thunk_code, u32 thunk_space, void* code_address, u32 code_size,2531TickCount cycles_to_add, TickCount cycles_to_remove, u32 gpr_bitmask,2532u8 address_register, u8 data_register, MemoryAccessSize size, bool is_signed,2533bool is_load)2534{2535Assembler arm_asm(static_cast<u8*>(thunk_code), thunk_space);2536Assembler* rvAsm = &arm_asm;25372538static constexpr u32 GPR_SIZE = 8;25392540// save regs2541u32 num_gprs = 0;25422543for (u32 i = 0; i < NUM_HOST_REGS; i++)2544{2545if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || 
data_register != i))2546num_gprs++;2547}25482549const u32 stack_size = (((num_gprs + 1) & ~1u) * GPR_SIZE);25502551if (stack_size > 0)2552{2553rvAsm->ADDI(sp, sp, -static_cast<s32>(stack_size));25542555u32 stack_offset = 0;2556for (u32 i = 0; i < NUM_HOST_REGS; i++)2557{2558if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))2559{2560rvAsm->SD(GPR(i), stack_offset, sp);2561stack_offset += GPR_SIZE;2562}2563}2564}25652566if (cycles_to_add != 0)2567{2568// NOTE: we have to reload here, because memory writes can run DMA, which can screw with cycles2569Assert(rvIsValidSExtITypeImm(cycles_to_add));2570rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));2571rvAsm->ADDIW(RSCRATCH, RSCRATCH, cycles_to_add);2572rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));2573}25742575if (address_register != RARG1.Index())2576rvAsm->MV(RARG1, GPR(address_register));25772578if (!is_load)2579{2580if (data_register != RARG2.Index())2581rvAsm->MV(RARG2, GPR(data_register));2582}25832584switch (size)2585{2586case MemoryAccessSize::Byte:2587{2588rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryByte) :2589reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryByte));2590}2591break;2592case MemoryAccessSize::HalfWord:2593{2594rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryHalfWord) :2595reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryHalfWord));2596}2597break;2598case MemoryAccessSize::Word:2599{2600rvEmitCall(rvAsm, is_load ? reinterpret_cast<const void*>(&RecompilerThunks::UncheckedReadMemoryWord) :2601reinterpret_cast<const void*>(&RecompilerThunks::UncheckedWriteMemoryWord));2602}2603break;2604}26052606if (is_load)2607{2608const GPR dst = GPR(data_register);2609switch (size)2610{2611case MemoryAccessSize::Byte:2612{2613is_signed ? 
rvEmitSExtB(rvAsm, dst, RRET) : rvEmitUExtB(rvAsm, dst, RRET);2614}2615break;2616case MemoryAccessSize::HalfWord:2617{2618is_signed ? rvEmitSExtH(rvAsm, dst, RRET) : rvEmitUExtH(rvAsm, dst, RRET);2619}2620break;2621case MemoryAccessSize::Word:2622{2623if (dst.Index() != RRET.Index())2624rvAsm->MV(dst, RRET);2625}2626break;2627}2628}26292630if (cycles_to_remove != 0)2631{2632Assert(rvIsValidSExtITypeImm(-cycles_to_remove));2633rvAsm->LW(RSCRATCH, PTR(&g_state.pending_ticks));2634rvAsm->ADDIW(RSCRATCH, RSCRATCH, -cycles_to_remove);2635rvAsm->SW(RSCRATCH, PTR(&g_state.pending_ticks));2636}26372638// restore regs2639if (stack_size > 0)2640{2641u32 stack_offset = 0;2642for (u32 i = 0; i < NUM_HOST_REGS; i++)2643{2644if ((gpr_bitmask & (1u << i)) && rvIsCallerSavedRegister(i) && (!is_load || data_register != i))2645{2646rvAsm->LD(GPR(i), stack_offset, sp);2647stack_offset += GPR_SIZE;2648}2649}26502651rvAsm->ADDI(sp, sp, stack_size);2652}26532654rvEmitJmp(rvAsm, static_cast<const u8*>(code_address) + code_size);26552656return static_cast<u32>(rvAsm->GetCodeBuffer().GetSizeInBytes());2657}26582659#endif // CPU_ARCH_RISCV64266026612662