// [Web viewer boilerplate captured together with this file; not part of the original source.]
// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/Core/MIPS/ARM64/Arm64Asm.cpp
// Views: 1401
// Copyright (c) 2015- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "ppsspp_config.h"1819#if PPSSPP_ARCH(ARM64)2021#include "Common/Log.h"22#include "Common/MemoryUtil.h"23#include "Common/CPUDetect.h"24#include "Common/Arm64Emitter.h"25#include "Core/MemMap.h"26#include "Core/MIPS/MIPS.h"27#include "Core/System.h"28#include "Core/CoreTiming.h"29#include "Core/MIPS/ARM64/Arm64Jit.h"30#include "Core/MIPS/JitCommon/JitCommon.h"3132using namespace Arm64Gen;3334//static int temp32; // unused?3536static const bool enableDebug = false;37static const bool enableDisasm = false;3839//static bool enableStatistics = false; //unused?404142// ARM64 calling conventions43// Standard: http://infocenter.arm.com/help/topic/com.arm.doc.ihi0055b/IHI0055B_aapcs64.pdf44// Apple: https://developer.apple.com/library/ios/documentation/Xcode/Conceptual/iPhoneOSABIReference/Articles/ARM64FunctionCallingConventions.html4546// Summary:47// ===========48// SP ("x31") is not a GPR so irrelevant.49// x0-x7: 8 parameter/result registers50// x8: "Indirect result location register" (points to struct return values? 
I think we can map this)51// x9-x15: 7 temporary registers (no need to save)52// x16: temporary register/procedure call scratch register 153// x17: temporary register/procedure call scratch register 254// x18: unavailable (reserved for use by the OS or linker or whatever - iOS, for example, uses it)55// x19-x28: 10 callee-saved registers56// x29: the frame pointer register57// x30: link register for procedure calls5859// So: Scratch registers: x16, x1760// Mappable registers in priority order:61// x19, x20, x21, x22, x23, (x24, x25, x26, x27, x28), x0, x1, x2, x3, x4, x5, x6, x7, x8, x9, x10, x11, x12, x13, x14, x15, x0, x1,62// That's a whole lot of registers so we might be able to statically allocate a bunch of common MIPS registers.63// We should put statically allocated registers in the 7 callee-save regs that are left over after the system regs (x19-x25), so we don't have to bother with64// saving them when we call out of the JIT. We will perform regular dynamic register allocation in the rest (x0-x15)6566// STATIC ALLOCATION ARM64 (these are all callee-save registers):67// x23 : Down counter68// x24 : PC save on JR with non-nice delay slot (to be eliminated later?)69// x25 : MSR/MRS temporary (to be eliminated later)70// x26 : JIT base reg71// x27 : MIPS state (Could eliminate by placing the MIPS state right at the memory base)72// x28 : Memory base pointer.7374extern volatile CoreState coreState;7576void ShowPC(u32 downcount, void *membase, void *jitbase) {77static int count = 0;78if (currentMIPS) {79ERROR_LOG(Log::JIT, "ShowPC : %08x Downcount : %08x %d %p %p", currentMIPS->pc, downcount, count, membase, jitbase);80} else {81ERROR_LOG(Log::JIT, "Universe corrupt?");82}83//if (count > 2000)84// exit(0);85count++;86}8788void DisassembleArm(const u8 *data, int size);8990// PLAN: no more block numbers - crazy opcodes just contain offset within91// dynarec buffer92// At this offset - 4, there is an int specifying the block number.9394namespace MIPSComp 
{9596using namespace Arm64JitConstants;9798void Arm64Jit::GenerateFixedCode(const JitOptions &jo) {99BeginWrite(GetMemoryProtectPageSize());100const u8 *start = AlignCodePage();101102if (jo.useStaticAlloc) {103saveStaticRegisters = AlignCode16();104STR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));105gpr.EmitSaveStaticRegisters();106RET();107108loadStaticRegisters = AlignCode16();109gpr.EmitLoadStaticRegisters();110LDR(INDEX_UNSIGNED, DOWNCOUNTREG, CTXREG, offsetof(MIPSState, downcount));111RET();112113start = saveStaticRegisters;114} else {115saveStaticRegisters = nullptr;116loadStaticRegisters = nullptr;117}118119restoreRoundingMode = AlignCode16(); {120MRS(SCRATCH2_64, FIELD_FPCR);121// We are not in flush-to-zero mode outside the JIT, so let's turn it off.122uint32_t mask = ~(4 << 22);123// Assume we're always in round-to-nearest mode beforehand.124mask &= ~(3 << 22);125ANDI2R(SCRATCH2, SCRATCH2, mask);126_MSR(FIELD_FPCR, SCRATCH2_64);127RET();128}129130applyRoundingMode = AlignCode16(); {131LDR(INDEX_UNSIGNED, SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));132TSTI2R(SCRATCH2, 1 << 24);133ANDI2R(SCRATCH2, SCRATCH2, 3);134FixupBranch skip1 = B(CC_EQ);135ADDI2R(SCRATCH2, SCRATCH2, 4);136SetJumpTarget(skip1);137138// We can skip if the rounding mode is nearest (0) and flush is not set.139// (as restoreRoundingMode cleared it out anyway)140CMPI2R(SCRATCH2, 0);141FixupBranch skip = B(CC_EQ);142143// MIPS Rounding Mode: ARM Rounding Mode144// 0: Round nearest 0145// 1: Round to zero 3146// 2: Round up (ceil) 1147// 3: Round down (floor) 2148ANDI2R(SCRATCH1, SCRATCH2, 3);149CMPI2R(SCRATCH1, 1);150151FixupBranch skipadd = B(CC_NEQ);152ADDI2R(SCRATCH2, SCRATCH2, 2);153SetJumpTarget(skipadd);154FixupBranch skipsub = B(CC_LE);155SUBI2R(SCRATCH2, SCRATCH2, 1);156SetJumpTarget(skipsub);157158// Actually change the system FPCR register159MRS(SCRATCH1_64, FIELD_FPCR);160// Clear both flush-to-zero and rounding before re-setting them.161ANDI2R(SCRATCH1, 
SCRATCH1, ~((4 | 3) << 22));162ORR(SCRATCH1, SCRATCH1, SCRATCH2, ArithOption(SCRATCH2, ST_LSL, 22));163_MSR(FIELD_FPCR, SCRATCH1_64);164165SetJumpTarget(skip);166RET();167}168169updateRoundingMode = AlignCode16(); {170LDR(INDEX_UNSIGNED, SCRATCH2, CTXREG, offsetof(MIPSState, fcr31));171172// Set SCRATCH2 to FZ:RM (FZ is bit 24, and RM are lowest 2 bits.)173TSTI2R(SCRATCH2, 1 << 24);174ANDI2R(SCRATCH2, SCRATCH2, 3);175FixupBranch skip = B(CC_EQ);176ADDI2R(SCRATCH2, SCRATCH2, 4);177SetJumpTarget(skip);178179// Let's update js.currentRoundingFunc with the right convertS0ToSCRATCH1 func.180MOVP2R(SCRATCH1_64, convertS0ToSCRATCH1);181LSL(SCRATCH2, SCRATCH2, 3);182LDR(SCRATCH2_64, SCRATCH1_64, SCRATCH2);183MOVP2R(SCRATCH1_64, &js.currentRoundingFunc);184STR(INDEX_UNSIGNED, SCRATCH2_64, SCRATCH1_64, 0);185RET();186}187188enterDispatcher = AlignCode16();189190uint32_t regs_to_save = Arm64Gen::ALL_CALLEE_SAVED;191uint32_t regs_to_save_fp = Arm64Gen::ALL_CALLEE_SAVED_FP;192fp.ABI_PushRegisters(regs_to_save, regs_to_save_fp);193194// Fixed registers, these are always kept when in Jit context.195MOVP2R(MEMBASEREG, Memory::base);196MOVP2R(CTXREG, mips_);197MOVP2R(JITBASEREG, GetBasePtr());198199LoadStaticRegisters();200MovFromPC(SCRATCH1);201outerLoopPCInSCRATCH1 = GetCodePtr();202MovToPC(SCRATCH1);203outerLoop = GetCodePtr();204SaveStaticRegisters(); // Advance can change the downcount, so must save/restore205RestoreRoundingMode(true);206QuickCallFunction(SCRATCH1_64, &CoreTiming::Advance);207ApplyRoundingMode(true);208LoadStaticRegisters();209FixupBranch skipToCoreStateCheck = B(); //skip the downcount check210211dispatcherCheckCoreState = GetCodePtr();212213// The result of slice decrementation should be in flags if somebody jumped here214// IMPORTANT - We jump on negative, not carry!!!215FixupBranch bailCoreState = B(CC_MI);216217SetJumpTarget(skipToCoreStateCheck);218219MOVP2R(SCRATCH1_64, &coreState);220LDR(INDEX_UNSIGNED, SCRATCH1, SCRATCH1_64, 0);221CMP(SCRATCH1, 
0);222FixupBranch badCoreState = B(CC_NEQ);223FixupBranch skipToRealDispatch2 = B(); //skip the sync and compare first time224225dispatcherPCInSCRATCH1 = GetCodePtr();226// TODO: Do we always need to write PC to RAM here?227MovToPC(SCRATCH1);228229// At this point : flags = EQ. Fine for the next check, no need to jump over it.230dispatcher = GetCodePtr();231232// The result of slice decrementation should be in flags if somebody jumped here233// IMPORTANT - We jump on negative, not carry!!!234FixupBranch bail = B(CC_MI);235236SetJumpTarget(skipToRealDispatch2);237238dispatcherNoCheck = GetCodePtr();239240// Debug241if (enableDebug) {242MOV(W0, DOWNCOUNTREG);243MOV(X1, MEMBASEREG);244MOV(X2, JITBASEREG);245QuickCallFunction(SCRATCH1_64, (void *)&ShowPC);246}247248LDR(INDEX_UNSIGNED, SCRATCH1, CTXREG, offsetof(MIPSState, pc));249#ifdef MASKED_PSP_MEMORY250ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF);251#endif252dispatcherFetch = GetCodePtr();253LDR(SCRATCH1, MEMBASEREG, SCRATCH1_64);254LSR(SCRATCH2, SCRATCH1, 24); // or UBFX(SCRATCH2, SCRATCH1, 24, 8)255ANDI2R(SCRATCH1, SCRATCH1, 0x00FFFFFF);256CMP(SCRATCH2, MIPS_EMUHACK_OPCODE >> 24);257FixupBranch skipJump = B(CC_NEQ);258ADD(SCRATCH1_64, JITBASEREG, SCRATCH1_64);259BR(SCRATCH1_64);260SetJumpTarget(skipJump);261262// No block found, let's jit. 
I don't think we actually need to save static regs that are in callee-save regs here but whatever.263// Also, rounding mode gotta be irrelevant here..264SaveStaticRegisters();265RestoreRoundingMode(true);266QuickCallFunction(SCRATCH1_64, (void *)&MIPSComp::JitAt);267ApplyRoundingMode(true);268LoadStaticRegisters();269270B(dispatcherNoCheck); // no point in special casing this271272SetJumpTarget(bail);273SetJumpTarget(bailCoreState);274275MOVP2R(SCRATCH1_64, &coreState);276LDR(INDEX_UNSIGNED, SCRATCH1, SCRATCH1_64, 0);277CMP(SCRATCH1, 0);278B(CC_EQ, outerLoop);279280const uint8_t *quitLoop = GetCodePtr();281SetJumpTarget(badCoreState);282283SaveStaticRegisters();284RestoreRoundingMode(true);285286fp.ABI_PopRegisters(regs_to_save, regs_to_save_fp);287288RET();289290crashHandler = GetCodePtr();291MOVP2R(SCRATCH1_64, &coreState);292MOVI2R(SCRATCH2, CORE_RUNTIME_ERROR);293STR(INDEX_UNSIGNED, SCRATCH2, SCRATCH1_64, 0);294B(quitLoop);295296// Generate some integer conversion funcs.297// MIPS order!298static const RoundingMode roundModes[8] = { ROUND_N, ROUND_Z, ROUND_P, ROUND_M, ROUND_N, ROUND_Z, ROUND_P, ROUND_M };299for (size_t i = 0; i < ARRAY_SIZE(roundModes); ++i) {300convertS0ToSCRATCH1[i] = AlignCode16();301302fp.FCMP(S0, S0); // Detect NaN303fp.FCVTS(S0, S0, roundModes[i]);304FixupBranch skip = B(CC_VC);305MOVI2R(SCRATCH2, 0x7FFFFFFF);306fp.FMOV(S0, SCRATCH2);307SetJumpTarget(skip);308309RET();310}311312// Leave this at the end, add more stuff above.313if (enableDisasm) {314std::vector<std::string> lines = DisassembleArm64(start, (int)(GetCodePtr() - start));315for (auto s : lines) {316INFO_LOG(Log::JIT, "%s", s.c_str());317}318}319320// Let's spare the pre-generated code from unprotect-reprotect.321AlignCodePage();322jitStartOffset = (int)(GetCodePtr() - start);323// Don't forget to zap the instruction cache! This must stay at the end of this function.324FlushIcache();325EndWrite();326}327328} // namespace MIPSComp329330#endif // PPSSPP_ARCH(ARM64)331332333