// CoCalc provides the best real-time collaborative environment for Jupyter
// Notebooks, LaTeX documents, and SageMath, scalable from individual users
// to large groups and classes!
// Path: blob/master/Core/MIPS/ARM64/Arm64CompLoadStore.cpp
// Views: 1401
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM64)

#include "Core/MemMap.h"
#include "Core/Config.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/ARM64/Arm64Jit.h"
#include "Core/MIPS/ARM64/Arm64RegCache.h"

// Convenience accessors for the bit fields of the MIPS opcode `op` being compiled.
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)

// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.

//#define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
#define DISABLE { Comp_Generic(op); return; }

namespace MIPSComp {
	using namespace Arm64Gen;
	using namespace Arm64JitConstants;

	// Emits code that computes gpr[rs] + offset into SCRATCH1 (masked to
	// 0x3FFFFFFF when MASKED_PSP_MEMORY is defined). Destroys SCRATCH2
	void Arm64Jit::SetScratch1ToEffectiveAddress(MIPSGPReg rs, s16 offset) {
		if (offset) {
			// SCRATCH2 may be used by ADDI2R to materialize an offset it can't encode.
			ADDI2R(SCRATCH1, gpr.R(rs), offset, SCRATCH2);
		} else {
			MOV(SCRATCH1, gpr.R(rs));
		}
#ifdef MASKED_PSP_MEMORY
		ANDI2R(SCRATCH1, SCRATCH1, 0x3FFFFFFF);
#endif
	}

	// Emits range checks of the effective address (left in SCRATCH1) against the
	// PSP memory regions: user RAM, VRAM and scratchpad. Returns the fixup
	// branches that are taken when the address lies outside all valid regions;
	// the caller must SetJumpTarget() them after the memory access to skip it.
	// Clobbers tempReg, and SCRATCH2 via SetScratch1ToEffectiveAddress.
	std::vector<FixupBranch> Arm64Jit::SetScratch1ForSafeAddress(MIPSGPReg rs, s16 offset, ARM64Reg tempReg) {
		std::vector<FixupBranch> skips;

		SetScratch1ToEffectiveAddress(rs, offset);

		// We can do this a little smarter by shifting out the lower 8 bits, since blocks are 0x100 aligned.
		// PSP_GetUserMemoryEnd() is dynamic, but the others encode to imms just fine.
		// So we only need to safety check the one value.
		// This is because ARM64 immediates for many instructions like CMP can only encode
		// immediates up to 12 bits, shifted by 12 or not.

		if ((PSP_GetUserMemoryEnd() & 0x000FFFFF) == 0) {
			// In other words, shift right 8, and kill off the top 4 bits as we don't want them involved in the compares.
			UBFX(tempReg, SCRATCH1, 8, 24 - 4);
			// Now check if we're higher than that.
			CMPI2R(tempReg, PSP_GetUserMemoryEnd() >> 8);
		} else {
			// Compare first using the tempReg (need it because we have a full 28-bit value), then shift into it.
			ANDI2R(SCRATCH1, SCRATCH1, 0x0FFFFFFF);
			CMPI2R(SCRATCH1, PSP_GetUserMemoryEnd(), tempReg);
			UBFX(tempReg, SCRATCH1, 8, 24);
		}
		skips.push_back(B(CC_HS));

		// If its higher than memory start and we didn't skip yet, it must be good. Hurray.
		CMPI2R(tempReg, PSP_GetKernelMemoryBase() >> 8);
		FixupBranch inRAM = B(CC_HS);

		// If we got here and it's higher, then it's between VRAM and RAM - skip.
		CMPI2R(tempReg, PSP_GetVidMemEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And if it's higher the VRAM and we're still here again, it's in VRAM.
		CMPI2R(tempReg, PSP_GetVidMemBase() >> 8);
		FixupBranch inVRAM = B(CC_HS);

		// Last gap, this is between SRAM and VRAM. Skip it.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryEnd() >> 8);
		skips.push_back(B(CC_HS));

		// And for lower than SRAM, we just skip again.
		CMPI2R(tempReg, PSP_GetScratchpadMemoryBase() >> 8);
		skips.push_back(B(CC_LO));

		// At this point, we're either in SRAM (above) or in RAM/VRAM.
		SetJumpTarget(inRAM);
		SetJumpTarget(inVRAM);

		return skips;
	}

	// Compiles the unaligned MIPS memory ops: lwl (o==34), lwr (38), swl (42),
	// swr (46). `load` is true for lwl/lwr. When the adjacent instruction is the
	// matching left/right pair, the two are fused into a single lw/sw instead.
	void Arm64Jit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();
		int offset = SignExtend16ToS32(op & 0xFFFF);
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;

		if (!js.inDelaySlot && !jo.Disabled(JitDisable::LSU_UNALIGNED)) {
			// Optimisation: Combine to single unaligned load/store
			bool isLeft = (o == 34 || o == 42);
			CheckMemoryBreakpoint(1);
			MIPSOpcode nextOp = GetOffsetInstruction(1);
			// Find a matching shift in opposite direction with opposite offset.
			if (nextOp == (isLeft ? (op.encoding + (4 << 26) - 3) : (op.encoding - (4 << 26) + 3))) {
				EatInstruction(nextOp);
				nextOp = MIPSOpcode(((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x03FFFFFF)); //lw, sw
				Comp_ITypeMem(nextOp);
				return;
			}
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		std::vector<FixupBranch> skips;

		// Fast path: the address is known at compile time and valid, so the
		// mask/shift amounts become constants encoded straight into the code.
		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
			u32 addr = iaddr & 0x3FFFFFFF;
#else
			u32 addr = iaddr;
#endif
			// Need to initialize since this only loads part of the register.
			// But rs no longer matters (even if rs == rt) since we have the address.
			gpr.MapReg(rt, load ? MAP_DIRTY : 0);
			gpr.SetRegImm(SCRATCH1, addr & ~3);

			// Shift in bits: 8 * (byte position within the aligned word).
			u8 shift = (addr & 3) * 8;

			switch (o) {
			case 34: // lwl
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0x00ffffff >> shift, INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSL, 24 - shift));
				break;

			case 38: // lwr
				LDR(SCRATCH1, MEMBASEREG, SCRATCH1);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0xffffff00 << (24 - shift), INVALID_REG);
				ORR(gpr.R(rt), gpr.R(rt), SCRATCH1, ArithOption(gpr.R(rt), ST_LSR, shift));
				break;

			case 42: // swl
				// Read-modify-write of the aligned word around the store.
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0xffffff00 << shift, INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSR, 24 - shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;

			case 46: // swr
				LDR(SCRATCH2, MEMBASEREG, SCRATCH1);
				ANDI2R(SCRATCH2, SCRATCH2, 0x00ffffff >> (24 - shift), INVALID_REG);
				ORR(SCRATCH2, SCRATCH2, gpr.R(rt), ArithOption(gpr.R(rt), ST_LSL, shift));
				STR(SCRATCH2, MEMBASEREG, SCRATCH1);
				break;
			}
			return;
		}

		// Slow path: address only known at runtime; shift/mask are computed in
		// registers. Needs one temp for loads, two for stores.
		_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x? CPU bug?", iaddr);
		if (load) {
			gpr.MapDirtyIn(rt, rs, false);
		} else {
			gpr.MapInIn(rt, rs);
		}
		gpr.SpillLock(rt);
		gpr.SpillLock(rs);
		// Need to get temps before skipping safe mem.
		ARM64Reg LR_SCRATCH3 = gpr.GetAndLockTempR();
		ARM64Reg LR_SCRATCH4 = o == 42 || o == 46 ? gpr.GetAndLockTempR() : INVALID_REG;

		if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
			skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
		} else {
			SetScratch1ToEffectiveAddress(rs, offset);
		}

		// Here's our shift amount.
		ANDI2R(SCRATCH2, SCRATCH1, 3);
		LSL(SCRATCH2, SCRATCH2, 3);

		// Now align the address for the actual read.
		ANDI2R(SCRATCH1, SCRATCH1, ~3U);

		switch (o) {
		case 34: // lwl
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			// SCRATCH2 = 24 - shift, for the opposite-direction shift below.
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(SCRATCH1, SCRATCH1, SCRATCH2);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 38: // lwr
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(SCRATCH1, MEMBASEREG, ArithOption(SCRATCH1));
			LSRV(SCRATCH1, SCRATCH1, SCRATCH2);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(gpr.R(rt), gpr.R(rt), LR_SCRATCH3);
			ORR(gpr.R(rt), gpr.R(rt), SCRATCH1);
			break;

		case 42: // swl
			// Read-modify-write of the aligned word around the store.
			MOVI2R(LR_SCRATCH3, 0xffffff00);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			LSLV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);

			LSRV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;

		case 46: // swr
			MOVI2R(LR_SCRATCH3, 0x00ffffff);
			LDR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSRV(LR_SCRATCH3, LR_SCRATCH3, SCRATCH2);
			AND(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			// Negate twice to restore the original shift amount in SCRATCH2.
			NEG(SCRATCH2, SCRATCH2);
			ADDI2R(SCRATCH2, SCRATCH2, 24);
			LSLV(LR_SCRATCH3, gpr.R(rt), SCRATCH2);
			ORR(LR_SCRATCH4, LR_SCRATCH4, LR_SCRATCH3);
			STR(LR_SCRATCH4, MEMBASEREG, ArithOption(SCRATCH1));
			break;
		}

		for (auto skip : skips) {
			SetJumpTarget(skip);
		}

		gpr.ReleaseSpillLocksAndDiscardTemps();
	}

	// Compiles the I-type memory instructions: lb/lh/lw/lbu/lhu and sb/sh/sw.
	// lwl/lwr/swl/swr are delegated to Comp_ITypeMemLR above.
	void Arm64Jit::Comp_ITypeMem(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();

		int offset = SignExtend16ToS32(op & 0xFFFF);
		bool load = false;
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;
		if (((op >> 29) & 1) == 0 && rt == MIPS_REG_ZERO) {
			// Don't load anything into $zr
			return;
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		std::vector<FixupBranch> skips;
		ARM64Reg targetReg = INVALID_REG;
		ARM64Reg addrReg = INVALID_REG;

		// Access size in bytes, used to validate offset alignment below.
		int dataSize = 4;
		switch (o) {
		case 37:
		case 33:
			dataSize = 2;
			break;
		case 36:
		case 32:
			dataSize = 1;
			break;
		// Store
		case 41:
			dataSize = 2;
			break;
		case 40:
			dataSize = 1;
			break;
		}

		switch (o) {
		case 32: //lb
		case 33: //lh
		case 35: //lw
		case 36: //lbu
		case 37: //lhu
			load = true;
			// fall through: loads and stores share the code below.
		case 40: //sb
		case 41: //sh
		case 43: //sw
#ifndef MASKED_PSP_MEMORY
			if (jo.cachePointers && g_Config.bFastMemory) {
				// ARM has smaller load/store immediate displacements than MIPS, 12 bits - and some memory ops only have 8 bits.
				int offsetRange = 0x3ff;
				if (o == 41 || o == 33 || o == 37 || o == 32)
					offsetRange = 0xff; // 8 bit offset only
				if (!gpr.IsImm(rs) && rs != rt && (offset <= offsetRange) && offset >= 0 &&
					(dataSize == 1 || (offset & (dataSize - 1)) == 0)) { // Check that the offset is aligned to the access size as that's required for INDEX_UNSIGNED encodings. we can get here through fallback from lwl/lwr
					gpr.SpillLock(rs, rt);
					gpr.MapRegAsPointer(rs);

					// For a store, try to avoid mapping a reg if not needed.
					targetReg = load ? INVALID_REG : gpr.TryMapTempImm(rt);
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}

					switch (o) {
					case 35: LDR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 37: LDRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 33: LDRSH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 36: LDRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 32: LDRSB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					// Store
					case 43: STR(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 41: STRH(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					case 40: STRB(INDEX_UNSIGNED, targetReg, gpr.RPtr(rs), offset); break;
					}
					gpr.ReleaseSpillLocksAndDiscardTemps();
					break;
				}
			}
#endif

			if (!load && gpr.IsImm(rt) && gpr.TryMapTempImm(rt) != INVALID_REG) {
				// We're storing an immediate value, let's see if we can optimize rt.
				if (!gpr.IsImm(rs) || !Memory::IsValidAddress(iaddr) || offset == 0) {
					// In this case, we're always going to need rs mapped, which may flush the temp imm.
					// We handle that in the cases below since targetReg is INVALID_REG.
					gpr.MapIn(rs);
				}

				targetReg = gpr.TryMapTempImm(rt);
			}

			if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
#ifdef MASKED_PSP_MEMORY
				u32 addr = iaddr & 0x3FFFFFFF;
#else
				u32 addr = iaddr;
#endif
				if (addr == iaddr && offset == 0) {
					// It was already safe. Let's shove it into a reg and use it directly.
					if (targetReg == INVALID_REG) {
						load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
						targetReg = gpr.R(rt);
					}
					addrReg = gpr.R(rs);
				} else {
					// In this case, only map rt. rs+offset will be in SCRATCH1.
					if (targetReg == INVALID_REG) {
						gpr.MapReg(rt, load ? MAP_NOINIT : 0);
						targetReg = gpr.R(rt);
					}
					gpr.SetRegImm(SCRATCH1, addr);
					addrReg = SCRATCH1;
				}
			} else {
				// This gets hit in a few games, as a result of never-taken delay slots (some branch types
				// conditionally execute the delay slot instructions). Ignore in those cases.
				if (!js.inDelaySlot) {
					_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x? CPU bug?", iaddr);
				}

				// If we already have a targetReg, we optimized an imm, and rs is already mapped.
				if (targetReg == INVALID_REG) {
					if (load) {
						gpr.MapDirtyIn(rt, rs);
					} else {
						gpr.MapInIn(rt, rs);
					}
					targetReg = gpr.R(rt);
				}

				if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
					skips = SetScratch1ForSafeAddress(rs, offset, SCRATCH2);
				} else {
					SetScratch1ToEffectiveAddress(rs, offset);
				}
				addrReg = SCRATCH1;
			}

			switch (o) {
			// Load
			case 35: LDR(targetReg, MEMBASEREG, addrReg); break;
			case 37: LDRH(targetReg, MEMBASEREG, addrReg); break;
			case 33: LDRSH(targetReg, MEMBASEREG, addrReg); break;
			case 36: LDRB(targetReg, MEMBASEREG, addrReg); break;
			case 32: LDRSB(targetReg, MEMBASEREG, addrReg); break;
			// Store
			case 43: STR(targetReg, MEMBASEREG, addrReg); break;
			case 41: STRH(targetReg, MEMBASEREG, addrReg); break;
			case 40: STRB(targetReg, MEMBASEREG, addrReg); break;
			}
			for (auto skip : skips) {
				SetJumpTarget(skip);
				// TODO: Could clear to zero here on load, if skipping this for good reads.
			}
			break;
		case 34: //lwl
		case 38: //lwr
			load = true;
			// fall through: Comp_ITypeMemLR handles both directions.
		case 42: //swl
		case 46: //swr
			Comp_ITypeMemLR(op, load);
			break;
		default:
			Comp_Generic(op);
			return;
		}
	}

	// sc (store conditional) - not implemented in this JIT; always falls back
	// to the interpreter via DISABLE.
	void Arm64Jit::Comp_StoreSync(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		DISABLE;
	}

	// cache instruction: the listed functions are no-ops here; anything else
	// falls back to the interpreter.
	void Arm64Jit::Comp_Cache(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		int func = (op >> 16) & 0x1F;

		// See Int_Cache for the definitions.
		switch (func) {
		case 24: break;
		case 25: break;
		case 27: break;
		case 30: break;
		default:
			// Fall back to the interpreter.
			DISABLE;
		}
	}
}  // namespace MIPSComp

#endif // PPSSPP_ARCH(ARM64)