CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/ARM/ArmCompLoadStore.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#include "ppsspp_config.h"
#if PPSSPP_ARCH(ARM)

#include "Core/MemMap.h"
#include "Core/Config.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSAnalyst.h"
#include "Core/MIPS/MIPSCodeUtils.h"
#include "Core/MIPS/ARM/ArmJit.h"
#include "Core/MIPS/ARM/ArmRegCache.h"

// Field extraction helpers for the MIPS opcode currently being compiled ("op").
#define _RS MIPS_GET_RS(op)
#define _RT MIPS_GET_RT(op)
#define _RD MIPS_GET_RD(op)
#define _FS MIPS_GET_FS(op)
#define _FT MIPS_GET_FT(op)
#define _FD MIPS_GET_FD(op)
#define _SA MIPS_GET_SA(op)
#define _POS ((op>> 6) & 0x1F)
#define _SIZE ((op>>11) & 0x1F)
#define _IMM16 (signed short)(op & 0xFFFF)
#define _IMM26 (op & 0x03FFFFFF)

// All functions should have CONDITIONAL_DISABLE, so we can narrow things down to a file quickly.
// Currently known non working ones should have DISABLE.

// #define CONDITIONAL_DISABLE(flag) { Comp_Generic(op); return; }
#define CONDITIONAL_DISABLE(flag) if (jo.Disabled(JitDisable::flag)) { Comp_Generic(op); return; }
#define DISABLE { Comp_Generic(op); return; }

namespace MIPSComp
{
	using namespace ArmGen;
	using namespace ArmJitConstants;

	// Emits code that leaves (rs + offset) & 0x3FFFFFFF in R0.
	// The BIC with Operand2(0xC0, 4) masks off the top two address bits
	// (i.e. &= 0x3FFFFFFF), folding mirrored PSP address ranges together.
	void ArmJit::SetR0ToEffectiveAddress(MIPSGPReg rs, s16 offset) {
		Operand2 op2;
		if (offset) {
			bool negated;
			// Prefer a single ADD/SUB with an immediate Operand2 when the
			// offset (or its negation) can be encoded directly.
			if (TryMakeOperand2_AllowNegation(offset, op2, &negated)) {
				if (!negated)
					ADD(R0, gpr.R(rs), op2);
				else
					SUB(R0, gpr.R(rs), op2);
			} else {
				// Try to avoid using MOVT
				if (offset < 0) {
					gpr.SetRegImm(R0, (u32)(-offset));
					SUB(R0, gpr.R(rs), R0);
				} else {
					gpr.SetRegImm(R0, (u32)offset);
					ADD(R0, gpr.R(rs), R0);
				}
			}
			BIC(R0, R0, Operand2(0xC0, 4)); // &= 0x3FFFFFFF
		} else {
			// No offset: mask the base register directly into R0.
			BIC(R0, gpr.R(rs), Operand2(0xC0, 4)); // &= 0x3FFFFFFF
		}
	}

	// Emits a range check for the effective address (left in R0 by
	// SetR0ToEffectiveAddress). On exit the current condition is set so that
	// subsequent conditional instructions execute when the address is valid
	// (or invalid, if reverse == true). tempReg is clobbered as a bitmask
	// of candidate ranges.
	void ArmJit::SetCCAndR0ForSafeAddress(MIPSGPReg rs, s16 offset, ARMReg tempReg, bool reverse) {
		SetR0ToEffectiveAddress(rs, offset);

		// There are three valid ranges. Each one gets a bit.
		const u32 BIT_SCRATCH = 1, BIT_RAM = 2, BIT_VRAM = 4;
		MOVI2R(tempReg, BIT_SCRATCH | BIT_RAM | BIT_VRAM);

		// Clear BIT_SCRATCH unless scratchpadBase <= R0 < scratchpadEnd.
		CMP(R0, AssumeMakeOperand2(PSP_GetScratchpadMemoryBase()));
		SetCC(CC_LO);
		BIC(tempReg, tempReg, BIT_SCRATCH);
		SetCC(CC_HS);
		CMP(R0, AssumeMakeOperand2(PSP_GetScratchpadMemoryEnd()));
		BIC(tempReg, tempReg, BIT_SCRATCH);

		// If it was in that range, later compares don't matter.
		CMP(R0, AssumeMakeOperand2(PSP_GetVidMemBase()));
		SetCC(CC_LO);
		BIC(tempReg, tempReg, BIT_VRAM);
		SetCC(CC_HS);
		CMP(R0, AssumeMakeOperand2(PSP_GetVidMemEnd()));
		BIC(tempReg, tempReg, BIT_VRAM);

		// Clear BIT_RAM unless kernelBase <= R0 < userEnd.
		CMP(R0, AssumeMakeOperand2(PSP_GetKernelMemoryBase()));
		SetCC(CC_LO);
		BIC(tempReg, tempReg, BIT_RAM);
		SetCC(CC_HS);
		CMP(R0, AssumeMakeOperand2(PSP_GetUserMemoryEnd()));
		BIC(tempReg, tempReg, BIT_RAM);

		// If we left any bit set, the address is OK.
		SetCC(CC_AL);
		CMP(tempReg, 0);
		SetCC(reverse ? CC_EQ : CC_GT);
	}

	// Compiles the unaligned load/store ops lwl (34), lwr (38), swl (42),
	// swr (46). When a matching opposite-direction pair is found in the next
	// instruction, the pair is merged into a single aligned lw/sw.
	void ArmJit::Comp_ITypeMemLR(MIPSOpcode op, bool load) {
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();
		int offset = SignExtend16ToS32(op & 0xFFFF);
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op >> 26;

		if (!js.inDelaySlot && !jo.Disabled(JitDisable::LSU_UNALIGNED)) {
			// Optimisation: Combine to single unaligned load/store
			bool isLeft = (o == 34 || o == 42);
			CheckMemoryBreakpoint(1);
			MIPSOpcode nextOp = GetOffsetInstruction(1);
			// Find a matching shift in opposite direction with opposite offset.
			// E.g. lwl rt, 3(rs) followed by lwr rt, 0(rs): the pair differs
			// only in the funct field (+/- 4<<26) and offset (-/+ 3).
			if (nextOp == (isLeft ? (op.encoding + (4<<26) - 3)
			                      : (op.encoding - (4<<26) + 3)))
			{
				EatInstruction(nextOp);
				nextOp = MIPSOpcode(((load ? 35 : 43) << 26) | ((isLeft ? nextOp : op) & 0x03FFFFFF)); //lw, sw
				Comp_ITypeMem(nextOp);
				return;
			}
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		bool doCheck = false;
		FixupBranch skip;

		if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
			// Compile-time-known address: emit a fixed word access with
			// constant shifts/masks derived from the low two address bits.
			u32 addr = iaddr & 0x3FFFFFFF;
			// Need to initialize since this only loads part of the register.
			// But rs no longer matters (even if rs == rt) since we have the address.
			gpr.MapReg(rt, load ? MAP_DIRTY : 0);
			gpr.SetRegImm(R0, addr & ~3);

			u8 shift = (addr & 3) * 8;

			switch (o) {
			case 34: // lwl
				LDR(R0, MEMBASEREG, R0);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0x00ffffff >> shift, SCRATCHREG2);
				ORR(gpr.R(rt), gpr.R(rt), Operand2(R0, ST_LSL, 24 - shift));
				break;

			case 38: // lwr
				LDR(R0, MEMBASEREG, R0);
				ANDI2R(gpr.R(rt), gpr.R(rt), 0xffffff00 << (24 - shift), SCRATCHREG2);
				ORR(gpr.R(rt), gpr.R(rt), Operand2(R0, ST_LSR, shift));
				break;

			case 42: // swl
				LDR(SCRATCHREG2, MEMBASEREG, R0);
				// Don't worry, can't use temporary.
				ANDI2R(SCRATCHREG2, SCRATCHREG2, 0xffffff00 << shift, R0);
				ORR(SCRATCHREG2, SCRATCHREG2, Operand2(gpr.R(rt), ST_LSR, 24 - shift));
				STR(SCRATCHREG2, MEMBASEREG, R0);
				break;

			case 46: // swr
				LDR(SCRATCHREG2, MEMBASEREG, R0);
				// Don't worry, can't use temporary.
				ANDI2R(SCRATCHREG2, SCRATCHREG2, 0x00ffffff >> (24 - shift), R0);
				ORR(SCRATCHREG2, SCRATCHREG2, Operand2(gpr.R(rt), ST_LSL, shift));
				STR(SCRATCHREG2, MEMBASEREG, R0);
				break;
			}
			return;
		}

		// This gets hit in a few games, as a result of never-taken delay slots (some branch types
		// conditionally execute the delay slot instructions). Ignore in those cases.
		if (!js.inDelaySlot) {
			_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address %08x? CPU bug?", iaddr);
		}

		if (load) {
			gpr.MapDirtyIn(rt, rs, false);
		} else {
			gpr.MapInIn(rt, rs);
		}

		if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
			// reverse == true: conditions below execute when the address is BAD,
			// so the fixup branch "skip" jumps over the access for bad addresses.
			SetCCAndR0ForSafeAddress(rs, offset, SCRATCHREG2, true);
			doCheck = true;
		} else {
			SetR0ToEffectiveAddress(rs, offset);
		}
		if (doCheck) {
			skip = B();
		}
		SetCC(CC_AL);

		// Need temp regs. TODO: Get from the regcache?
		// NOTE(review): R9/R10 appear to be assumed free here and are saved/
		// restored around the access via PUSH/POP — confirm against the JIT's
		// register allocation before changing.
		static const ARMReg LR_SCRATCHREG3 = R9;
		static const ARMReg LR_SCRATCHREG4 = R10;
		if (load) {
			PUSH(1, LR_SCRATCHREG3);
		} else {
			PUSH(2, LR_SCRATCHREG3, LR_SCRATCHREG4);
		}

		// Here's our shift amount.
		AND(SCRATCHREG2, R0, 3);
		LSL(SCRATCHREG2, SCRATCHREG2, 3);

		// Now align the address for the actual read.
		BIC(R0, R0, 3);

		switch (o) {
		case 34: // lwl
			MOVI2R(LR_SCRATCHREG3, 0x00ffffff);
			LDR(R0, MEMBASEREG, R0);
			AND(gpr.R(rt), gpr.R(rt), Operand2(LR_SCRATCHREG3, ST_LSR, SCRATCHREG2));
			RSB(SCRATCHREG2, SCRATCHREG2, 24);
			ORR(gpr.R(rt), gpr.R(rt), Operand2(R0, ST_LSL, SCRATCHREG2));
			break;

		case 38: // lwr
			MOVI2R(LR_SCRATCHREG3, 0xffffff00);
			LDR(R0, MEMBASEREG, R0);
			LSR(R0, R0, SCRATCHREG2);
			RSB(SCRATCHREG2, SCRATCHREG2, 24);
			AND(gpr.R(rt), gpr.R(rt), Operand2(LR_SCRATCHREG3, ST_LSL, SCRATCHREG2));
			ORR(gpr.R(rt), gpr.R(rt), R0);
			break;

		case 42: // swl
			MOVI2R(LR_SCRATCHREG3, 0xffffff00);
			LDR(LR_SCRATCHREG4, MEMBASEREG, R0);
			AND(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(LR_SCRATCHREG3, ST_LSL, SCRATCHREG2));
			RSB(SCRATCHREG2, SCRATCHREG2, 24);
			ORR(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(gpr.R(rt), ST_LSR, SCRATCHREG2));
			STR(LR_SCRATCHREG4, MEMBASEREG, R0);
			break;

		case 46: // swr
			MOVI2R(LR_SCRATCHREG3, 0x00ffffff);
			LDR(LR_SCRATCHREG4, MEMBASEREG, R0);
			// RSB twice: mask with (24 - shift), then restore shift for the ORR.
			RSB(SCRATCHREG2, SCRATCHREG2, 24);
			AND(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(LR_SCRATCHREG3, ST_LSR, SCRATCHREG2));
			RSB(SCRATCHREG2, SCRATCHREG2, 24);
			ORR(LR_SCRATCHREG4, LR_SCRATCHREG4, Operand2(gpr.R(rt), ST_LSL, SCRATCHREG2));
			STR(LR_SCRATCHREG4, MEMBASEREG, R0);
			break;
		}

		if (load) {
			POP(1, LR_SCRATCHREG3);
		} else {
			POP(2, LR_SCRATCHREG3, LR_SCRATCHREG4);
		}

		if (doCheck) {
			SetJumpTarget(skip);
		}
	}

	// Compiles the aligned I-type memory ops: lb/lh/lw/lbu/lhu and sb/sh/sw.
	// Unaligned lwl/lwr/swl/swr are forwarded to Comp_ITypeMemLR.
	void ArmJit::Comp_ITypeMem(MIPSOpcode op)
	{
		CONDITIONAL_DISABLE(LSU);
		CheckMemoryBreakpoint();
		int offset = (signed short)(op&0xFFFF);
		bool load = false;
		MIPSGPReg rt = _RT;
		MIPSGPReg rs = _RS;
		int o = op>>26;
		// Bit 29 clear means a load opcode (32..39 range here).
		if (((op >> 29) & 1) == 0 && rt == MIPS_REG_ZERO) {
			// Don't load anything into $zr
			return;
		}

		u32 iaddr = gpr.IsImm(rs) ? offset + gpr.GetImm(rs) : 0xFFFFFFFF;
		bool doCheck = false;
		ARMReg addrReg = R0;

		switch (o) {
		case 32: //lb
		case 33: //lh
		case 35: //lw
		case 36: //lbu
		case 37: //lhu
			load = true;
			// Intentional fall through - loads and stores share address handling.
		case 40: //sb
		case 41: //sh
		case 43: //sw
			// Map base register as pointer and go from there - if the displacement isn't too big.
			// This is faster if there are multiple loads from the same pointer. Need to hook up the MIPS analyzer..
			if (jo.cachePointers && g_Config.bFastMemory) {
				// ARM has smaller load/store immediate displacements than MIPS, 12 bits - and some memory ops only have 8 bits.
				int offsetRange = 0x3ff;
				if (o == 41 || o == 33 || o == 37 || o == 32)
					offsetRange = 0xff; // 8 bit offset only
				if (!gpr.IsImm(rs) && rs != rt && (offset <= offsetRange) && offset >= -offsetRange) {
					gpr.SpillLock(rs, rt);
					gpr.MapRegAsPointer(rs);
					gpr.MapReg(rt, load ? MAP_NOINIT : 0);
					switch (o) {
					case 35: LDR  (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					case 37: LDRH (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					case 33: LDRSH(gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					case 36: LDRB (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					case 32: LDRSB(gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					// Store
					case 43: STR  (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					case 41: STRH (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					case 40: STRB (gpr.R(rt), gpr.RPtr(rs), Operand2(offset, TYPE_IMM)); break;
					}
					gpr.ReleaseSpillLocks();
					break;
				}
			}

			if (gpr.IsImm(rs) && Memory::IsValidAddress(iaddr)) {
				// TODO: Avoid mapping a register for the "zero" register, use R0 instead.

				// We can compute the full address at compile time. Kickass.
				u32 addr = iaddr & 0x3FFFFFFF;

				if (addr == iaddr && offset == 0) {
					// It was already safe. Let's shove it into a reg and use it directly.
					load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);
					addrReg = gpr.R(rs);
				} else {
					// In this case, only map rt. rs+offset will be in R0.
					gpr.MapReg(rt, load ? MAP_NOINIT : 0);
					gpr.SetRegImm(R0, addr);
					addrReg = R0;
				}
			} else {
				_dbg_assert_msg_(!gpr.IsImm(rs), "Invalid immediate address? CPU bug?");
				load ? gpr.MapDirtyIn(rt, rs) : gpr.MapInIn(rt, rs);

				if (!g_Config.bFastMemory && rs != MIPS_REG_SP) {
					// Default (reverse == false): conditions execute when the address is OK.
					SetCCAndR0ForSafeAddress(rs, offset, SCRATCHREG2);
					doCheck = true;
				} else {
					SetR0ToEffectiveAddress(rs, offset);
				}
				addrReg = R0;
			}

			// The actual memory access, conditionally executed if doCheck.
			switch (o)
			{
			// Load
			case 35: LDR  (gpr.R(rt), MEMBASEREG, addrReg); break;
			case 37: LDRH (gpr.R(rt), MEMBASEREG, addrReg); break;
			case 33: LDRSH(gpr.R(rt), MEMBASEREG, addrReg); break;
			case 36: LDRB (gpr.R(rt), MEMBASEREG, addrReg); break;
			case 32: LDRSB(gpr.R(rt), MEMBASEREG, addrReg); break;
			// Store
			case 43: STR  (gpr.R(rt), MEMBASEREG, addrReg); break;
			case 41: STRH (gpr.R(rt), MEMBASEREG, addrReg); break;
			case 40: STRB (gpr.R(rt), MEMBASEREG, addrReg); break;
			}
			if (doCheck) {
				if (load) {
					// Bad address on a load: destination reads as zero.
					SetCC(CC_EQ);
					MOVI2R(gpr.R(rt), 0);
				}
				SetCC(CC_AL);
			}
			break;
		case 34: //lwl
		case 38: //lwr
			load = true;
			// Intentional fall through - lwl/lwr and swl/swr share the LR path.
		case 42: //swl
		case 46: //swr
			Comp_ITypeMemLR(op, load);
			break;
		default:
			Comp_Generic(op);
			return;
		}
	}

	// sc (store conditional) - not implemented in this JIT; always falls back.
	void ArmJit::Comp_StoreSync(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		DISABLE;
	}

	// cache instruction: the listed funcs are no-ops here; anything else
	// falls back to the interpreter.
	void ArmJit::Comp_Cache(MIPSOpcode op) {
		CONDITIONAL_DISABLE(LSU);

		int func = (op >> 16) & 0x1F;

		// See Int_Cache for the definitions.
		switch (func) {
		case 24: break;
		case 25: break;
		case 27: break;
		case 30: break;
		default:
			// Fall back to the interpreter.
			DISABLE;
		}
	}
}

#endif // PPSSPP_ARCH(ARM)