CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/Core/MIPS/ARM/ArmRegCacheFPU.h
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#pragma once2021#include "Core/MIPS/MIPS.h"22#include "Core/MIPS/ARM/ArmRegCache.h"23#include "Core/MIPS/MIPSVFPUUtils.h"24#include "Common/ArmEmitter.h"2526namespace ArmJitConstants {2728enum {29NUM_TEMPS = 16,30TEMP0 = 32 + 128,31TOTAL_MAPPABLE_MIPSFPUREGS = 32 + 128 + NUM_TEMPS,32};3334enum {35MAP_READ = 0,36MAP_MTX_TRANSPOSED = 16,37MAP_PREFER_LOW = 16,38MAP_PREFER_HIGH = 32,3940// Force is not yet correctly implemented, if the reg is already mapped it will not move41MAP_FORCE_LOW = 64, // Only map Q0-Q7 (and probably not Q0-Q3 as they are S registers so that leaves Q8-Q15)42MAP_FORCE_HIGH = 128, // Only map Q8-Q1543};4445}4647namespace MIPSAnalyst {48struct AnalysisResults;49};5051struct FPURegARM {52int mipsReg; // if -1, no mipsreg attached.53bool isDirty; // Should the register be written back?54};5556struct FPURegQuad {57int mipsVec;58VectorSize sz;59u8 vregs[4];60bool isDirty;61bool spillLock;62bool isTemp;63};6465struct FPURegMIPS {66// Where is this MIPS register?67ArmJitConstants::RegMIPSLoc loc;68// Data (only one of these is used, depending on loc. Could make a union).69u32 reg;70int lane;7172bool spillLock; // if true, this register cannot be spilled.73bool tempLock;74// If loc == ML_MEM, it's back in its location in the CPU context struct.75};7677namespace MIPSComp {78struct JitOptions;79struct JitState;80}8182class ArmRegCacheFPU {83public:84ArmRegCacheFPU(MIPSState *mipsState, MIPSComp::JitState *js, MIPSComp::JitOptions *jo);85~ArmRegCacheFPU() {}8687void Init(ArmGen::ARMXEmitter *emitter);8889void Start(MIPSAnalyst::AnalysisResults &stats);9091// Protect the arm register containing a MIPS register from spilling, to ensure that92// it's being kept allocated.93void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);94void SpillLockV(MIPSReg r) { SpillLock(r + 32); }9596void ReleaseSpillLocksAndDiscardTemps();97void ReleaseSpillLock(int mipsreg) {98mr[mipsreg].spillLock = false;99}100void ReleaseSpillLockV(int mipsreg) {101ReleaseSpillLock(mipsreg + 32);102}103104void SetImm(MIPSReg reg, u32 immVal);105bool IsImm(MIPSReg reg) const;106u32 GetImm(MIPSReg reg) const;107108// Returns an ARM register containing the requested MIPS register.109ArmGen::ARMReg MapReg(MIPSReg reg, int mapFlags = 0);110void MapInIn(MIPSReg rd, MIPSReg rs);111void MapDirty(MIPSReg rd);112void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);113void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);114bool IsMapped(MIPSReg r);115void FlushArmReg(ArmGen::ARMReg r);116void FlushR(MIPSReg r);117void DiscardR(MIPSReg r);118ArmGen::ARMReg R(int preg); // Returns a cached register119120// VFPU register as single ARM VFP registers. Must not be used in the upcoming NEON mode!121void MapRegV(int vreg, int flags = 0);122void LoadToRegV(ArmGen::ARMReg armReg, int vreg);123void MapInInV(int rt, int rs);124void MapDirtyInV(int rd, int rs, bool avoidLoad = true);125void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true);126127bool IsTempX(ArmGen::ARMReg r) const;128MIPSReg GetTempV() { return GetTempR() - 32; }129// VFPU registers as single VFP registers.130ArmGen::ARMReg V(int vreg) { return R(vreg + 32); }131132int FlushGetSequential(int a);133void FlushAll();134135// This one is allowed at any point.136void FlushV(MIPSReg r);137138// VFPU registers mapped to match NEON quads (and doubles, for pairs and singles)139// Here we return the ARM register directly instead of providing a "V" accessor140// and so on. Might switch to this model for the other regallocs later.141142// Quad mapping does NOT look into the ar array. Instead we use the qr array to keep143// track of what's in each quad.144145// Note that we automatically spill-lock EVERY Q REGISTER we map, unlike other types.146// Need to explicitly allow spilling to get spilling.147ArmGen::ARMReg QMapReg(int vreg, VectorSize sz, int flags);148149// TODO150// Maps a matrix as a set of columns (yes, even transposed ones, always columns151// as those are faster to load/flush). When possible it will map into consecutive152// quad registers, enabling blazing-fast full-matrix loads, transposed or not.153void QMapMatrix(ArmGen::ARMReg *regs, int matrix, MatrixSize mz, int flags);154155ArmGen::ARMReg QAllocTemp(VectorSize sz);156157void QAllowSpill(int quad);158void QFlush(int quad);159void QLoad4x4(MIPSGPReg regPtr, int vquads[4]);160//void FlushQWithV(MIPSReg r);161162// NOTE: These require you to release spill locks manually!163void MapRegsAndSpillLockV(int vec, VectorSize vsz, int flags);164void MapRegsAndSpillLockV(const u8 *v, VectorSize vsz, int flags);165166void SpillLockV(const u8 *v, VectorSize vsz);167void SpillLockV(int vec, VectorSize vsz);168169void SetEmitter(ArmGen::ARMXEmitter *emitter) { emit_ = emitter; }170171int GetMipsRegOffset(MIPSReg r);172173private:174bool Consecutive(int v1, int v2) const;175bool Consecutive(int v1, int v2, int v3) const;176bool Consecutive(int v1, int v2, int v3, int v4) const;177178MIPSReg GetTempR();179const ArmGen::ARMReg *GetMIPSAllocationOrder(int &count);180int GetMipsRegOffsetV(MIPSReg r) {181return GetMipsRegOffset(r + 32);182}183// This one WILL get a free quad as long as you haven't spill-locked them all.184int QGetFreeQuad(int start, int count, const char *reason);185186void SetupInitialRegs();187188MIPSState *mips_;189ArmGen::ARMXEmitter *emit_;190MIPSComp::JitState *js_;191MIPSComp::JitOptions *jo_;192193int qTime_;194195enum {196// With NEON, we have 64 S = 32 D = 16 Q registers. Only the first 32 S registers197// are individually mappable though.198NUM_ARMFPUREG = 32,199NUM_ARMQUADS = 16,200NUM_MIPSFPUREG = ArmJitConstants::TOTAL_MAPPABLE_MIPSFPUREGS,201};202203FPURegARM ar[NUM_ARMFPUREG];204FPURegMIPS mr[NUM_MIPSFPUREG];205FPURegQuad qr[NUM_ARMQUADS];206FPURegMIPS *vr;207208bool pendingFlush;209bool initialReady = false;210FPURegARM arInitial[NUM_ARMFPUREG];211FPURegMIPS mrInitial[NUM_MIPSFPUREG];212};213214215