// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/Core/MIPS/x86/RegCacheFPU.h
// Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#include "Common/x64Emitter.h"20#include "Core/MIPS/MIPS.h"21#include "Core/MIPS/MIPSVFPUUtils.h"2223#undef MAP_NOINIT2425// GPRs are numbered 0 to 3126// VFPU regs are numbered 32 to 159.27// Then we have some temp regs for VFPU handling from 160 to 175.2829// Temp regs: 4 from S prefix, 4 from T prefix, 4 from D mask, and 4 for work (worst case.)30// But most of the time prefixes aren't used that heavily so we won't use all of them.3132// PLANS FOR PROPER SIMD33// 1, 2, 3, and 4-vectors will be loaded into single XMM registers34// Matrices will be loaded into pairs, triads, or quads of XMM registers - simply by loading35// the columns or the rows one by one.3637// On x86 this means that only one 4x4 matrix can be fully loaded at once but that's alright.38// We might want to keep "linearized" columns in memory.3940// Implement optimized vec/matrix multiplications of all types and transposes that41// take into account in which XMM registers the values are. 
Fallback: Just dump out the values42// and do it the old way.4344#include "ppsspp_config.h"4546enum {47TEMP0 = 32 + 128,48NUM_MIPS_FPRS = 32 + 128 + NUM_X86_FPU_TEMPS,49};5051#if PPSSPP_ARCH(AMD64)52#define NUM_X_FPREGS 1653#elif PPSSPP_ARCH(X86)54#define NUM_X_FPREGS 855#endif5657namespace MIPSAnalyst {58struct AnalysisResults;59};6061struct X64CachedFPReg {62union {63int mipsReg;64int mipsRegs[4];65};66bool dirty;67};6869struct MIPSCachedFPReg {70Gen::OpArg location;71int lane;72bool away; // value not in source register (memory)73u8 locked;74// Only for temp regs.75bool tempLocked;76};7778struct FPURegCacheState {79MIPSCachedFPReg regs[NUM_MIPS_FPRS];80X64CachedFPReg xregs[NUM_X_FPREGS];81};8283namespace MIPSComp {84struct JitOptions;85struct JitState;86}8788enum {89MAP_DIRTY = 1,90MAP_NOINIT = 2 | MAP_DIRTY,91// Only for MapRegsV, MapRegsVS.92MAP_NOLOCK = 4,93};9495// The PSP has 160 FP registers: 32 FPRs + 128 VFPU registers.96// Soon we will support them all.9798class FPURegCache99{100public:101FPURegCache();102~FPURegCache() {}103104void Start(MIPSState *mipsState, MIPSComp::JitState *js, MIPSComp::JitOptions *jo, MIPSAnalyst::AnalysisResults &stats, bool useRip);105void MapReg(int preg, bool doLoad = true, bool makeDirty = true);106void StoreFromRegister(int preg);107void StoreFromRegisterV(int preg) {108StoreFromRegister(preg + 32);109}110Gen::OpArg GetDefaultLocation(int reg) const;111void DiscardR(int freg);112void DiscardV(int vreg) {113DiscardR(vreg + 32);114}115void DiscardVS(int vreg);116bool IsTempX(Gen::X64Reg xreg);117int GetTempR();118int GetTempV() {119return GetTempR() - 32;120}121int GetTempVS(u8 *v, VectorSize vsz);122123void SetEmitter(Gen::XEmitter *emitter) {emit = emitter;}124125// Flushes one register and reuses the register for another one. 
Dirtyness is implied.126void FlushRemap(int oldreg, int newreg);127128void Flush();129int SanityCheck() const;130131const Gen::OpArg &R(int freg) const {return regs[freg].location;}132const Gen::OpArg &V(int vreg) const {133_dbg_assert_msg_(vregs[vreg].lane == 0, "SIMD reg %d used as V reg (use VS instead). pc=%08x", vreg, mips_->pc);134return vregs[vreg].location;135}136const Gen::OpArg &VS(const u8 *vs) const {137_dbg_assert_msg_(vregs[vs[0]].lane != 0, "V reg %d used as VS reg (use V instead). pc=%08x", vs[0], mips_->pc);138return vregs[vs[0]].location;139}140141Gen::X64Reg RX(int freg) const {142if (regs[freg].away && regs[freg].location.IsSimpleReg())143return regs[freg].location.GetSimpleReg();144_assert_msg_(false, "Not so simple - f%i", freg);145return (Gen::X64Reg)-1;146}147148Gen::X64Reg VX(int vreg) const {149_dbg_assert_msg_(vregs[vreg].lane == 0, "SIMD reg %d used as V reg (use VSX instead). pc=%08x", vreg, mips_->pc);150if (vregs[vreg].away && vregs[vreg].location.IsSimpleReg())151return vregs[vreg].location.GetSimpleReg();152_assert_msg_(false, "Not so simple - v%i", vreg);153return (Gen::X64Reg)-1;154}155156Gen::X64Reg VSX(const u8 *vs) const {157_dbg_assert_msg_(vregs[vs[0]].lane != 0, "V reg %d used as VS reg (use VX instead). pc=%08x", vs[0], mips_->pc);158if (vregs[vs[0]].away && vregs[vs[0]].location.IsSimpleReg())159return vregs[vs[0]].location.GetSimpleReg();160_assert_msg_(false, "Not so simple - v%i", vs[0]);161return (Gen::X64Reg)-1;162}163164// Just to avoid coding mistakes, defined here to prevent compilation.165void R(Gen::X64Reg r);166167// Register locking. 
Prevents them from being spilled.168void SpillLock(int p1, int p2=0xff, int p3=0xff, int p4=0xff);169void ReleaseSpillLock(int mipsreg);170void ReleaseSpillLocks();171172bool IsMapped(int r) {173return R(r).IsSimpleReg();174}175bool IsMappedV(int v) {176return vregs[v].lane == 0 && V(v).IsSimpleReg();177}178bool IsMappedVS(u8 v) {179return vregs[v].lane != 0 && VS(&v).IsSimpleReg();180}181bool IsMappedVS(const u8 *v, VectorSize vsz);182bool CanMapVS(const u8 *v, VectorSize vsz);183184void MapRegV(int vreg, int flags);185void MapRegsV(int vec, VectorSize vsz, int flags);186void MapRegsV(const u8 *v, VectorSize vsz, int flags);187void SpillLockV(int vreg) {188SpillLock(vreg + 32);189}190void SpillLockV(const u8 *v, VectorSize vsz);191void SpillLockV(int vec, VectorSize vsz);192void ReleaseSpillLockV(int vreg) {193ReleaseSpillLock(vreg + 32);194}195void ReleaseSpillLockV(const u8 *vec, VectorSize sz);196197// TODO: This may trash XMM0/XMM1 some day.198void MapRegsVS(const u8 *v, VectorSize vsz, int flags);199bool TryMapRegsVS(const u8 *v, VectorSize vsz, int flags);200bool TryMapDirtyInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, bool avoidLoad = true);201bool TryMapDirtyInInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, const u8 *vt, VectorSize vtsz, bool avoidLoad = true);202// TODO: If s/t overlap differently, need read-only copies? Maybe finalize d? Major design flaw...203// TODO: Matrix versions? 
Cols/Rows?204// No MapRegVS, that'd be silly.205206void SimpleRegsV(const u8 *v, VectorSize vsz, int flags);207void SimpleRegsV(const u8 *v, MatrixSize msz, int flags);208void SimpleRegV(const u8 v, int flags);209210void GetState(FPURegCacheState &state) const;211void RestoreState(const FPURegCacheState& state);212213MIPSState *mips_ = nullptr;214215void FlushX(Gen::X64Reg reg);216Gen::X64Reg GetFreeXReg();217int GetFreeXRegs(Gen::X64Reg *regs, int n, bool spill = true);218219void Invariant() const;220221private:222const int *GetAllocationOrder(int &count);223void SetupInitialRegs();224225// These are intentionally not public so the interface is "locked" or "unlocked", no levels.226void ReduceSpillLock(int mreg);227void ReduceSpillLockV(int vreg) {228ReduceSpillLock(vreg + 32);229}230void ReduceSpillLockV(const u8 *vec, VectorSize sz);231232Gen::X64Reg LoadRegsVS(const u8 *v, int n);233234MIPSCachedFPReg regs[NUM_MIPS_FPRS]{};235X64CachedFPReg xregs[NUM_X_FPREGS]{};236MIPSCachedFPReg *vregs;237238bool useRip_;239bool pendingFlush;240bool initialReady = false;241MIPSCachedFPReg regsInitial[NUM_MIPS_FPRS];242X64CachedFPReg xregsInitial[NUM_X_FPREGS];243244Gen::XEmitter *emit = nullptr;245MIPSComp::JitState *js_;246MIPSComp::JitOptions *jo_;247};248249250