CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Software/RasterizerRegCache.h
Views: 1401
// Copyright (c) 2021- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#include "ppsspp_config.h"2021#include <cstdint>22#include <string>23#include <unordered_map>24#include <vector>2526#include "Common/Common.h"27#if defined(_M_SSE)28#include <emmintrin.h>29#endif30#if PPSSPP_ARCH(ARM64_NEON)31#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)32#include <arm64_neon.h>33#else34#include <arm_neon.h>35#endif36#endif3738#if PPSSPP_ARCH(ARM)39#include "Common/ArmEmitter.h"40#elif PPSSPP_ARCH(ARM64_NEON)41#include "Common/Arm64Emitter.h"42#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)43#include "Common/x64Emitter.h"44#elif PPSSPP_ARCH(MIPS)45#include "Common/MipsEmitter.h"46#elif PPSSPP_ARCH(RISCV64)47#include "Common/RiscVEmitter.h"48#else49#include "Common/FakeEmitter.h"50#endif51#include "GPU/Math3D.h"5253namespace Rasterizer {5455// While not part of the reg cache proper, this is the type it is built for.56#if PPSSPP_ARCH(ARM)57typedef ArmGen::ARMXCodeBlock BaseCodeBlock;58#elif PPSSPP_ARCH(ARM64_NEON)59typedef Arm64Gen::ARM64CodeBlock BaseCodeBlock;60#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)61typedef Gen::XCodeBlock BaseCodeBlock;62#elif PPSSPP_ARCH(MIPS)63typedef MIPSGen::MIPSCodeBlock BaseCodeBlock;64#elif PPSSPP_ARCH(RISCV64)65typedef RiscVGen::RiscVCodeBlock BaseCodeBlock;66#else67typedef FakeGen::FakeXCodeBlock BaseCodeBlock;68#endif6970// We also have the types of things that end up in regs.71#if PPSSPP_ARCH(ARM64_NEON)72typedef int32x4_t Vec4IntArg;73typedef int32x4_t Vec4IntResult;74typedef float32x4_t Vec4FloatArg;75static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }76static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }77static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }78static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return vld1q_f32(a.AsArray()); }79#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)80typedef __m128i Vec4IntArg;81typedef __m128i Vec4IntResult;82typedef __m128 Vec4FloatArg;83static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a.ivec; }84static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }85static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a.ivec; }86static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a.vec; }87#else88typedef const Math3D::Vec4<int> &Vec4IntArg;89typedef Math3D::Vec4<int> Vec4IntResult;90typedef const Math3D::Vec4<float> &Vec4FloatArg;91static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a; }92static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a; }93static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a; }94#endif9596#if PPSSPP_ARCH(AMD64) && PPSSPP_PLATFORM(WINDOWS) && (defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER))97#define SOFTRAST_CALL __vectorcall98#else99#define SOFTRAST_CALL100#endif101102struct RegCache {103enum Purpose {104FLAG_GEN = 0x0100,105FLAG_TEMP = 0x1000,106107VEC_ZERO = 0x0000,108VEC_RESULT = 0x0001,109VEC_RESULT1 = 0x0002,110VEC_U1 = 0x0003,111VEC_V1 = 0x0004,112VEC_INDEX = 0x0005,113VEC_INDEX1 = 0x0006,114115GEN_SRC_ALPHA = 0x0100,116GEN_ID = 0x0101,117GEN_STENCIL = 0x0103,118GEN_COLOR_OFF = 0x0104,119GEN_DEPTH_OFF = 0x0105,120GEN_RESULT = 0x0106,121GEN_SHIFTVAL = 0x0107,122123GEN_ARG_X = 0x0180,124GEN_ARG_Y = 0x0181,125GEN_ARG_Z = 0x0182,126GEN_ARG_FOG = 0x0183,127GEN_ARG_ID = 0x0184,128GEN_ARG_U = 0x0185,129GEN_ARG_V = 0x0186,130GEN_ARG_TEXPTR = 0x0187,131GEN_ARG_BUFW = 0x0188,132GEN_ARG_LEVEL = 0x0189,133GEN_ARG_TEXPTR_PTR = 0x018A,134GEN_ARG_BUFW_PTR = 0x018B,135GEN_ARG_LEVELFRAC = 0x018C,136VEC_ARG_COLOR = 0x0080,137VEC_ARG_MASK = 0x0081,138VEC_ARG_U = 0x0082,139VEC_ARG_V = 0x0083,140VEC_ARG_S = 0x0084,141VEC_ARG_T = 0x0085,142VEC_FRAC = 0x0086,143144VEC_TEMP0 = 0x1000,145VEC_TEMP1 = 0x1001,146VEC_TEMP2 = 0x1002,147VEC_TEMP3 = 0x1003,148VEC_TEMP4 = 0x1004,149VEC_TEMP5 = 0x1005,150151GEN_TEMP0 = 0x1100,152GEN_TEMP1 = 0x1101,153GEN_TEMP2 = 0x1102,154GEN_TEMP3 = 0x1103,155GEN_TEMP4 = 0x1104,156GEN_TEMP5 = 0x1105,157GEN_TEMP_HELPER = 0x1106,158159VEC_INVALID = 0xFEFF,160GEN_INVALID = 0xFFFF,161};162163#if PPSSPP_ARCH(ARM)164typedef ArmGen::ARMReg Reg;165static constexpr Reg REG_INVALID_VALUE = ArmGen::INVALID_REG;166#elif PPSSPP_ARCH(ARM64_NEON)167typedef Arm64Gen::ARM64Reg Reg;168static constexpr Reg REG_INVALID_VALUE = Arm64Gen::INVALID_REG;169#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)170typedef Gen::X64Reg Reg;171static constexpr Reg REG_INVALID_VALUE = Gen::INVALID_REG;172#elif PPSSPP_ARCH(MIPS)173typedef MIPSGen::MIPSReg Reg;174static constexpr Reg REG_INVALID_VALUE = MIPSGen::INVALID_REG;175#elif PPSSPP_ARCH(RISCV64)176typedef RiscVGen::RiscVReg Reg;177static constexpr Reg REG_INVALID_VALUE = RiscVGen::INVALID_REG;178#else179typedef int Reg;180static constexpr Reg REG_INVALID_VALUE = -1;181#endif182183struct RegStatus {184Reg reg;185Purpose purpose;186uint8_t locked = 0;187bool forceRetained = false;188bool everLocked = false;189};190191// Note: Assumes __vectorcall on Windows.192// Keep in mind, some args won't fit in regs, this ignores stack and tracks what's in regs.193void SetupABI(const std::vector<Purpose> &args, bool forceRetain = true);194// Reset after compile complete, pass false for validate if compile failed.195void Reset(bool validate);196// Add register to cache for tracking with initial purpose (won't be locked or force retained.)197void Add(Reg r, Purpose p);198// Find registers with one purpose and change to the other.199void Change(Purpose history, Purpose destiny);200// Release a previously found or allocated register, setting purpose to invalid.201void Release(Reg &r, Purpose p);202// Unlock a previously found or allocated register, but try to retain it.203void Unlock(Reg &r, Purpose p);204// Check if the purpose is currently in a register.205bool Has(Purpose p);206// Return the register for a given purpose (check with Has() first if not certainly there.)207Reg Find(Purpose p);208// Allocate a new register for the given purpose.209Reg Alloc(Purpose p);210// Force a register to be retained, even if we run short on regs.211void ForceRetain(Purpose p);212// Reverse ForceRetain, and release the register back to invalid.213void ForceRelease(Purpose p);214215// For getting a specific reg. WARNING: May return a locked reg, so you have to check.216void GrabReg(Reg r, Purpose p, bool &needsSwap, Reg swapReg, Purpose swapPurpose);217// For setting the purpose of a specific reg. Returns false if it is locked.218bool ChangeReg(Reg r, Purpose p);219// Retrieves whether reg was ever used.220bool UsedReg(Reg r, Purpose flag);221222private:223RegStatus *FindReg(Reg r, Purpose p);224225std::vector<RegStatus> regs;226};227228class CodeBlock : public BaseCodeBlock {229public:230virtual std::string DescribeCodePtr(const u8 *ptr);231virtual void Clear();232233protected:234CodeBlock(int size);235236RegCache::Reg GetZeroVec();237238void Describe(const std::string &message);239// Returns amount of stack space used.240int WriteProlog(int extraStack, const std::vector<RegCache::Reg> &vec, const std::vector<RegCache::Reg> &gen);241// Returns updated function start position, modifies prolog and finishes writing.242const u8 *WriteFinalizedEpilog();243244void WriteSimpleConst16x8(const u8 *&ptr, uint8_t value);245void WriteSimpleConst8x16(const u8 *&ptr, uint16_t value);246void WriteSimpleConst4x32(const u8 *&ptr, uint32_t value);247void WriteDynamicConst16x8(const u8 *&ptr, uint8_t value);248void WriteDynamicConst8x16(const u8 *&ptr, uint16_t value);249void WriteDynamicConst4x32(const u8 *&ptr, uint32_t value);250251#if PPSSPP_ARCH(ARM64_NEON)252Arm64Gen::ARM64FloatEmitter fp;253#endif254255std::unordered_map<const u8 *, std::string> descriptions_;256Rasterizer::RegCache regCache_;257258private:259u8 *lastPrologStart_ = nullptr;260u8 *lastPrologEnd_ = nullptr;261int savedStack_;262int firstVecStack_;263std::vector<RegCache::Reg> prologVec_;264std::vector<RegCache::Reg> prologGen_;265};266267};268269270