CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Software/RasterizerRegCache.h
Views: 1401
1
// Copyright (c) 2021- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#pragma once
19
20
#include "ppsspp_config.h"
21
22
#include <cstdint>
23
#include <string>
24
#include <unordered_map>
25
#include <vector>
26
27
#include "Common/Common.h"
28
#if defined(_M_SSE)
29
#include <emmintrin.h>
30
#endif
31
#if PPSSPP_ARCH(ARM64_NEON)
32
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
33
#include <arm64_neon.h>
34
#else
35
#include <arm_neon.h>
36
#endif
37
#endif
38
39
#if PPSSPP_ARCH(ARM)
40
#include "Common/ArmEmitter.h"
41
#elif PPSSPP_ARCH(ARM64_NEON)
42
#include "Common/Arm64Emitter.h"
43
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
44
#include "Common/x64Emitter.h"
45
#elif PPSSPP_ARCH(MIPS)
46
#include "Common/MipsEmitter.h"
47
#elif PPSSPP_ARCH(RISCV64)
48
#include "Common/RiscVEmitter.h"
49
#else
50
#include "Common/FakeEmitter.h"
51
#endif
52
#include "GPU/Math3D.h"
53
54
namespace Rasterizer {
55
56
// While not part of the reg cache proper, this is the type it is built for.
57
#if PPSSPP_ARCH(ARM)
58
typedef ArmGen::ARMXCodeBlock BaseCodeBlock;
59
#elif PPSSPP_ARCH(ARM64_NEON)
60
typedef Arm64Gen::ARM64CodeBlock BaseCodeBlock;
61
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
62
typedef Gen::XCodeBlock BaseCodeBlock;
63
#elif PPSSPP_ARCH(MIPS)
64
typedef MIPSGen::MIPSCodeBlock BaseCodeBlock;
65
#elif PPSSPP_ARCH(RISCV64)
66
typedef RiscVGen::RiscVCodeBlock BaseCodeBlock;
67
#else
68
typedef FakeGen::FakeXCodeBlock BaseCodeBlock;
69
#endif
70
71
// We also have the types of things that end up in regs.
72
#if PPSSPP_ARCH(ARM64_NEON)
73
typedef int32x4_t Vec4IntArg;
74
typedef int32x4_t Vec4IntResult;
75
typedef float32x4_t Vec4FloatArg;
76
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }
77
static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }
78
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return vld1q_s32(a.AsArray()); }
79
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return vld1q_f32(a.AsArray()); }
80
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
81
typedef __m128i Vec4IntArg;
82
typedef __m128i Vec4IntResult;
83
typedef __m128 Vec4FloatArg;
84
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a.ivec; }
85
static inline Vec4IntArg ToVec4IntArg(const Vec4IntResult &a) { return a; }
86
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a.ivec; }
87
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a.vec; }
88
#else
89
typedef const Math3D::Vec4<int> &Vec4IntArg;
90
typedef Math3D::Vec4<int> Vec4IntResult;
91
typedef const Math3D::Vec4<float> &Vec4FloatArg;
92
static inline Vec4IntArg ToVec4IntArg(const Math3D::Vec4<int> &a) { return a; }
93
static inline Vec4IntResult ToVec4IntResult(const Math3D::Vec4<int> &a) { return a; }
94
static inline Vec4FloatArg ToVec4FloatArg(const Math3D::Vec4<float> &a) { return a; }
95
#endif
96
97
// Calling-convention macro: expands to __vectorcall on AMD64 Windows when
// built with MSVC, clang, or the Intel compiler, and to nothing otherwise.
#if PPSSPP_ARCH(AMD64) && PPSSPP_PLATFORM(WINDOWS)
#if defined(_MSC_VER) || defined(__clang__) || defined(__INTEL_COMPILER)
#define SOFTRAST_CALL __vectorcall
#else
#define SOFTRAST_CALL
#endif
#else
#define SOFTRAST_CALL
#endif
102
103
// Tracks which registers currently hold which values ("purposes") while
// code is being generated.
struct RegCache {
	// What a tracked register is being used for.
	enum Purpose {
		// Bit set in every GEN_* value below and clear in every VEC_* value.
		FLAG_GEN = 0x0100,
		// Bit set in every *_TEMP* value below.
		FLAG_TEMP = 0x1000,

		// Vector purposes.
		VEC_ZERO = 0x0000,
		VEC_RESULT = 0x0001,
		VEC_RESULT1 = 0x0002,
		VEC_U1 = 0x0003,
		VEC_V1 = 0x0004,
		VEC_INDEX = 0x0005,
		VEC_INDEX1 = 0x0006,

		// General purposes.
		GEN_SRC_ALPHA = 0x0100,
		GEN_ID = 0x0101,
		GEN_STENCIL = 0x0103,
		GEN_COLOR_OFF = 0x0104,
		GEN_DEPTH_OFF = 0x0105,
		GEN_RESULT = 0x0106,
		GEN_SHIFTVAL = 0x0107,

		// General arguments.
		GEN_ARG_X = 0x0180,
		GEN_ARG_Y = 0x0181,
		GEN_ARG_Z = 0x0182,
		GEN_ARG_FOG = 0x0183,
		GEN_ARG_ID = 0x0184,
		GEN_ARG_U = 0x0185,
		GEN_ARG_V = 0x0186,
		GEN_ARG_TEXPTR = 0x0187,
		GEN_ARG_BUFW = 0x0188,
		GEN_ARG_LEVEL = 0x0189,
		GEN_ARG_TEXPTR_PTR = 0x018A,
		GEN_ARG_BUFW_PTR = 0x018B,
		GEN_ARG_LEVELFRAC = 0x018C,
		// Vector arguments.
		VEC_ARG_COLOR = 0x0080,
		VEC_ARG_MASK = 0x0081,
		VEC_ARG_U = 0x0082,
		VEC_ARG_V = 0x0083,
		VEC_ARG_S = 0x0084,
		VEC_ARG_T = 0x0085,
		VEC_FRAC = 0x0086,

		// Vector temporaries.
		VEC_TEMP0 = 0x1000,
		VEC_TEMP1 = 0x1001,
		VEC_TEMP2 = 0x1002,
		VEC_TEMP3 = 0x1003,
		VEC_TEMP4 = 0x1004,
		VEC_TEMP5 = 0x1005,

		// General temporaries.
		GEN_TEMP0 = 0x1100,
		GEN_TEMP1 = 0x1101,
		GEN_TEMP2 = 0x1102,
		GEN_TEMP3 = 0x1103,
		GEN_TEMP4 = 0x1104,
		GEN_TEMP5 = 0x1105,
		GEN_TEMP_HELPER = 0x1106,

		// "No purpose" markers, one per register kind.
		VEC_INVALID = 0xFEFF,
		GEN_INVALID = 0xFFFF,
	};

	// Register type and "no register" sentinel of the active emitter.
#if PPSSPP_ARCH(ARM)
	using Reg = ArmGen::ARMReg;
	static constexpr Reg REG_INVALID_VALUE = ArmGen::INVALID_REG;
#elif PPSSPP_ARCH(ARM64_NEON)
	using Reg = Arm64Gen::ARM64Reg;
	static constexpr Reg REG_INVALID_VALUE = Arm64Gen::INVALID_REG;
#elif PPSSPP_ARCH(X86) || PPSSPP_ARCH(AMD64)
	using Reg = Gen::X64Reg;
	static constexpr Reg REG_INVALID_VALUE = Gen::INVALID_REG;
#elif PPSSPP_ARCH(MIPS)
	using Reg = MIPSGen::MIPSReg;
	static constexpr Reg REG_INVALID_VALUE = MIPSGen::INVALID_REG;
#elif PPSSPP_ARCH(RISCV64)
	using Reg = RiscVGen::RiscVReg;
	static constexpr Reg REG_INVALID_VALUE = RiscVGen::INVALID_REG;
#else
	using Reg = int;
	static constexpr Reg REG_INVALID_VALUE = -1;
#endif

	// Bookkeeping for a single tracked register.
	struct RegStatus {
		Reg reg;
		Purpose purpose;
		uint8_t locked = 0;
		bool forceRetained = false;
		bool everLocked = false;
	};

	// Registers the incoming arguments according to the ABI.
	// Note: on Windows, __vectorcall is assumed.
	// Only what lands in registers is tracked; args that don't fit go to the
	// stack, which is ignored here.
	void SetupABI(const std::vector<Purpose> &args, bool forceRetain = true);
	// Call once compilation is complete. If compilation failed, pass false for validate.
	void Reset(bool validate);
	// Starts tracking a register under an initial purpose.
	// It will be neither locked nor force retained.
	void Add(Reg r, Purpose p);
	// Switches registers that have one purpose over to another.
	void Change(Purpose history, Purpose destiny);
	// Releases a register obtained from Find/Alloc; its purpose becomes invalid.
	void Release(Reg &r, Purpose p);
	// Unlocks a register obtained from Find/Alloc, but tries to keep it around.
	void Unlock(Reg &r, Purpose p);
	// Tells whether the purpose currently lives in a register.
	bool Has(Purpose p);
	// Returns the register holding a purpose.
	// Unless it is certainly there, check with Has() first.
	Reg Find(Purpose p);
	// Allocates a new register for the purpose.
	Reg Alloc(Purpose p);
	// Forces a register to be retained, even when regs are running short.
	void ForceRetain(Purpose p);
	// Undoes ForceRetain, and releases the register back to invalid.
	void ForceRelease(Purpose p);

	// Obtains one specific register.
	// WARNING: the reg may be locked, so the caller has to check.
	void GrabReg(Reg r, Purpose p, bool &needsSwap, Reg swapReg, Purpose swapPurpose);
	// Sets the purpose of one specific register. Returns false if it is locked.
	bool ChangeReg(Reg r, Purpose p);
	// Reports whether the reg was ever used.
	bool UsedReg(Reg r, Purpose flag);

private:
	RegStatus *FindReg(Reg r, Purpose p);

	// All registers currently tracked.
	std::vector<RegStatus> regs;
};
228
229
class CodeBlock : public BaseCodeBlock {
230
public:
231
virtual std::string DescribeCodePtr(const u8 *ptr);
232
virtual void Clear();
233
234
protected:
235
CodeBlock(int size);
236
237
RegCache::Reg GetZeroVec();
238
239
void Describe(const std::string &message);
240
// Returns amount of stack space used.
241
int WriteProlog(int extraStack, const std::vector<RegCache::Reg> &vec, const std::vector<RegCache::Reg> &gen);
242
// Returns updated function start position, modifies prolog and finishes writing.
243
const u8 *WriteFinalizedEpilog();
244
245
void WriteSimpleConst16x8(const u8 *&ptr, uint8_t value);
246
void WriteSimpleConst8x16(const u8 *&ptr, uint16_t value);
247
void WriteSimpleConst4x32(const u8 *&ptr, uint32_t value);
248
void WriteDynamicConst16x8(const u8 *&ptr, uint8_t value);
249
void WriteDynamicConst8x16(const u8 *&ptr, uint16_t value);
250
void WriteDynamicConst4x32(const u8 *&ptr, uint32_t value);
251
252
#if PPSSPP_ARCH(ARM64_NEON)
253
Arm64Gen::ARM64FloatEmitter fp;
254
#endif
255
256
std::unordered_map<const u8 *, std::string> descriptions_;
257
Rasterizer::RegCache regCache_;
258
259
private:
260
u8 *lastPrologStart_ = nullptr;
261
u8 *lastPrologEnd_ = nullptr;
262
int savedStack_;
263
int firstVecStack_;
264
std::vector<RegCache::Reg> prologVec_;
265
std::vector<RegCache::Reg> prologGen_;
266
};
267
268
};
269
270