// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

#pragma once

#include "Common/x64Emitter.h"
#include "Core/MIPS/MIPS.h"
#include "Core/MIPS/MIPSVFPUUtils.h"
#undef MAP_NOINIT

// FPRs are numbered 0 to 31.
// VFPU regs are numbered 32 to 159.
// Then we have some temp regs for VFPU handling from 160 to 175.

// Temp regs: 4 from the S prefix, 4 from the T prefix, 4 from the D mask, and 4 for work (worst case).
// But most of the time prefixes aren't used that heavily, so we won't use all of them.
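
// Summary of the resulting index layout used throughout this cache:
//   0..31    = the 32 MIPS FPRs
//   32..159  = the 128 VFPU registers (vreg + 32)
//   160..175 = VFPU temporaries (TEMP0 + n)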

// PLANS FOR PROPER SIMD
// 1, 2, 3, and 4-vectors will be loaded into single XMM registers.
// Matrices will be loaded into pairs, triads, or quads of XMM registers - simply by loading
// the columns or the rows one by one.

// On x86 this means that only one 4x4 matrix can be fully loaded at once, but that's alright.
// We might want to keep "linearized" columns in memory.

// Implement optimized vec/matrix multiplications of all types and transposes that
// take into account in which XMM registers the values are. Fallback: just dump out the values
// and do it the old way.
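
// (For scale: x86-64 exposes 16 XMM registers and 32-bit x86 only 8 (see NUM_X_FPREGS below),
// so a 4x4 matrix held as four XMM registers already takes half of them on 32-bit x86;
// the rest are presumably needed as scratch for operands and temporaries.)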
#include "ppsspp_config.h"

enum {
	TEMP0 = 32 + 128,  // index of the first VFPU temp register (160)
	NUM_MIPS_FPRS = 32 + 128 + NUM_X86_FPU_TEMPS,  // total register slots tracked by this cache
};

#if PPSSPP_ARCH(AMD64)
#define NUM_X_FPREGS 16
#elif PPSSPP_ARCH(X86)
#define NUM_X_FPREGS 8
#endif

namespace MIPSAnalyst {
struct AnalysisResults;
};

struct X64CachedFPReg {
	union {
		int mipsReg;      // MIPS register held here when mapped as a single value
		int mipsRegs[4];  // MIPS registers held here when SIMD-mapped, one per lane
	};
	bool dirty;
};

struct MIPSCachedFPReg {
	Gen::OpArg location;
	int lane;   // 0 when not part of a SIMD mapping, nonzero lane tag otherwise (see V/VS below)
	bool away;  // value not in source register (memory)
	u8 locked;
	// Only for temp regs.
	bool tempLocked;
};

struct FPURegCacheState {
	MIPSCachedFPReg regs[NUM_MIPS_FPRS];
	X64CachedFPReg xregs[NUM_X_FPREGS];
};

namespace MIPSComp {
	struct JitOptions;
	struct JitState;
}

enum {
	MAP_DIRTY = 1,
	MAP_NOINIT = 2 | MAP_DIRTY,
	// Only for MapRegsV, MapRegsVS.
	MAP_NOLOCK = 4,
};
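
// Illustrative use of these flags (fpr, dregs, and sz are hypothetical names inside a
// jit compile routine):
//   fpr.MapRegsV(dregs, sz, MAP_NOINIT);  // map a destination vector as dirty,
//                                         // skipping the load of its old contents
// MAP_NOINIT includes MAP_DIRTY, so the mapped registers will be written back on flush.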

// The PSP has 160 FP registers: 32 FPRs + 128 VFPU registers.
// Soon we will support them all.

class FPURegCache
{
public:
	FPURegCache();
	~FPURegCache() {}

	void Start(MIPSState *mipsState, MIPSComp::JitState *js, MIPSComp::JitOptions *jo, MIPSAnalyst::AnalysisResults &stats, bool useRip);
	void MapReg(int preg, bool doLoad = true, bool makeDirty = true);
	void StoreFromRegister(int preg);
	void StoreFromRegisterV(int preg) {
		StoreFromRegister(preg + 32);
	}
	Gen::OpArg GetDefaultLocation(int reg) const;
	void DiscardR(int freg);
	void DiscardV(int vreg) {
		DiscardR(vreg + 32);
	}
	void DiscardVS(int vreg);
	bool IsTempX(Gen::X64Reg xreg);
	int GetTempR();
	int GetTempV() {
		return GetTempR() - 32;
	}
	int GetTempVS(u8 *v, VectorSize vsz);

	void SetEmitter(Gen::XEmitter *emitter) {emit = emitter;}

	// Flushes one register and reuses it for another one. Dirtiness is implied.
	void FlushRemap(int oldreg, int newreg);

	void Flush();
	int SanityCheck() const;

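	// These accessors return a register's current location, which is either an x64
	// register or its memory slot. V()/VX() are for VFPU regs mapped individually
	// (lane == 0); VS()/VSX() are for regs held as one lane of a SIMD-mapped XMM
	// register (lane != 0) and are addressed via the first element of the vector.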
	const Gen::OpArg &R(int freg) const {return regs[freg].location;}
	const Gen::OpArg &V(int vreg) const {
		_dbg_assert_msg_(vregs[vreg].lane == 0, "SIMD reg %d used as V reg (use VS instead). pc=%08x", vreg, mips_->pc);
		return vregs[vreg].location;
	}
	const Gen::OpArg &VS(const u8 *vs) const {
		_dbg_assert_msg_(vregs[vs[0]].lane != 0, "V reg %d used as VS reg (use V instead). pc=%08x", vs[0], mips_->pc);
		return vregs[vs[0]].location;
	}

	Gen::X64Reg RX(int freg) const {
		if (regs[freg].away && regs[freg].location.IsSimpleReg())
			return regs[freg].location.GetSimpleReg();
		_assert_msg_(false, "Not so simple - f%i", freg);
		return (Gen::X64Reg)-1;
	}

	Gen::X64Reg VX(int vreg) const {
		_dbg_assert_msg_(vregs[vreg].lane == 0, "SIMD reg %d used as V reg (use VSX instead). pc=%08x", vreg, mips_->pc);
		if (vregs[vreg].away && vregs[vreg].location.IsSimpleReg())
			return vregs[vreg].location.GetSimpleReg();
		_assert_msg_(false, "Not so simple - v%i", vreg);
		return (Gen::X64Reg)-1;
	}

	Gen::X64Reg VSX(const u8 *vs) const {
		_dbg_assert_msg_(vregs[vs[0]].lane != 0, "V reg %d used as VS reg (use VX instead). pc=%08x", vs[0], mips_->pc);
		if (vregs[vs[0]].away && vregs[vs[0]].location.IsSimpleReg())
			return vregs[vs[0]].location.GetSimpleReg();
		_assert_msg_(false, "Not so simple - v%i", vs[0]);
		return (Gen::X64Reg)-1;
	}

	// Just to avoid coding mistakes: this overload is declared but never defined, so
	// accidentally passing an X64Reg where a MIPS reg index is expected fails to build
	// instead of silently converting the enum to an int index.
	void R(Gen::X64Reg r);

	// Register locking. Prevents the locked registers from being spilled.
	void SpillLock(int p1, int p2=0xff, int p3=0xff, int p4=0xff);
	void ReleaseSpillLock(int mipsreg);
	void ReleaseSpillLocks();

	bool IsMapped(int r) {
		return R(r).IsSimpleReg();
	}
	bool IsMappedV(int v) {
		return vregs[v].lane == 0 && V(v).IsSimpleReg();
	}
	bool IsMappedVS(u8 v) {
		return vregs[v].lane != 0 && VS(&v).IsSimpleReg();
	}
	bool IsMappedVS(const u8 *v, VectorSize vsz);
	bool CanMapVS(const u8 *v, VectorSize vsz);

	void MapRegV(int vreg, int flags);
	void MapRegsV(int vec, VectorSize vsz, int flags);
	void MapRegsV(const u8 *v, VectorSize vsz, int flags);
	void SpillLockV(int vreg) {
		SpillLock(vreg + 32);
	}
	void SpillLockV(const u8 *v, VectorSize vsz);
	void SpillLockV(int vec, VectorSize vsz);
	void ReleaseSpillLockV(int vreg) {
		ReleaseSpillLock(vreg + 32);
	}
	void ReleaseSpillLockV(const u8 *vec, VectorSize sz);

	// TODO: This may trash XMM0/XMM1 some day.
	void MapRegsVS(const u8 *v, VectorSize vsz, int flags);
	bool TryMapRegsVS(const u8 *v, VectorSize vsz, int flags);
	bool TryMapDirtyInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, bool avoidLoad = true);
	bool TryMapDirtyInInVS(const u8 *vd, VectorSize vdsz, const u8 *vs, VectorSize vssz, const u8 *vt, VectorSize vtsz, bool avoidLoad = true);
	// TODO: If s/t overlap differently, need read-only copies? Maybe finalize d? Major design flaw...
	// TODO: Matrix versions? Cols/Rows?
	// No MapRegVS, that'd be silly.

	void SimpleRegsV(const u8 *v, VectorSize vsz, int flags);
	void SimpleRegsV(const u8 *v, MatrixSize msz, int flags);
	void SimpleRegV(const u8 v, int flags);

	void GetState(FPURegCacheState &state) const;
	void RestoreState(const FPURegCacheState& state);

	MIPSState *mips_ = nullptr;

	void FlushX(Gen::X64Reg reg);
	Gen::X64Reg GetFreeXReg();
	int GetFreeXRegs(Gen::X64Reg *regs, int n, bool spill = true);

	void Invariant() const;

private:
	const int *GetAllocationOrder(int &count);
	void SetupInitialRegs();

	// These are intentionally not public so the interface is "locked" or "unlocked", no levels.
	void ReduceSpillLock(int mreg);
	void ReduceSpillLockV(int vreg) {
		ReduceSpillLock(vreg + 32);
	}
	void ReduceSpillLockV(const u8 *vec, VectorSize sz);

	Gen::X64Reg LoadRegsVS(const u8 *v, int n);

	MIPSCachedFPReg regs[NUM_MIPS_FPRS]{};
	X64CachedFPReg xregs[NUM_X_FPREGS]{};
	MIPSCachedFPReg *vregs;  // convenience view of the VFPU part of regs[] (offset by 32)

	bool useRip_;
	bool pendingFlush;
	bool initialReady = false;
	MIPSCachedFPReg regsInitial[NUM_MIPS_FPRS];
	X64CachedFPReg xregsInitial[NUM_X_FPREGS];

	Gen::XEmitter *emit = nullptr;
	MIPSComp::JitState *js_;
	MIPSComp::JitOptions *jo_;
};