CoCalc -- ArmRegCacheFPU.h

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/Core/MIPS/ARM/ArmRegCacheFPU.h
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2

3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6

7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
10
// GNU General Public License 2.0 for more details.
11

12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14

15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17

18
#pragma once
19

20
#pragma once
21

22
#include "Core/MIPS/MIPS.h"
23
#include "Core/MIPS/ARM/ArmRegCache.h"
24
#include "Core/MIPS/MIPSVFPUUtils.h"
25
#include "Common/ArmEmitter.h"
26

27
// Constants used by the ARM JIT's FPU/VFPU register cache.
namespace ArmJitConstants {

// Layout of the index space used for mappable MIPS FPU registers.
enum {
	// Number of temporary slots appended after the regular registers.
	NUM_TEMPS = 16,
	// Index of the first temporary slot.
	// NOTE(review): presumably 32 FPU registers followed by 128 VFPU registers - confirm.
	TEMP0 = 32 + 128,
	// Total number of slots: regular registers plus temporaries (32 + 128 + NUM_TEMPS).
	TOTAL_MAPPABLE_MIPSFPUREGS = TEMP0 + NUM_TEMPS,
};

// Flags passed to the mapping functions of the register cache.
enum {
	MAP_READ = 0,
	// NOTE(review): MAP_MTX_TRANSPOSED and MAP_PREFER_LOW share the value 16, so the two
	// flags cannot be told apart if they are ever passed to the same function. The values
	// are left unchanged here because callers outside this header may depend on them.
	MAP_MTX_TRANSPOSED = 16,
	MAP_PREFER_LOW = 16,
	MAP_PREFER_HIGH = 32,

	// Force is not yet correctly implemented, if the reg is already mapped it will not move
	// NOTE(review): the earlier wording of the MAP_FORCE_LOW comment claimed that avoiding
	// Q0-Q3 (as they are S registers) "leaves Q8-Q15", which contradicts the Q0-Q7 range;
	// presumably Q4-Q7 was meant - confirm.
	MAP_FORCE_LOW = 64,  // Only map Q0-Q7
	MAP_FORCE_HIGH = 128,  // Only map Q8-Q15
};

}  // namespace ArmJitConstants
47

48
namespace MIPSAnalyst {
// Forward declaration only; the full definition lives elsewhere in the project.
struct AnalysisResults;
}  // namespace MIPSAnalyst
51

52
// Bookkeeping for a single ARM FPU register tracked by the register cache.
struct FPURegARM {
	int mipsReg;  // if -1, no mipsreg attached.
	bool isDirty;  // Should the register be written back?
};
56

57
struct FPURegQuad {
58
	int mipsVec;
59
	VectorSize sz;
60
	u8 vregs[4];
61
	bool isDirty;
62
	bool spillLock;
63
	bool isTemp;
64
};
65

66
struct FPURegMIPS {
67
	// Where is this MIPS register?
68
	ArmJitConstants::RegMIPSLoc loc;
69
	// Data (only one of these is used, depending on loc. Could make a union).
70
	u32 reg;
71
	int lane;
72

73
	bool spillLock;  // if true, this register cannot be spilled.
74
	bool tempLock;
75
	// If loc == ML_MEM, it's back in its location in the CPU context struct.
76
};
77

78
namespace MIPSComp {
	// Forward declarations only; the full definitions live elsewhere in the project.
	struct JitOptions;
	struct JitState;
}  // namespace MIPSComp
82

83
class ArmRegCacheFPU {
84
public:
85
	ArmRegCacheFPU(MIPSState *mipsState, MIPSComp::JitState *js, MIPSComp::JitOptions *jo);
86
	~ArmRegCacheFPU() {}
87

88
	void Init(ArmGen::ARMXEmitter *emitter);
89

90
	void Start(MIPSAnalyst::AnalysisResults &stats);
91

92
	// Protect the arm register containing a MIPS register from spilling, to ensure that
93
	// it's being kept allocated.
94
	void SpillLock(MIPSReg reg, MIPSReg reg2 = -1, MIPSReg reg3 = -1, MIPSReg reg4 = -1);
95
	void SpillLockV(MIPSReg r) { SpillLock(r + 32); }
96

97
	void ReleaseSpillLocksAndDiscardTemps();
98
	void ReleaseSpillLock(int mipsreg) {
99
		mr[mipsreg].spillLock = false;
100
	}
101
	void ReleaseSpillLockV(int mipsreg) {
102
		ReleaseSpillLock(mipsreg + 32);
103
	}
104

105
	void SetImm(MIPSReg reg, u32 immVal);
106
	bool IsImm(MIPSReg reg) const;
107
	u32 GetImm(MIPSReg reg) const;
108

109
	// Returns an ARM register containing the requested MIPS register.
110
	ArmGen::ARMReg MapReg(MIPSReg reg, int mapFlags = 0);
111
	void MapInIn(MIPSReg rd, MIPSReg rs);
112
	void MapDirty(MIPSReg rd);
113
	void MapDirtyIn(MIPSReg rd, MIPSReg rs, bool avoidLoad = true);
114
	void MapDirtyInIn(MIPSReg rd, MIPSReg rs, MIPSReg rt, bool avoidLoad = true);
115
	bool IsMapped(MIPSReg r);
116
	void FlushArmReg(ArmGen::ARMReg r);
117
	void FlushR(MIPSReg r);
118
	void DiscardR(MIPSReg r);
119
	ArmGen::ARMReg R(int preg); // Returns a cached register
120

121
	// VFPU register as single ARM VFP registers. Must not be used in the upcoming NEON mode!
122
	void MapRegV(int vreg, int flags = 0);
123
	void LoadToRegV(ArmGen::ARMReg armReg, int vreg);
124
	void MapInInV(int rt, int rs);
125
	void MapDirtyInV(int rd, int rs, bool avoidLoad = true);
126
	void MapDirtyInInV(int rd, int rs, int rt, bool avoidLoad = true);
127

128
	bool IsTempX(ArmGen::ARMReg r) const;
129
	MIPSReg GetTempV() { return GetTempR() - 32; }
130
	// VFPU registers as single VFP registers.
131
	ArmGen::ARMReg V(int vreg) { return R(vreg + 32); }
132
	 
133
	int FlushGetSequential(int a);
134
	void FlushAll();
135

136
	// This one is allowed at any point.
137
	void FlushV(MIPSReg r);
138

139
	// VFPU registers mapped to match NEON quads (and doubles, for pairs and singles)
140
	// Here we return the ARM register directly instead of providing a "V" accessor
141
	// and so on. Might switch to this model for the other regallocs later.
142

143
	// Quad mapping does NOT look into the ar array. Instead we use the qr array to keep
144
	// track of what's in each quad.
145

146
	// Note that we automatically spill-lock EVERY Q REGISTER we map, unlike other types.
147
	// Need to explicitly allow spilling to get spilling.
148
	ArmGen::ARMReg QMapReg(int vreg, VectorSize sz, int flags);
149

150
	// TODO
151
	// Maps a matrix as a set of columns (yes, even transposed ones, always columns
152
	// as those are faster to load/flush). When possible it will map into consecutive
153
	// quad registers, enabling blazing-fast full-matrix loads, transposed or not.
154
	void QMapMatrix(ArmGen::ARMReg *regs, int matrix, MatrixSize mz, int flags);
155

156
	ArmGen::ARMReg QAllocTemp(VectorSize sz);
157
	
158
	void QAllowSpill(int quad);
159
	void QFlush(int quad);
160
	void QLoad4x4(MIPSGPReg regPtr, int vquads[4]);
161
	//void FlushQWithV(MIPSReg r);
162

163
	// NOTE: These require you to release spill locks manually!
164
	void MapRegsAndSpillLockV(int vec, VectorSize vsz, int flags);
165
	void MapRegsAndSpillLockV(const u8 *v, VectorSize vsz, int flags);
166

167
	void SpillLockV(const u8 *v, VectorSize vsz);
168
	void SpillLockV(int vec, VectorSize vsz);
169

170
	void SetEmitter(ArmGen::ARMXEmitter *emitter) { emit_ = emitter; }
171

172
	int GetMipsRegOffset(MIPSReg r);
173

174
private:
175
	bool Consecutive(int v1, int v2) const;
176
	bool Consecutive(int v1, int v2, int v3) const;
177
	bool Consecutive(int v1, int v2, int v3, int v4) const;
178

179
	MIPSReg GetTempR();
180
	const ArmGen::ARMReg *GetMIPSAllocationOrder(int &count);
181
	int GetMipsRegOffsetV(MIPSReg r) {
182
		return GetMipsRegOffset(r + 32);
183
	}
184
	// This one WILL get a free quad as long as you haven't spill-locked them all.
185
	int QGetFreeQuad(int start, int count, const char *reason);
186

187
	void SetupInitialRegs();
188

189
	MIPSState *mips_;
190
	ArmGen::ARMXEmitter *emit_;
191
	MIPSComp::JitState *js_;
192
	MIPSComp::JitOptions *jo_;
193

194
	int qTime_;
195

196
	enum {
197
		// With NEON, we have 64 S = 32 D = 16 Q registers. Only the first 32 S registers
198
		// are individually mappable though.
199
		NUM_ARMFPUREG = 32,
200
		NUM_ARMQUADS = 16,
201
		NUM_MIPSFPUREG = ArmJitConstants::TOTAL_MAPPABLE_MIPSFPUREGS,
202
	};
203

204
	FPURegARM ar[NUM_ARMFPUREG];
205
	FPURegMIPS mr[NUM_MIPSFPUREG];
206
	FPURegQuad qr[NUM_ARMQUADS];
207
	FPURegMIPS *vr;
208

209
	bool pendingFlush;
210
	bool initialReady = false;
211
	FPURegARM arInitial[NUM_ARMFPUREG];
212
	FPURegMIPS mrInitial[NUM_MIPSFPUREG];
213
};
214

215
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

Product

Resources

Company