CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUState.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#include "Common/Common.h"
20
#include "Common/Serialize/Serializer.h"
21
#include "Common/Serialize/SerializeFuncs.h"
22
#include "Core/CoreParameter.h"
23
#include "Core/Config.h"
24
#include "Core/System.h"
25
#include "Core/MemMap.h"
26
#include "GPU/ge_constants.h"
27
#include "GPU/GPUInterface.h"
28
#include "GPU/GPUState.h"
29
30
#ifdef _M_SSE
31
#include <emmintrin.h>
32
#endif
33
#if PPSSPP_ARCH(ARM_NEON)
34
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
35
#include <arm64_neon.h>
36
#else
37
#include <arm_neon.h>
38
#endif
39
#endif
40
41
// Global GE register state. This must be 16-byte aligned so that the matrices
// within are aligned (FastLoadBoneMatrix uses aligned SSE stores into them).
42
alignas(16) GPUgstate gstate;
43
// Global derived/cached GPU state. Let's align this one too for good measure.
44
alignas(16) GPUStateCache gstate_c;
45
46
// For save state compatibility: selects the context layout used by
// GPUgstate::Save/Restore. 0 selects the legacy layout (raw memcpy of the
// matrices); any other value selects the layout written via SaveMatrix.
// Reset() sets it to 1, and GPUStateCache::DoState() loads or forces it.
47
static int savedContextVersion = 1;
48
49
struct CmdRange {
50
u8 start;
51
u8 end;
52
};
53
54
// Command ranges (inclusive) whose cmdmem values are copied, in this order, by
// GPUgstate::Save and read back by GPUgstate::Restore. The "Skip" comments list
// the gaps that are deliberately left out of a saved context.
static const CmdRange contextCmdRanges[] = {
	{0x00, 0x02},
	// Skip: {0x03, 0x0F},
	{0x10, 0x10},
	// Skip: {0x11, 0x11},
	{0x12, 0x28},
	// Skip: {0x29, 0x2B},
	{0x2C, 0x33},
	// Skip: {0x34, 0x35},
	{0x36, 0x38},
	// Skip: {0x39, 0x41},
	{0x42, 0x4D},
	// Skip: {0x4E, 0x4F},
	{0x50, 0x51},
	// Skip: {0x52, 0x52},
	{0x53, 0x58},
	// Skip: {0x59, 0x5A},
	{0x5B, 0xB5},
	// Skip: {0xB6, 0xB7},
	{0xB8, 0xC3},
	// Skip: {0xC4, 0xC4},
	{0xC5, 0xD0},
	// Skip: {0xD1, 0xD1},
	{0xD2, 0xE9},
	// Skip: {0xEA, 0xEA},
	{0xEB, 0xEC},
	// Skip: {0xED, 0xED},
	{0xEE, 0xEE},
	// Skip: {0xEF, 0xEF},
	{0xF0, 0xF6},
	// Skip: {0xF7, 0xF7},
	{0xF8, 0xF9},
	// Skip: {0xFA, 0xFF},
};
88
89
// Writes one matrix section into a saved context: a "matrix number" command
// word (numcmd in the top byte) followed by sz data words obtained from
// gpu->GetMatrix24(), each tagged with datacmd in the top byte.
//
// For GE_MTX_BONE0, all eight bone matrices are written back to back, 12 words
// apart. Returns the position just past the written section.
//
// NOTE(review): when there is no gpu, nothing is written and cmds is returned
// unchanged, whereas LoadMatrix always consumes 1 + sz words - confirm callers
// never save without a GPU.
static u32_le *SaveMatrix(u32_le *cmds, GEMatrixType type, int sz, int numcmd, int datacmd) {
	if (gpu) {
		// Header word: resets the matrix number when replayed.
		*cmds++ = numcmd << 24;

		// This saves the CPU-visible values, not the actual used ones, which may differ.
		// Note that Restore overwrites both values.
		if (type != GE_MTX_BONE0) {
			gpu->GetMatrix24(type, cmds, datacmd << 24);
		} else {
			u32_le *boneOut = cmds;
			for (int bone = 0; bone < 8; ++bone, boneOut += 12) {
				gpu->GetMatrix24(GEMatrixType(GE_MTX_BONE0 + bone), boneOut, datacmd << 24);
			}
		}
		cmds += sz;
	}

	return cmds;
}
106
107
// Reads one matrix section from a saved context: skips the leading header word,
// then converts the next sz words with getFloat24() into mtx[0..sz).
// Returns the position just past the consumed words.
static const u32_le *LoadMatrix(const u32_le *cmds, float *mtx, int sz) {
	// Skip the reset.
	const u32_le *src = cmds + 1;

	int loaded = 0;
	while (loaded < sz) {
		mtx[loaded] = getFloat24(src[loaded]);
		++loaded;
	}

	return src + loaded;
}
116
117
void GPUgstate::Reset() {
118
memset(gstate.cmdmem, 0, sizeof(gstate.cmdmem));
119
for (int i = 0; i < 256; i++) {
120
gstate.cmdmem[i] = i << 24;
121
}
122
123
// Lighting is not enabled by default, matrices are zero initialized.
124
memset(gstate.worldMatrix, 0, sizeof(gstate.worldMatrix));
125
memset(gstate.viewMatrix, 0, sizeof(gstate.viewMatrix));
126
memset(gstate.projMatrix, 0, sizeof(gstate.projMatrix));
127
memset(gstate.tgenMatrix, 0, sizeof(gstate.tgenMatrix));
128
memset(gstate.boneMatrix, 0, sizeof(gstate.boneMatrix));
129
130
savedContextVersion = 1;
131
132
gstate_c.Dirty(DIRTY_CULL_PLANES);
133
}
134
135
// Serializes the GE context into the buffer at ptr.
//
// Layout: words 5..7 hold the vertex/index/offset addresses; starting at word
// 17 come the cmdmem values for every range in contextCmdRanges, followed by
// the matrix data. The matrix part has two formats, chosen by
// savedContextVersion: 0 writes the legacy raw layout, anything else writes
// matrix sections via SaveMatrix plus masked matrix numbers and a GE_CMD_END.
void GPUgstate::Save(u32_le *ptr) {
	// Not sure what the first 10 values are, exactly, but these seem right.
	ptr[5] = gstate_c.vertexAddr;
	ptr[6] = gstate_c.indexAddr;
	ptr[7] = gstate_c.offsetAddr;

	// Command values start 17 ints in.
	u32_le *out = ptr + 17;
	for (const CmdRange &range : contextCmdRanges) {
		for (int cmd = range.start; cmd <= range.end; ++cmd) {
			// We'll run ReapplyGfxState after this to process dirtying.
			*out++ = cmdmem[cmd];
		}
	}

	if (savedContextVersion != 0) {
		// Current format: one section per matrix, then the matrix numbers.
		out = SaveMatrix(out, GE_MTX_BONE0, ARRAY_SIZE(boneMatrix), GE_CMD_BONEMATRIXNUMBER, GE_CMD_BONEMATRIXDATA);
		out = SaveMatrix(out, GE_MTX_WORLD, ARRAY_SIZE(worldMatrix), GE_CMD_WORLDMATRIXNUMBER, GE_CMD_WORLDMATRIXDATA);
		out = SaveMatrix(out, GE_MTX_VIEW, ARRAY_SIZE(viewMatrix), GE_CMD_VIEWMATRIXNUMBER, GE_CMD_VIEWMATRIXDATA);
		out = SaveMatrix(out, GE_MTX_PROJECTION, ARRAY_SIZE(projMatrix), GE_CMD_PROJMATRIXNUMBER, GE_CMD_PROJMATRIXDATA);
		out = SaveMatrix(out, GE_MTX_TEXGEN, ARRAY_SIZE(tgenMatrix), GE_CMD_TGENMATRIXNUMBER, GE_CMD_TGENMATRIXDATA);

		// Keep only the command byte and the index bits of each matrix number.
		*out++ = boneMatrixNumber & 0xFF00007F;
		*out++ = worldmtxnum & 0xFF00000F;
		*out++ = viewmtxnum & 0xFF00000F;
		*out++ = projmtxnum & 0xFF00000F;
		*out++ = texmtxnum & 0xFF00000F;
		*out++ = GE_CMD_END << 24;
		return;
	}

	// Legacy format (savedContextVersion == 0).
	if (Memory::IsValidAddress(getClutAddress()))
		*out++ = loadclut;

	// Seems like it actually writes commands to load the matrices and then reset the counts.
	*out++ = boneMatrixNumber;
	*out++ = worldmtxnum;
	*out++ = viewmtxnum;
	*out++ = projmtxnum;
	*out++ = texmtxnum;

	// The raw matrix storage follows directly, in this fixed order.
	u8 *matrices = (u8 *)out;
	const auto appendRaw = [&matrices](const void *data, size_t bytes) {
		memcpy(matrices, data, bytes);
		matrices += bytes;
	};
	appendRaw(boneMatrix, sizeof(boneMatrix));
	appendRaw(worldMatrix, sizeof(worldMatrix));
	appendRaw(viewMatrix, sizeof(viewMatrix));
	appendRaw(projMatrix, sizeof(projMatrix));
	appendRaw(tgenMatrix, sizeof(tgenMatrix));
}
182
183
// Loads 12 bone matrix words from emulated memory at addr into boneMatrix,
// starting at the offset held in the low 7 bits of boneMatrixNumber, and then
// advances the matrix number by 12.
//
// Each source word is shifted left by 8 before being stored (presumably
// expanding the 24-bit float payload into a 32-bit float bit pattern).
//
// addr is not validated here (GetPointerUnchecked); the caller must make sure
// 12 words are readable at that address.
void GPUgstate::FastLoadBoneMatrix(u32 addr) {
	const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(addr);
	u32 num = boneMatrixNumber;
	const u32 offset = num & 0x7F;
	const u32 boneFloats = (u32)ARRAY_SIZE(boneMatrix);
	u32 *dst = (u32 *)(boneMatrix + offset);

	if (offset + 12 > boneFloats) {
		// The 7-bit offset can point near or past the end of boneMatrix. A
		// full 12-word store would then write outside the array and corrupt
		// adjacent state, so store only the words that still fit.
		for (u32 i = 0; i < 12 && offset + i < boneFloats; ++i) {
			dst[i] = src[i] << 8;
		}
	} else {
#ifdef _M_SSE
		__m128i row1 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)src), 8);
		__m128i row2 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(src + 4)), 8);
		__m128i row3 = _mm_slli_epi32(_mm_loadu_si128((const __m128i *)(src + 8)), 8);
		if ((num & 0x3) == 0) {
			// Offset is a multiple of 4 floats, so the aligned store is used.
			_mm_store_si128((__m128i *)dst, row1);
			_mm_store_si128((__m128i *)(dst + 4), row2);
			_mm_store_si128((__m128i *)(dst + 8), row3);
		} else {
			_mm_storeu_si128((__m128i *)dst, row1);
			_mm_storeu_si128((__m128i *)(dst + 4), row2);
			_mm_storeu_si128((__m128i *)(dst + 8), row3);
		}
#elif PPSSPP_ARCH(ARM_NEON)
		const uint32x4_t row1 = vshlq_n_u32(vld1q_u32(src), 8);
		const uint32x4_t row2 = vshlq_n_u32(vld1q_u32(src + 4), 8);
		const uint32x4_t row3 = vshlq_n_u32(vld1q_u32(src + 8), 8);
		vst1q_u32(dst, row1);
		vst1q_u32(dst + 4, row2);
		vst1q_u32(dst + 8, row3);
#else
		for (int i = 0; i < 12; i++) {
			dst[i] = src[i] << 8;
		}
#endif
	}

	// The matrix number always advances by a full matrix, even if some words
	// were dropped above.
	num += 12;
	gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
}
217
218
void GPUgstate::Restore(const u32_le *ptr) {
219
// Not sure what the first 10 values are, exactly, but these seem right.
220
gstate_c.vertexAddr = ptr[5];
221
gstate_c.indexAddr = ptr[6];
222
gstate_c.offsetAddr = ptr[7];
223
224
// Command values start 17 ints in.
225
const u32_le *cmds = ptr + 17;
226
for (size_t i = 0; i < ARRAY_SIZE(contextCmdRanges); ++i) {
227
for (int n = contextCmdRanges[i].start; n <= contextCmdRanges[i].end; ++n) {
228
cmdmem[n] = *cmds++;
229
}
230
}
231
232
if (savedContextVersion == 0) {
233
if (Memory::IsValidAddress(getClutAddress()))
234
loadclut = *cmds++;
235
boneMatrixNumber = *cmds++;
236
worldmtxnum = *cmds++;
237
viewmtxnum = *cmds++;
238
projmtxnum = *cmds++;
239
texmtxnum = *cmds++;
240
241
u8 *matrices = (u8 *)cmds;
242
memcpy(boneMatrix, matrices, sizeof(boneMatrix)); matrices += sizeof(boneMatrix);
243
memcpy(worldMatrix, matrices, sizeof(worldMatrix)); matrices += sizeof(worldMatrix);
244
memcpy(viewMatrix, matrices, sizeof(viewMatrix)); matrices += sizeof(viewMatrix);
245
memcpy(projMatrix, matrices, sizeof(projMatrix)); matrices += sizeof(projMatrix);
246
memcpy(tgenMatrix, matrices, sizeof(tgenMatrix)); matrices += sizeof(tgenMatrix);
247
} else {
248
cmds = LoadMatrix(cmds, boneMatrix, ARRAY_SIZE(boneMatrix));
249
cmds = LoadMatrix(cmds, worldMatrix, ARRAY_SIZE(worldMatrix));
250
cmds = LoadMatrix(cmds, viewMatrix, ARRAY_SIZE(viewMatrix));
251
cmds = LoadMatrix(cmds, projMatrix, ARRAY_SIZE(projMatrix));
252
cmds = LoadMatrix(cmds, tgenMatrix, ARRAY_SIZE(tgenMatrix));
253
254
boneMatrixNumber = (*cmds++) & 0xFF00007F;
255
worldmtxnum = (*cmds++) & 0xFF00000F;
256
viewmtxnum = (*cmds++) & 0xFF00000F;
257
projmtxnum = (*cmds++) & 0xFF00000F;
258
texmtxnum = (*cmds++) & 0xFF00000F;
259
}
260
261
if (gpu)
262
gpu->ResetMatrices();
263
264
gstate_c.Dirty(DIRTY_CULL_PLANES);
265
}
266
267
// Returns true when the weight bits of vertType select anything other than
// GE_VTYPE_WEIGHT_NONE.
bool vertTypeIsSkinningEnabled(u32 vertType) {
	const u32 weightBits = vertType & GE_VTYPE_WEIGHT_MASK;
	return weightBits != GE_VTYPE_WEIGHT_NONE;
}
270
271
struct GPUStateCache_v0 {
272
u32 vertexAddr;
273
u32 indexAddr;
274
275
u32 offsetAddr;
276
277
bool textureChanged;
278
bool textureFullAlpha;
279
bool vertexFullAlpha;
280
bool framebufChanged;
281
282
int skipDrawReason;
283
284
UVScale uv;
285
bool flipTexture;
286
};
287
288
void GPUStateCache::Reset() {
289
memset(&gstate_c, 0, sizeof(gstate_c));
290
}
291
292
// Serializes or deserializes the state cache for save states.
//
// Handles section versions up to 5, plus the original unversioned blob
// (GPUStateCache_v0). Fields that no longer exist are still read into
// throwaway locals so the stream position stays correct. The order of the
// Do() calls defines the on-disk format and must not change.
void GPUStateCache::DoState(PointerWrap &p) {
	auto s = p.Section("GPUStateCache", 0, 5);
	if (!s) {
		// Old state, this was not versioned.
		GPUStateCache_v0 legacy;
		Do(p, legacy);

		vertexAddr = legacy.vertexAddr;
		indexAddr = legacy.indexAddr;
		offsetAddr = legacy.offsetAddr;
		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		textureFullAlpha = legacy.textureFullAlpha;
		vertexFullAlpha = legacy.vertexFullAlpha;
		skipDrawReason = legacy.skipDrawReason;
		uv = legacy.uv;

		savedContextVersion = 0;
	} else {
		Do(p, vertexAddr);
		Do(p, indexAddr);
		Do(p, offsetAddr);

		// Legacy field, consumed and discarded.
		uint8_t unusedTextureChanged = 0;
		Do(p, unusedTextureChanged);
		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
		Do(p, textureFullAlpha);
		Do(p, vertexFullAlpha);
		// Legacy field, consumed and discarded.
		bool unusedFramebufChanged = false;
		Do(p, unusedFramebufChanged);

		Do(p, skipDrawReason);

		Do(p, uv);

		// Legacy field, consumed and discarded.
		bool unusedFlipTexture = false;
		Do(p, unusedFlipTexture);
	}

	// needShaderTexClamp and bgraTexture don't need to be saved.

	if (s >= 3) {
		// Legacy field, consumed and discarded.
		bool unusedTextureSimpleAlpha = false;
		Do(p, unusedTextureSimpleAlpha);
	}

	if (s < 2) {
		// Versions before 2 stored light data here; it is read and dropped.
		float scratch12[12];
		float scratch4[4];
		// lightpos, lightdir, lightattr, lightcol0, lightcol1, lightcol2
		for (int i = 0; i < 6; ++i) {
			Do(p, scratch12);
		}
		// lightangle, lightspot
		for (int i = 0; i < 2; ++i) {
			Do(p, scratch4);
		}
	}

	Do(p, morphWeights);

	Do(p, curTextureWidth);
	Do(p, curTextureHeight);
	Do(p, actualTextureHeight);
	// curTextureXOffset and curTextureYOffset don't need to be saved. Well, the above don't either...

	Do(p, vpWidth);
	Do(p, vpHeight);
	if (s == 4) {
		// Only version 4 stored a depth value here; it is read and dropped.
		float unusedDepth = 1.0f;
		Do(p, unusedDepth);
	}

	Do(p, curRTWidth);
	Do(p, curRTHeight);

	// curRTBufferWidth, curRTBufferHeight, and curRTOffsetX don't need to be saved.
	if (s >= 5) {
		Do(p, savedContextVersion);
	} else {
		// States older than version 5 always use the legacy context layout.
		savedContextVersion = 0;
	}

	if (p.GetMode() == PointerWrap::MODE_READ)
		gstate_c.Dirty(DIRTY_CULL_PLANES);
}
377
378
// Display names for GPU use-flags, indexed by bit number (0..31).
// "N/A" marks bits without a name. Looked up by GpuUseFlagToString().
static const char *const gpuUseFlagNames[32] = {
	"GPU_USE_DUALSOURCE_BLEND",             // bit 0
	"GPU_USE_LIGHT_UBERSHADER",             // bit 1
	"GPU_USE_FRAGMENT_TEST_CACHE",          // bit 2
	"GPU_USE_VS_RANGE_CULLING",             // bit 3
	"GPU_USE_BLEND_MINMAX",                 // bit 4
	"GPU_USE_LOGIC_OP",                     // bit 5
	"GPU_USE_FRAGMENT_UBERSHADER",          // bit 6
	"GPU_USE_TEXTURE_NPOT",                 // bit 7
	"GPU_USE_ANISOTROPY",                   // bit 8
	"GPU_USE_CLEAR_RAM_HACK",               // bit 9
	"GPU_USE_INSTANCE_RENDERING",           // bit 10
	"GPU_USE_VERTEX_TEXTURE_FETCH",         // bit 11
	"GPU_USE_TEXTURE_FLOAT",                // bit 12
	"GPU_USE_16BIT_FORMATS",                // bit 13
	"GPU_USE_DEPTH_CLAMP",                  // bit 14
	"GPU_USE_TEXTURE_LOD_CONTROL",          // bit 15
	"GPU_USE_DEPTH_TEXTURE",                // bit 16
	"GPU_USE_ACCURATE_DEPTH",               // bit 17
	"GPU_USE_GS_CULLING",                   // bit 18
	"N/A",                                  // bit 19
	"GPU_USE_FRAMEBUFFER_FETCH",            // bit 20
	"GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT",  // bit 21
	"GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT",    // bit 22
	"GPU_ROUND_DEPTH_TO_16BIT",             // bit 23
	"GPU_USE_CLIP_DISTANCE",                // bit 24
	"GPU_USE_CULL_DISTANCE",                // bit 25
	"N/A",                                  // bit 26
	"N/A",                                  // bit 27
	"N/A",                                  // bit 28
	"GPU_USE_VIRTUAL_REALITY",              // bit 29
	"GPU_USE_SINGLE_PASS_STEREO",           // bit 30
	"GPU_USE_SIMPLE_STEREO_PERSPECTIVE",    // bit 31
};
412
413
const char *GpuUseFlagToString(int useFlag) {
414
if ((u32)useFlag < 32) {
415
return gpuUseFlagNames[useFlag];
416
} else {
417
return "N/A";
418
}
419
}
420
421