Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUCommonHW.cpp
5673 views
1
#include "Common/Profiler/Profiler.h"
2
3
#include "Common/GPU/thin3d.h"
4
#include "Common/Serialize/Serializer.h"
5
#include "Common/System/System.h"
6
7
#include "Core/System.h"
8
#include "Core/Config.h"
9
#include "Core/Util/PPGeDraw.h"
10
11
#include "GPU/GPUCommonHW.h"
12
#include "GPU/Common/SplineCommon.h"
13
#include "GPU/Common/DrawEngineCommon.h"
14
#include "GPU/Common/TextureCacheCommon.h"
15
#include "GPU/Common/FramebufferManagerCommon.h"
16
17
// One row of the static command table below: maps a GE command id to its
// dispatch flags, the state bits it dirties, and (optionally) an execute handler.
struct CommonCommandTableEntry {
	uint8_t cmd;    // GE command id (the top byte of a 32-bit GE op, see PreExecuteOp).
	uint8_t flags;  // FLAG_* dispatch bits (execute / flush-before-on-change / PC access).
	uint64_t dirty; // DIRTY_* state bits to mark dirty when the command's value changes.
	GPUCommonHW::CmdFunc func;  // Handler; required when flags contain FLAG_EXECUTE/FLAG_EXECUTEONCHANGE.
};
23
24
// Runtime per-command dispatch info, built from commonCommandTable in the
// GPUCommonHW constructor and tweaked at runtime by UpdateCmdInfo().
struct CommandInfo {
	// Low 8 bits hold the FLAG_* dispatch bits; bits 8 and up hold DIRTY_* bits.
	uint64_t flags;
	// Execute handler; null for commands that only dirty state.
	GPUCommonHW::CmdFunc func;

	// Dirty flags are mashed into the regular flags by a left shift of 8.
	void AddDirty(u64 dirty) {
		flags |= dirty << 8;
	}
	void RemoveDirty(u64 dirty) {
		flags &= ~(dirty << 8);
	}
};
36
37
// Fast dispatch table indexed by GE command id (0..255), populated from
// commonCommandTable in the GPUCommonHW constructor.
static CommandInfo cmdInfo_[256];
38
39
// Master command table: one entry per GE command that this HW backend cares
// about. The constructor converts this into the flat cmdInfo_[256] dispatch
// array (and logs any dupes/missing entries). Entries with only a cmd id are
// deliberate no-ops; entries without a func must not carry FLAG_EXECUTE*.
const CommonCommandTableEntry commonCommandTable[] = {
	// From Common. No flushing but definitely need execute.
	{ GE_CMD_OFFSETADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_OffsetAddr },
	{ GE_CMD_ORIGIN, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommon::Execute_Origin },
	{ GE_CMD_JUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_Jump },
	{ GE_CMD_CALL, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_Call },
	{ GE_CMD_RET, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_Ret },
	{ GE_CMD_END, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_End },
	{ GE_CMD_VADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Vaddr },
	{ GE_CMD_IADDR, FLAG_EXECUTE, 0, &GPUCommon::Execute_Iaddr },
	{ GE_CMD_BJUMP, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommon::Execute_BJump }, // EXECUTE
	{ GE_CMD_BOUNDINGBOX, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_BoundingBox }, // Shouldn't need to FLUSHBEFORE.

	{ GE_CMD_PRIM, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Prim },
	{ GE_CMD_BEZIER, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Bezier },
	{ GE_CMD_SPLINE, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_Spline },

	// Changing the vertex type requires us to flush.
	{ GE_CMD_VERTEXTYPE, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommonHW::Execute_VertexType },

	{ GE_CMD_LOADCLUT, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_LoadClut },

	// These two are actually processed in CMD_END.
	{ GE_CMD_SIGNAL },
	{ GE_CMD_FINISH },

	// Changes that dirty the framebuffer
	{ GE_CMD_FRAMEBUFPTR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_FRAMEBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
	{ GE_CMD_FRAMEBUFPIXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ZBUFPTR, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_ZBUFWIDTH, FLAG_FLUSHBEFOREONCHANGE },

	{ GE_CMD_FOGCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FOGCOLOR },
	{ GE_CMD_FOG1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FOGCOEF },
	{ GE_CMD_FOG2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FOGCOEF },

	// These affect the fragment shader so need flushing.
	{ GE_CMD_CLEARMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },
	{ GE_CMD_TEXTUREMAPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },
	{ GE_CMD_FOGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_TEXMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_TEXSHADELS, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	// Raster state for Direct3D 9, uncommon.
	{ GE_CMD_SHADEMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_RASTER_STATE },
	{ GE_CMD_TEXFUNC, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEX_ALPHA_MUL },
	{ GE_CMD_COLORTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ALPHATESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_COLORTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_COLORTESTMASK, FLAG_FLUSHBEFOREONCHANGE, DIRTY_ALPHACOLORMASK | DIRTY_FRAGMENTSHADER_STATE },

	// These change the vertex shader so need flushing.
	{ GE_CMD_REVERSENORMAL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTINGENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },
	{ GE_CMD_LIGHTENABLE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTENABLE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTENABLE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTENABLE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_LIGHTTYPE0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT0 },
	{ GE_CMD_LIGHTTYPE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT1 },
	{ GE_CMD_LIGHTTYPE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT2 },
	{ GE_CMD_LIGHTTYPE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_LIGHT3 },
	{ GE_CMD_MATERIALUPDATE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },

	// These change all shaders so need flushing.
	{ GE_CMD_LIGHTMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE },

	{ GE_CMD_TEXFILTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXWRAP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE },

	// Uniform changes. though the fragmentshader optimizes based on these sometimes.
	{ GE_CMD_ALPHATEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_ALPHACOLORREF | DIRTY_ALPHACOLORMASK | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_COLORREF, FLAG_FLUSHBEFOREONCHANGE, DIRTY_ALPHACOLORREF | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_TEXENVCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXENV },

	// Simple render state changes. Handled in StateMapping.cpp.
	{ GE_CMD_CULL, FLAG_FLUSHBEFOREONCHANGE, DIRTY_RASTER_STATE },
	{ GE_CMD_CULLFACEENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_RASTER_STATE },
	{ GE_CMD_DITHERENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_RASTER_STATE },
	{ GE_CMD_STENCILOP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_STENCILTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_STENCILREPLACEVALUE | DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE },
	{ GE_CMD_STENCILTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ALPHABLENDENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_BLENDMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_BLENDFIXEDA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_BLENDFIXEDB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_MASKRGB, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
	{ GE_CMD_MASKALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_COLORWRITEMASK },
	{ GE_CMD_ZTEST, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ZTESTENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_ZWRITEDISABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHSTENCIL_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_LOGICOP, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },
	{ GE_CMD_LOGICOPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_BLEND_STATE | DIRTY_FRAGMENTSHADER_STATE },

	{ GE_CMD_TEXMAPMODE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE },

	// These are read on every SubmitPrim, no need for dirtying or flushing.
	{ GE_CMD_TEXSCALEU },
	{ GE_CMD_TEXSCALEV },
	{ GE_CMD_TEXOFFSETU },
	{ GE_CMD_TEXOFFSETV },

	{ GE_CMD_TEXSIZE0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TexSize0 },
	{ GE_CMD_TEXSIZE1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE4, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE5, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE6, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXSIZE7, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
	{ GE_CMD_TEXLEVEL, FLAG_EXECUTEONCHANGE, DIRTY_TEXTURE_PARAMS, &GPUCommonHW::Execute_TexLevel },
	{ GE_CMD_TEXLODSLOPE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
	{ GE_CMD_TEXADDR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR4, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR5, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR6, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXADDR7, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_IMAGE },
	{ GE_CMD_TEXBUFWIDTH1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH4, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH5, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH6, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },
	{ GE_CMD_TEXBUFWIDTH7, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS },

	// These must flush on change, so that LoadClut doesn't have to always flush.
	{ GE_CMD_CLUTADDR, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_CLUTADDRUPPER, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_CLUTFORMAT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_TEXTURE_PARAMS | DIRTY_DEPAL },

	// Morph weights. TODO: Remove precomputation?
	{ GE_CMD_MORPHWEIGHT0, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT1, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT2, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT3, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT4, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT5, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT6, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },
	{ GE_CMD_MORPHWEIGHT7, FLAG_FLUSHBEFOREONCHANGE | FLAG_EXECUTEONCHANGE, 0, &GPUCommon::Execute_MorphWeight },

	// Control spline/bezier patches. Don't really require flushing as such, but meh.
	{ GE_CMD_PATCHDIVISION, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_PATCHPRIMITIVE, FLAG_FLUSHBEFOREONCHANGE },
	{ GE_CMD_PATCHFACING, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VERTEXSHADER_STATE },
	{ GE_CMD_PATCHCULLENABLE, FLAG_FLUSHBEFOREONCHANGE },

	// Can probably ignore this one as we don't support AA lines.
	{ GE_CMD_ANTIALIASENABLE, FLAG_FLUSHBEFOREONCHANGE },

	// Viewport.
	{ GE_CMD_OFFSETX, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_OFFSETY, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTXSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTYSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTXCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTYCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULL_PLANES },
	{ GE_CMD_VIEWPORTZSCALE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
	{ GE_CMD_VIEWPORTZCENTER, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_CULLRANGE | DIRTY_DEPTHRANGE | DIRTY_PROJMATRIX | DIRTY_VIEWPORTSCISSOR_STATE },
	{ GE_CMD_DEPTHCLAMPENABLE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_RASTER_STATE },

	// Z clip
	{ GE_CMD_MINZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },
	{ GE_CMD_MAXZ, FLAG_FLUSHBEFOREONCHANGE, DIRTY_DEPTHRANGE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE },

	// Region
	{ GE_CMD_REGION1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_REGION2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

	// Scissor
	{ GE_CMD_SCISSOR1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },
	{ GE_CMD_SCISSOR2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_FRAMEBUF | DIRTY_TEXTURE_PARAMS | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_CULLRANGE | DIRTY_CULL_PLANES },

	// Lighting base colors
	{ GE_CMD_AMBIENTCOLOR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_AMBIENT },
	{ GE_CMD_AMBIENTALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_AMBIENT },
	{ GE_CMD_MATERIALDIFFUSE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATDIFFUSE },
	{ GE_CMD_MATERIALEMISSIVE, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATEMISSIVE },
	{ GE_CMD_MATERIALAMBIENT, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATAMBIENTALPHA },
	{ GE_CMD_MATERIALALPHA, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATAMBIENTALPHA },
	{ GE_CMD_MATERIALSPECULAR, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATSPECULAR },
	{ GE_CMD_MATERIALSPECULARCOEF, FLAG_FLUSHBEFOREONCHANGE, DIRTY_MATSPECULAR },

	// Light parameters
	{ GE_CMD_LX0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LY0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LZ0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LX1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LY1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LZ1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LX2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LY2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LZ2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LX3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LY3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LZ3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LDX0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDY0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDZ0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDX1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDY1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDZ1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDX2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDY2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDZ2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDX3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LDY3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LDZ3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LKA0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKB0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKA1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKB1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKA2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKB2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKA3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LKB3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LKC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LKS0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKS1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKS2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKS3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LKO0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LKO1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LKO2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LKO3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	{ GE_CMD_LAC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LDC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LSC0, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT0 },
	{ GE_CMD_LAC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LDC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LSC1, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT1 },
	{ GE_CMD_LAC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LDC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LSC2, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT2 },
	{ GE_CMD_LAC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LDC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },
	{ GE_CMD_LSC3, FLAG_FLUSHBEFOREONCHANGE, DIRTY_LIGHT3 },

	// Ignored commands
	{ GE_CMD_TEXFLUSH, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TexFlush },
	{ GE_CMD_TEXSYNC, 0 },

	// These are just nop or part of other later commands.
	{ GE_CMD_NOP, 0 },
	{ GE_CMD_BASE, 0 },
	{ GE_CMD_TRANSFERSRC, 0 },
	{ GE_CMD_TRANSFERSRCW, 0 },
	{ GE_CMD_TRANSFERDST, 0 },
	{ GE_CMD_TRANSFERDSTW, 0 },
	{ GE_CMD_TRANSFERSRCPOS, 0 },
	{ GE_CMD_TRANSFERDSTPOS, 0 },
	{ GE_CMD_TRANSFERSIZE, 0 },
	{ GE_CMD_TRANSFERSTART, FLAG_EXECUTE | FLAG_READS_PC, 0, &GPUCommonHW::Execute_BlockTransferStart },

	// We don't use the dither table.
	{ GE_CMD_DITH0 },
	{ GE_CMD_DITH1 },
	{ GE_CMD_DITH2 },
	{ GE_CMD_DITH3 },

	// These handle their own flushing.
	{ GE_CMD_WORLDMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_WorldMtxNum },
	{ GE_CMD_WORLDMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_WorldMtxData },
	{ GE_CMD_VIEWMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_ViewMtxNum },
	{ GE_CMD_VIEWMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_ViewMtxData },
	{ GE_CMD_PROJMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_ProjMtxNum },
	{ GE_CMD_PROJMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_ProjMtxData },
	{ GE_CMD_TGENMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_TgenMtxNum },
	{ GE_CMD_TGENMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_TgenMtxData },
	{ GE_CMD_BONEMATRIXNUMBER, FLAG_EXECUTE | FLAG_READS_PC | FLAG_WRITES_PC, 0, &GPUCommonHW::Execute_BoneMtxNum },
	{ GE_CMD_BONEMATRIXDATA, FLAG_EXECUTE, 0, &GPUCommonHW::Execute_BoneMtxData },

	// Vertex Screen/Texture/Color
	{ GE_CMD_VSCX },
	{ GE_CMD_VSCY },
	{ GE_CMD_VSCZ },
	{ GE_CMD_VTCS },
	{ GE_CMD_VTCT },
	{ GE_CMD_VTCQ },
	{ GE_CMD_VCV },
	{ GE_CMD_VAP, FLAG_EXECUTE, 0, &GPUCommon::Execute_ImmVertexAlphaPrim },
	{ GE_CMD_VFC },
	{ GE_CMD_VSCV },

	// "Missing" commands (gaps in the sequence)
	{ GE_CMD_UNKNOWN_03, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_0D, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_11, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_29, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_34, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_35, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_39, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_4E, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_4F, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_52, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_59, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_5A, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_B6, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_B7, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_D1, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_ED, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_EF, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FA, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FB, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FC, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FD, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	{ GE_CMD_UNKNOWN_FE, FLAG_EXECUTE, 0, &GPUCommon::Execute_Unknown },
	// Appears to be debugging related or something? Hit a lot in GoW.
	{ GE_CMD_NOP_FF, 0 },
};
361
362
// Builds the flat cmdInfo_ dispatch array from commonCommandTable, sanity-checks
// the table (duplicates, missing entries, execute flags without a handler),
// then applies runtime-configurable tweaks and picks the MSAA level.
GPUCommonHW::GPUCommonHW(GraphicsContext *gfxCtx, Draw::DrawContext *draw) : GPUCommon(gfxCtx, draw) {
	memset(cmdInfo_, 0, sizeof(cmdInfo_));

	// Convert the command table to a faster format, and check for dupes.
	std::set<u8> dupeCheck;
	for (const CommonCommandTableEntry &entry : commonCommandTable) {
		const u8 cmd = entry.cmd;
		// set::insert reports via .second whether the id was newly added.
		if (!dupeCheck.insert(cmd).second) {
			ERROR_LOG(Log::G3D, "Command table Dupe: %02x (%i)", (int)cmd, (int)cmd);
		}
		// Fold the dirty bits into the upper part of the flags word (see CommandInfo).
		cmdInfo_[cmd].flags |= (uint64_t)entry.flags | (entry.dirty << 8);
		cmdInfo_[cmd].func = entry.func;
		if ((cmdInfo_[cmd].flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) && !cmdInfo_[cmd].func) {
			// Can't have FLAG_EXECUTE commands without a function pointer to execute.
			Crash();
		}
	}

	// Find commands missing from the table.
	for (int i = 0; i < 0xEF; i++) {
		if (dupeCheck.count((u8)i) == 0) {
			ERROR_LOG(Log::G3D, "Command missing from table: %02x (%i)", i, i);
		}
	}

	UpdateCmdInfo();
	UpdateMSAALevel(draw);
}
391
392
// Tears down the shared HW-backend managers. Destruction order matters:
// FBOs are destroyed before the framebuffer manager object itself.
GPUCommonHW::~GPUCommonHW() {
	// Clear features so they're not visible in system info.
	gstate_c.SetUseFlags(0);

	// Delete the various common managers.
	framebufferManager_->DestroyAllFBOs();
	delete framebufferManager_;
	delete textureCache_;
	// NOTE(review): shaderManager_ is null-checked while the others are not -
	// presumably a subclass may own/clear it differently; confirm before changing.
	if (shaderManager_) {
		shaderManager_->ClearShaders();
		delete shaderManager_;
	}
}
405
406
// Called once per frame. Might also get called during the pause screen
407
// if "transparent".
408
// Called once per frame. Might also get called during the pause screen
// if "transparent".
// When the config changed, re-derives GPU feature flags and notifies every
// subsystem that caches config-derived state.
void GPUCommonHW::CheckConfigChanged(const DisplayLayoutConfig &config) {
	if (configChanged_) {
		ClearCacheNextFrame();
		// Re-evaluate feature flags first; the notified subsystems may read them.
		gstate_c.SetUseFlags(CheckGPUFeatures());
		drawEngineCommon_->NotifyConfigChanged();
		textureCache_->NotifyConfigChanged();
		framebufferManager_->NotifyConfigChanged();
		BuildReportingInfo();
		configChanged_ = false;
	}

	// Check needed when running tests.
	if (framebufferManager_) {
		framebufferManager_->CheckPostShaders(config);
	}
}
424
425
void GPUCommonHW::CheckDisplayResized() {
426
if (displayResized_) {
427
framebufferManager_->NotifyDisplayResized();
428
displayResized_ = false;
429
}
430
}
431
432
void GPUCommonHW::CheckRenderResized(const DisplayLayoutConfig &config) {
433
if (renderResized_) {
434
framebufferManager_->NotifyRenderResized(config, msaaLevel_);
435
renderResized_ = false;
436
}
437
}
438
439
// Call at the END of the GPU implementation's DeviceLost
440
// Call at the END of the GPU implementation's DeviceLost
// Releases device-dependent resources in all managers and drops the draw
// context pointer. Order follows the original teardown sequence.
void GPUCommonHW::DeviceLost() {
	framebufferManager_->DeviceLost();
	draw_ = nullptr;
	// Drop cached textures (non-forced clear) before releasing device objects.
	textureCache_->Clear(false);
	textureCache_->DeviceLost();
	shaderManager_->DeviceLost();
	drawEngineCommon_->DeviceLost();
}
448
449
// Call at the start of the GPU implementation's DeviceRestore
450
// Call at the start of the GPU implementation's DeviceRestore
// Re-attaches a fresh draw context to every manager and re-derives
// device-dependent state (feature flags, reporting info, cmd table tweaks).
void GPUCommonHW::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;
	displayResized_ = true; // re-check display bounds.
	renderResized_ = true;
	framebufferManager_->DeviceRestore(draw_);
	textureCache_->DeviceRestore(draw_);
	shaderManager_->DeviceRestore(draw_);
	drawEngineCommon_->DeviceRestore(draw_);

	// PPGe (on-screen UI drawing) also holds the draw context.
	PPGeSetDrawContext(draw_);

	gstate_c.SetUseFlags(CheckGPUFeatures());
	BuildReportingInfo();
	UpdateCmdInfo();
}
465
466
void GPUCommonHW::UpdateCmdInfo() {
467
if (g_Config.bSoftwareSkinning) {
468
cmdInfo_[GE_CMD_VERTEXTYPE].flags &= ~FLAG_FLUSHBEFOREONCHANGE;
469
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexTypeSkinning;
470
} else {
471
cmdInfo_[GE_CMD_VERTEXTYPE].flags |= FLAG_FLUSHBEFOREONCHANGE;
472
cmdInfo_[GE_CMD_VERTEXTYPE].func = &GPUCommonHW::Execute_VertexType;
473
}
474
475
// Reconfigure for light ubershader or not.
476
for (int i = 0; i < 4; i++) {
477
if (gstate_c.Use(GPU_USE_LIGHT_UBERSHADER)) {
478
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
479
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_LIGHT_CONTROL);
480
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
481
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_LIGHT_CONTROL);
482
} else {
483
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL);
484
cmdInfo_[GE_CMD_LIGHTENABLE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE);
485
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].RemoveDirty(DIRTY_LIGHT_CONTROL);
486
cmdInfo_[GE_CMD_LIGHTTYPE0 + i].AddDirty(DIRTY_VERTEXSHADER_STATE);
487
}
488
}
489
490
if (gstate_c.Use(GPU_USE_LIGHT_UBERSHADER)) {
491
cmdInfo_[GE_CMD_MATERIALUPDATE].RemoveDirty(DIRTY_VERTEXSHADER_STATE);
492
cmdInfo_[GE_CMD_MATERIALUPDATE].AddDirty(DIRTY_LIGHT_CONTROL);
493
} else {
494
cmdInfo_[GE_CMD_MATERIALUPDATE].RemoveDirty(DIRTY_LIGHT_CONTROL);
495
cmdInfo_[GE_CMD_MATERIALUPDATE].AddDirty(DIRTY_VERTEXSHADER_STATE);
496
}
497
498
if (gstate_c.Use(GPU_USE_FRAGMENT_UBERSHADER)) {
499
// Texfunc controls both texalpha and doubling. The rest is not dynamic yet so can't remove fragment shader dirtying.
500
cmdInfo_[GE_CMD_TEXFUNC].AddDirty(DIRTY_TEX_ALPHA_MUL);
501
} else {
502
cmdInfo_[GE_CMD_TEXFUNC].RemoveDirty(DIRTY_TEX_ALPHA_MUL);
503
}
504
}
505
506
// Per-host-frame hook: after the base class work, re-derives feature flags
// once the draw engine first reports use of exact-equal depth tests.
void GPUCommonHW::BeginHostFrame(const DisplayLayoutConfig &config) {
	GPUCommon::BeginHostFrame(config);
	// One-shot: latch sawExactEqualDepth_ the first time it's observed, since
	// CheckGPUFeaturesLate() keys 16-bit depth rounding off it.
	if (drawEngineCommon_->EverUsedExactEqualDepth() && !sawExactEqualDepth_) {
		sawExactEqualDepth_ = true;
		gstate_c.SetUseFlags(CheckGPUFeatures());
	}
}
513
514
// Forwards the sceDisplay-style framebuffer setting (address, stride, pixel
// format) to the framebuffer manager, then raises the display notification.
void GPUCommonHW::SetDisplayFramebuffer(u32 framebuf, u32 stride, GEBufferFormat format) {
	framebufferManager_->SetDisplayFramebuffer(framebuf, stride, format);
	NotifyDisplay(framebuf, stride, format);
}
518
519
void GPUCommonHW::CheckFlushOp(int cmd, u32 diff) {
520
const u8 cmdFlags = cmdInfo_[cmd].flags;
521
if (diff && (cmdFlags & FLAG_FLUSHBEFOREONCHANGE)) {
522
if (dumpThisFrame_) {
523
NOTICE_LOG(Log::G3D, "================ FLUSH ================");
524
}
525
drawEngineCommon_->Flush();
526
}
527
}
528
529
// Pre-dispatch hook: the command id lives in the top byte of the 32-bit op.
void GPUCommonHW::PreExecuteOp(u32 op, u32 diff) {
	CheckFlushOp(op >> 24, diff);
}
532
533
// Finishes all pending GPU-side work before presenting: queued depth copies,
// any buffered draws, and the cached last-shader binding.
void GPUCommonHW::PrepareCopyDisplayToOutput(const DisplayLayoutConfig &config) {
	drawEngineCommon_->FlushQueuedDepth();
	// Flush anything left over.
	drawEngineCommon_->Flush();

	// Force shader re-bind next draw; the output copy changes pipeline state.
	shaderManager_->DirtyLastShader();

	// after this, render pass is active.
	framebufferManager_->PrepareCopyDisplayToOutput(config, curFramebufferDirty_);
}
543
544
// Performs the actual display-to-backbuffer copy and marks the current
// framebuffer as presented (no longer dirty).
void GPUCommonHW::CopyDisplayToOutput(const DisplayLayoutConfig &config) {
	framebufferManager_->CopyDisplayToOutput(config);
	curFramebufferDirty_ = false;
}
548
549
// True if the framebuffer manager already presented a frame this host frame.
bool GPUCommonHW::PresentedThisFrame() const {
	return framebufferManager_->PresentedThisFrame();
}
552
553
// Savestate serialization. On load (unless the core is frozen for rewind),
// invalidates GPU-side caches so they rebuild from the restored memory.
void GPUCommonHW::DoState(PointerWrap &p) {
	GPUCommon::DoState(p);

	// TODO: Some of these things may not be necessary.
	// None of these are necessary when saving.
	if (p.mode == p.MODE_READ && !PSP_CoreParameter().frozen) {
		// Forced clear - cached textures may not match restored RAM.
		textureCache_->Clear(true);

		gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
		framebufferManager_->DestroyAllFBOs();
	}
}
565
566
// Schedules a texture cache clear for the next frame (deferred, not immediate).
void GPUCommonHW::ClearCacheNextFrame() {
	textureCache_->ClearNextFrame();
}
569
570
// Needs to be called on GPU thread, not reporting thread.
571
void GPUCommonHW::BuildReportingInfo() {
572
using namespace Draw;
573
574
reportingPrimaryInfo_ = draw_->GetInfoString(InfoField::VENDORSTRING);
575
reportingFullInfo_ = reportingPrimaryInfo_ + " - " + System_GetProperty(SYSPROP_GPUDRIVER_VERSION) + " - " + draw_->GetInfoString(InfoField::SHADELANGVERSION);
576
}
577
578
// Derives the GPU_USE_* feature bitmask from the draw context's device caps,
// known driver bugs, user config, and per-game compat flags. Subclass backends
// may further adjust the result (see the note on the fragment ubershader).
u32 GPUCommonHW::CheckGPUFeatures() const {
	u32 features = 0;
	if (draw_->GetDeviceCaps().logicOpSupported) {
		features |= GPU_USE_LOGIC_OP;
	}
	if (draw_->GetDeviceCaps().anisoSupported) {
		features |= GPU_USE_ANISOTROPY;
	}
	if (draw_->GetDeviceCaps().textureNPOTFullySupported) {
		features |= GPU_USE_TEXTURE_NPOT;
	}
	// Dual-source blending is gated on both caps and a known driver bug.
	if (draw_->GetDeviceCaps().dualSourceBlend) {
		if (!g_Config.bVendorBugChecksEnabled || !draw_->GetBugs().Has(Draw::Bugs::DUAL_SOURCE_BLENDING_BROKEN)) {
			features |= GPU_USE_DUALSOURCE_BLEND;
		}
	}
	if (draw_->GetDeviceCaps().blendMinMaxSupported) {
		features |= GPU_USE_BLEND_MINMAX;
	}

	if (draw_->GetDeviceCaps().clipDistanceSupported) {
		features |= GPU_USE_CLIP_DISTANCE;
	}

	if (draw_->GetDeviceCaps().cullDistanceSupported) {
		features |= GPU_USE_CULL_DISTANCE;
	}

	if (draw_->GetDeviceCaps().textureDepthSupported) {
		features |= GPU_USE_DEPTH_TEXTURE;
	}

	if (draw_->GetDeviceCaps().depthClampSupported) {
		// Some backends always do GPU_USE_ACCURATE_DEPTH, but it's required for depth clamp.
		features |= GPU_USE_DEPTH_CLAMP | GPU_USE_ACCURATE_DEPTH;
	}

	// Range culling in the vertex shader needs either clip/cull distances or
	// the ability to discard via NaN (broken on some drivers).
	bool canClipOrCull = draw_->GetDeviceCaps().clipDistanceSupported || draw_->GetDeviceCaps().cullDistanceSupported;
	bool canDiscardVertex = !draw_->GetBugs().Has(Draw::Bugs::BROKEN_NAN_IN_CONDITIONAL);
	if ((canClipOrCull || canDiscardVertex) && !g_Config.bDisableRangeCulling) {
		// We'll dynamically use the parts that are supported, to reduce artifacts as much as possible.
		features |= GPU_USE_VS_RANGE_CULLING;
	}

	if (draw_->GetDeviceCaps().framebufferFetchSupported) {
		features |= GPU_USE_FRAMEBUFFER_FETCH;
		features |= GPU_USE_SHADER_BLENDING; // doesn't matter if we are buffered or not here.
	} else {
		if (!g_Config.bSkipBufferEffects) {
			features |= GPU_USE_SHADER_BLENDING;
		}
	}

	// Light ubershader needs integer bitwise ops in the shading language.
	if (draw_->GetShaderLanguageDesc().bitwiseOps && g_Config.bUberShaderVertex) {
		features |= GPU_USE_LIGHT_UBERSHADER;
	}

	if (PSP_CoreParameter().compat.flags().ClearToRAM) {
		features |= GPU_USE_CLEAR_RAM_HACK;
	}

	// Even without depth clamp, force accurate depth on for some games that break without it.
	if (PSP_CoreParameter().compat.flags().DepthRangeHack) {
		features |= GPU_USE_ACCURATE_DEPTH;
	}

	// Some backends will turn this off again in the calling function.
	if (g_Config.bUberShaderFragment) {
		features |= GPU_USE_FRAGMENT_UBERSHADER;
	}

	return features;
}
651
652
// Second-stage feature selection: decides how to map the PSP's 16-bit depth
// range onto the backend's depth buffer format (scaling vs rounding).
u32 GPUCommonHW::CheckGPUFeaturesLate(u32 features) const {
	const Draw::DataFormat depthFormat = draw_->GetDeviceCaps().preferredDepthBufferFormat;
	// If we already have a 16-bit depth buffer, we don't need to round.
	if (depthFormat == Draw::DataFormat::D16)
		return features;

	const bool prefer24 = depthFormat == Draw::DataFormat::D24_S8;
	const bool accurateDepth = (features & GPU_USE_ACCURATE_DEPTH) != 0;

	if (sawExactEqualDepth_ && accurateDepth && !PSP_CoreParameter().compat.flags().ForceMaxDepthResolution) {
		// Exact equal tests tend to have issues unless we use the PSP's depth range.
		// We use 24-bit depth virtually everwhere, the fallback is just for safety.
		features |= prefer24 ? GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT : GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
	} else if (!g_Config.bHighQualityDepth && accurateDepth) {
		features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
	} else if (PSP_CoreParameter().compat.flags().PixelDepthRounding) {
		if (prefer24 && accurateDepth) {
			// Here we can simulate a 16 bit depth buffer by scaling.
			// Note that the depth buffer is fixed point, not floating, so dividing by 256 is pretty good.
			features |= GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT;
		} else {
			// Use fragment rounding on where available otherwise.
			features |= GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT;
		}
	} else if (PSP_CoreParameter().compat.flags().VertexDepthRounding) {
		features |= GPU_ROUND_DEPTH_TO_16BIT;
	}

	return features;
}
682
683
// Validates the configured MSAA level against the device's supported sample
// mask; reverts to 0 (no MSAA) when the level isn't supported.
void GPUCommonHW::UpdateMSAALevel(Draw::DrawContext *draw) {
	const int level = g_Config.iMultiSampleLevel;
	const bool supported = draw && (draw->GetDeviceCaps().multiSampleLevelsMask & (1 << level)) != 0;
	msaaLevel_ = supported ? level : 0;
}
692
693
// Lists shader IDs for the debugger, routing to whichever subsystem owns the
// given shader type (vertex loaders, texture shaders, or the shader manager).
std::vector<std::string> GPUCommonHW::DebugGetShaderIDs(DebugShaderType type) {
	if (type == SHADER_TYPE_VERTEXLOADER)
		return drawEngineCommon_->DebugGetVertexLoaderIDs();
	if (type == SHADER_TYPE_TEXTURE)
		return textureCache_->GetTextureShaderCache()->DebugGetShaderIDs(type);
	return shaderManager_->DebugGetShaderIDs(type);
}
703
704
std::string GPUCommonHW::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
705
switch (type) {
706
case SHADER_TYPE_VERTEXLOADER:
707
return drawEngineCommon_->DebugGetVertexLoaderString(id, stringType);
708
case SHADER_TYPE_TEXTURE:
709
return textureCache_->GetTextureShaderCache()->DebugGetShaderString(id, type, stringType);
710
default:
711
return shaderManager_->DebugGetShaderString(id, type, stringType);
712
}
713
}
714
715
// Reads back either the current render target or the display framebuffer
// into a debug buffer. Address/stride/format come from GE state for the
// render target, or from the framebuffer manager for the display buffer.
bool GPUCommonHW::GetCurrentFramebuffer(GPUDebugBuffer &buffer, GPUDebugFramebufferType type, int maxRes) {
	const bool render = type == GPU_DBG_FRAMEBUF_RENDER;
	const u32 fb_address = render ? (gstate.getFrameBufRawAddress() | 0x04000000) : framebufferManager_->DisplayFramebufAddr();
	const int fb_stride = render ? gstate.FrameBufStride() : framebufferManager_->DisplayFramebufStride();
	const GEBufferFormat format = render ? gstate_c.framebufFormat : framebufferManager_->DisplayFramebufFormat();
	return framebufferManager_->GetFramebuffer(fb_address, fb_stride, format, buffer, maxRes);
}
721
722
// Reads back the depth buffer bound to the current render target into a debug buffer.
bool GPUCommonHW::GetCurrentDepthbuffer(GPUDebugBuffer &buffer) {
	const u32 fb_address = gstate.getFrameBufRawAddress() | 0x04000000;
	const u32 z_address = gstate.getDepthBufRawAddress() | 0x04000000;
	return framebufferManager_->GetDepthbuffer(fb_address, gstate.FrameBufStride(), z_address, gstate.DepthBufStride(), buffer);
}
731
732
// Reads back the stencil contents of the current render target into a debug buffer.
bool GPUCommonHW::GetCurrentStencilbuffer(GPUDebugBuffer &buffer) {
	const u32 fb_address = gstate.getFrameBufRawAddress() | 0x04000000;
	return framebufferManager_->GetStencilbuffer(fb_address, gstate.FrameBufStride(), buffer);
}
738
739
// Reads back the final output framebuffer into a debug buffer.
// framebufferManager_ can be null here when taking screens in software rendering mode.
// TODO: Actually grab the framebuffer anyway.
bool GPUCommonHW::GetOutputFramebuffer(GPUDebugBuffer &buffer) {
	if (!framebufferManager_)
		return false;
	return framebufferManager_->GetOutputFramebuffer(buffer);
}
744
745
std::vector<const VirtualFramebuffer *> GPUCommonHW::GetFramebufferList() const {
746
return framebufferManager_->GetFramebufferList();
747
}
748
749
// Copies the currently loaded CLUT (palette) into a debug buffer.
bool GPUCommonHW::GetCurrentClut(GPUDebugBuffer &buffer) {
	return textureCache_->GetCurrentClutBuffer(buffer);
}
752
753
// Reads back the currently bound texture (at the given mip level) into a debug
// buffer. Fails when texturing is disabled; *isFramebuffer reports whether the
// texture is backed by a render target.
bool GPUCommonHW::GetCurrentTexture(GPUDebugBuffer &buffer, int level, bool *isFramebuffer) {
	return gstate.isTextureMapEnabled() && textureCache_->GetCurrentTextureDebug(buffer, level, isFramebuffer);
}
759
760
// Lazily marks the current framebuffer as using depth, the first time a draw
// actually reads, writes, or clears depth. Once gstate_c.usingDepth is set,
// subsequent calls are no-ops until it's reset elsewhere.
void GPUCommonHW::CheckDepthUsage(VirtualFramebuffer *vfb) {
	if (gstate_c.usingDepth)
		return;

	bool readsDepth = false;
	bool clearsDepth = false;
	bool writesDepth = false;
	if (gstate.isModeClear()) {
		clearsDepth = gstate.isClearModeDepthMask();
		writesDepth = clearsDepth;
	} else if (gstate.isDepthTestEnabled()) {
		writesDepth = gstate.isDepthWriteEnabled();
		// Any test other than ALWAYS actually reads existing depth values.
		readsDepth = gstate.getDepthTestFunction() > GE_COMP_ALWAYS;
	}

	if (!writesDepth && !readsDepth)
		return;

	gstate_c.usingDepth = true;
	gstate_c.clearingDepth = clearsDepth;
	vfb->last_frame_depth_render = gpuStats.numFlips;
	if (writesDepth) {
		vfb->last_frame_depth_updated = gpuStats.numFlips;
	}
	framebufferManager_->SetDepthFrameBuffer(clearsDepth);
}
784
785
// Invalidates cached textures for a memory range (or everything when size <= 0),
// and for safe invalidations also refreshes any framebuffer the range overlaps.
void GPUCommonHW::InvalidateCache(u32 addr, int size, GPUInvalidationType type) {
	if (size > 0) {
		textureCache_->Invalidate(addr, size, type);
	} else {
		textureCache_->InvalidateAll(type);
	}

	if (type == GPU_INVALIDATE_ALL || !framebufferManager_->MayIntersectFramebufferColor(addr))
		return;

	// Vempire invalidates (with writeback) after drawing, but before blitting.
	// TODO: Investigate whether we can get this to work some other way.
	if (type == GPU_INVALIDATE_SAFE) {
		framebufferManager_->UpdateFromMemory(addr, size);
	}
}
799
800
bool GPUCommonHW::FramebufferDirty() {
801
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
802
if (vfb) {
803
bool dirty = vfb->dirtyAfterDisplay;
804
vfb->dirtyAfterDisplay = false;
805
return dirty;
806
}
807
return true;
808
}
809
810
bool GPUCommonHW::FramebufferReallyDirty() {
811
VirtualFramebuffer *vfb = framebufferManager_->GetDisplayVFB();
812
if (vfb) {
813
bool dirty = vfb->reallyDirtyAfterDisplay;
814
vfb->reallyDirtyAfterDisplay = false;
815
return dirty;
816
}
817
return true;
818
}
819
820
// Slow-path execution of a single GE command. Executes the handler when the
// command requires it (always, or only on change), otherwise just records
// the dirty bits packed into the upper part of the flags word.
void GPUCommonHW::ExecuteOp(u32 op, u32 diff) {
	const u8 cmd = op >> 24;
	const CommandInfo &info = cmdInfo_[cmd];
	const bool execute = (info.flags & FLAG_EXECUTE) || (diff && (info.flags & FLAG_EXECUTEONCHANGE));
	if (execute) {
		(this->*info.func)(op, diff);
	} else if (diff) {
		// Dirty flags live above bit 8 (see CommandInfo::AddDirty).
		const uint64_t dirty = info.flags >> 8;
		if (dirty)
			gstate_c.Dirty(dirty);
	}
}
832
833
// Hot inner interpreter for display lists. Consumes up to `downcount` commands
// from list.pc, dispatching through the cmdInfo_ table. Handlers may modify
// `downcount` (e.g. to force an early exit), which is why it's synced in and
// out around every handler call.
void GPUCommonHW::FastRunLoop(DisplayList &list) {
	PROFILE_THIS_SCOPE("gpuloop");

	if (!Memory::IsValidAddress(list.pc)) {
		// We're having some serious problems here, just bail and try to limp along and not crash the app.
		downcount = 0;
		return;
	}

	const CommandInfo *cmdInfo = cmdInfo_;
	int dc = downcount;
	for (; dc > 0; --dc) {
		// We know that display list PCs have the upper nibble == 0 - no need to mask the pointer
		const u32 op = *(const u32_le *)(Memory::base + list.pc);
		const u32 cmd = op >> 24;
		const CommandInfo &info = cmdInfo[cmd];
		const u32 diff = op ^ gstate.cmdmem[cmd];
		if (diff == 0) {
			// Value unchanged: only commands flagged FLAG_EXECUTE still need to run.
			if (info.flags & FLAG_EXECUTE) {
				downcount = dc;
				(this->*info.func)(op, diff);
				dc = downcount;
			}
		} else {
			uint64_t flags = info.flags;
			if (flags & FLAG_FLUSHBEFOREONCHANGE) {
				drawEngineCommon_->Flush();
			}
			// Commit the new register value before dispatching the handler.
			gstate.cmdmem[cmd] = op;
			if (flags & (FLAG_EXECUTE | FLAG_EXECUTEONCHANGE)) {
				downcount = dc;
				(this->*info.func)(op, diff);
				dc = downcount;
			} else {
				// No handler needed - just accumulate the dirty bits packed above bit 8.
				uint64_t dirty = flags >> 8;
				if (dirty)
					gstate_c.Dirty(dirty);
			}
		}
		list.pc += 4;
	}
	downcount = 0;
}
876
877
// Handles GE_CMD_VERTEXTYPE (non-software-skinning path): dirties shader state
// when the vertex format changes, plus extra state when through-mode toggles.
void GPUCommonHW::Execute_VertexType(u32 op, u32 diff) {
	if (!diff)
		return;
	// TODO: We only need to dirty vshader-state here if the output format will be different.
	gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
	if (diff & GE_VTYPE_THROUGH_MASK) {
		// Switching between through and non-through, we need to invalidate a bunch of stuff.
		gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
	}
}
888
889
// Handles GE_CMD_VERTEXTYPE when software skinning is enabled. Weight-count-only
// changes are deliberately not flushed, so skinned draws can batch across them.
void GPUCommonHW::Execute_VertexTypeSkinning(u32 op, u32 diff) {
	// Don't flush when weight count changes.
	if (diff & ~GE_VTYPE_WEIGHTCOUNT_MASK) {
		// Restore and flush
		gstate.vertType ^= diff;  // XOR undoes the change: flush with the OLD vertType...
		Flush();
		gstate.vertType ^= diff;  // ...then re-apply the new value.
		// In this case, we may be doing weights and morphs.
		// Update any bone matrix uniforms so it uses them correctly.
		if ((op & GE_VTYPE_MORPHCOUNT_MASK) != 0) {
			gstate_c.Dirty(gstate_c.deferredVertTypeDirty);
			gstate_c.deferredVertTypeDirty = 0;
		}
		gstate_c.Dirty(DIRTY_VERTEXSHADER_STATE);
	}
	if (diff & GE_VTYPE_THROUGH_MASK)
		gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_CULLRANGE);
}
907
908
// Handles GE_CMD_PRIM - the command that drives all drawing. Submits the draw
// to the draw engine, then runs a small inner interpreter over the following
// commands so that long runs of PRIM (and a few cheap state commands) can be
// batched without returning to the main interpreter loop. Bails out of the
// inner loop (label `bail`) for anything it can't handle inline.
void GPUCommonHW::Execute_Prim(u32 op, u32 diff) {
	// This drives all drawing. All other state we just buffer up, then we apply it only
	// when it's time to draw. As most PSP games set state redundantly ALL THE TIME, this is a huge optimization.

	PROFILE_THIS_SCOPE("execprim");

	FlushImm();

	// Upper bits are ignored.
	const GEPrimitiveType prim = static_cast<GEPrimitiveType>((op >> 16) & 7);
	SetDrawType(DRAW_PRIM, prim);

	// Discard AA lines as we can't do anything that makes sense with these anyway. The SW plugin might, though.
	if (gstate.isAntiAliasEnabled()) {
		// Heuristic derived from discussions in #6483 and #12588.
		// Discard AA lines in Persona 3 Portable, DOA Paradise and Summon Night 5, while still keeping AA lines in Echochrome.
		if ((prim == GE_PRIM_LINE_STRIP || prim == GE_PRIM_LINES) && gstate.getTextureFunction() == GE_TEXFUNC_REPLACE)
			return;
	}

	// Update cached framebuffer format.
	// We store it in the cache so it can be modified for blue-to-alpha, next.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	// See the documentation for gstate_c.blueToAlpha.
	bool blueToAlpha = false;
	if (PSP_CoreParameter().compat.flags().BlueToAlpha) {
		if (gstate_c.framebufFormat == GEBufferFormat::GE_FORMAT_565 && gstate.getColorMask() == 0x0FFFFF && !gstate.isLogicOpEnabled()) {
			blueToAlpha = true;
			gstate_c.framebufFormat = GEBufferFormat::GE_FORMAT_4444;
		}
		if (blueToAlpha != gstate_c.blueToAlpha) {
			gstate_c.blueToAlpha = blueToAlpha;
			gstate_c.Dirty(DIRTY_FRAMEBUF | DIRTY_FRAGMENTSHADER_STATE | DIRTY_BLEND_STATE);
		}
	}

	if (PSP_CoreParameter().compat.flags().SplitFramebufferMargin) {
		switch (gstate.vertType & 0xFFFFFF) {
		case 0x00800102: // through, u16 uv, u16 pos (used for the framebuffer effect in-game)
		case 0x0080011c: // through, 8888 color, s16 pos (used for clearing in the margin of the title screen)
		case 0x00000183: // float uv, float pos (used for drawing in the margin of the title screen)
			// Need to re-check the framebuffer every one of these draws, to update the split if needed.
			gstate_c.Dirty(DIRTY_FRAMEBUF);
		}
	}

	// This also makes skipping drawing very effective.
	bool changed;
	VirtualFramebuffer *const vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
	if (blueToAlpha) {
		vfb->usageFlags |= FB_USAGE_BLUE_TO_ALPHA;
	}

	if (changed) {
		drawEngineCommon_->FlushQueuedDepth();
	}

	if (gstate_c.dirty & DIRTY_VERTEXSHADER_STATE) {
		vertexCost_ = EstimatePerVertexCost();
	}

	const u32 count = op & 0xFFFF;
	// Must check this after SetRenderFrameBuffer so we know SKIPDRAW_NON_DISPLAYED_FB.
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// Rough estimate, not sure what's correct.
		cyclesExecuted += vertexCost_ * count;
		if (gstate.isModeClear()) {
			gpuStats.numClears++;
		}
		return;
	}

	CheckDepthUsage(vfb);

	const void *verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *inds = nullptr;

	const bool isTriangle = IsTrianglePrim(prim);

	// canExtend: whether consecutive non-indexed draws can be merged into one.
	bool canExtend = isTriangle;
	u32 vertexType = gstate.vertType;
	if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		u32 indexAddr = gstate_c.indexAddr;
		const int indexShift = ((vertexType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
		if (!Memory::IsValidRange(indexAddr, count << indexShift)) {
			ERROR_LOG(Log::G3D, "Bad index address %08x (%d)!", indexAddr, count);
			return;
		}
		inds = Memory::GetPointerUnchecked(indexAddr);
		canExtend = false;
	}

	gstate_c.UpdateUVScaleOffset();

	// cull mode
	int cullMode = gstate.getCullMode();

	uint32_t vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
	VertexDecoder *decoder = drawEngineCommon_->GetVertexDecoder(vertTypeID);

	// Through mode early-out for simple float 2D draws, like in Fate Extra CCC (very beneficial there due to avoiding texture loads)
	if ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_POS_MASK | GE_VTYPE_IDX_MASK)) == (GE_VTYPE_THROUGH_MASK | GE_VTYPE_POS_FLOAT | GE_VTYPE_IDX_NONE)) {
		int bytesRead = 0;
		if (!drawEngineCommon_->TestBoundingBoxThrough(verts, count, decoder, vertexType, &bytesRead)) {
			gpuStats.numCulledDraws++;
			int cycles = vertexCost_ * count;
			gpuStats.vertexGPUCycles += cycles;
			cyclesExecuted += cycles;
			// NOTE! We still have to advance vertex pointers!
			gstate_c.vertexAddr += bytesRead; // We know from the above check that it's not an indexed draw.
			return;
		}
	}

#define MAX_CULL_CHECK_COUNT 6

// For now, turn off culling on platforms where we don't have SIMD bounding box tests, like RISC-V.
#if PPSSPP_ARCH(ARM_NEON) || PPSSPP_ARCH(SSE2)

#define PASSES_CULLING ((vertexType & (GE_VTYPE_THROUGH_MASK | GE_VTYPE_MORPHCOUNT_MASK | GE_VTYPE_WEIGHT_MASK | GE_VTYPE_IDX_MASK)) || count > MAX_CULL_CHECK_COUNT)

#else

#define PASSES_CULLING true

#endif

	// If certain conditions are true, do frustum culling.
	bool passCulling = PASSES_CULLING;
	if (!passCulling) {
		// Do software culling.
		if (drawEngineCommon_->TestBoundingBoxFast(verts, count, decoder, vertexType)) {
			passCulling = true;
		} else {
			gpuStats.numCulledDraws++;
		}
	}

	int bytesRead = 0;

	// If the first one in a batch passes, let's assume the whole batch passes.
	// Cuts down on checking, while not losing that much efficiency.
	bool onePassed = false;
	if (passCulling) {
		if (!drawEngineCommon_->SubmitPrim(verts, inds, prim, count, decoder, vertTypeID, true, &bytesRead)) {
			canExtend = false;
		}
		onePassed = true;
	} else {
		// Still need to advance bytesRead.
		drawEngineCommon_->SkipPrim(prim, count, decoder, vertTypeID, &bytesRead);
		canExtend = false;
	}

	// After drawing, we advance the vertexAddr (when non indexed) or indexAddr (when indexed).
	// Some games rely on this, they don't bother reloading VADDR and IADDR.
	// The VADDR/IADDR registers are NOT updated.
	AdvanceVerts(vertexType, count, bytesRead);

	int totalVertCount = count;

	// PRIMs are often followed by more PRIMs. Save some work and submit them immediately.
	const u32_le *start = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
	const u32_le *src = start;
	const u32_le *stall = currentList->stall ? (const u32_le *)Memory::GetPointerUnchecked(currentList->stall) : 0;

	// Optimized submission of sequences of PRIM. Allows us to avoid going through all the mess
	// above for each one. This can be expanded to support additional games that intersperse
	// PRIM commands with other commands. A special case is Earth Defence Force 2 that changes culling mode
	// between each prim, we just change the triangle winding right here to still be able to join draw calls.

	const uint32_t vtypeCheckMask = g_Config.bSoftwareSkinning ? (~GE_VTYPE_WEIGHTCOUNT_MASK) : 0xFFFFFFFF;

	if (!useFastRunLoop_)
		goto bail; // we're either recording or stepping.

	// Inner interpreter: walk commands until the stall address, handling only
	// the cheap cases inline. Anything else jumps to `bail`.
	while (src != stall) {
		uint32_t data = *src;
		switch (data >> 24) {
		case GE_CMD_PRIM:
		{
			GEPrimitiveType newPrim = static_cast<GEPrimitiveType>((data >> 16) & 7);
			if (IsTrianglePrim(newPrim) != isTriangle)
				goto bail;  // Can't join over this boundary. Might as well exit and get this on the next time around.
			// TODO: more efficient updating of verts/inds

			u32 count = data & 0xFFFF;
			bool clockwise = !gstate.isCullEnabled() || gstate.getCullMode() == cullMode;
			if (canExtend) {
				// Non-indexed draws can be cheaply merged if vertexAddr hasn't changed, that means the vertices
				// are consecutive in memory. We also ignore culling here.
				_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
				int commandsExecuted = drawEngineCommon_->ExtendNonIndexedPrim(src, stall, decoder, vertTypeID, clockwise, &bytesRead, isTriangle);
				if (!commandsExecuted) {
					goto bail;
				}
				src += commandsExecuted - 1;
				gstate_c.vertexAddr += bytesRead;
				totalVertCount += count;
				break;
			}

			verts = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
			inds = nullptr;
			if ((vertexType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
				const u32 indexAddr = gstate_c.indexAddr;
				const int indexShift = ((vertexType & GE_VTYPE_IDX_MASK) >> GE_VTYPE_IDX_SHIFT) - 1;
				if (!Memory::IsValidRange(gstate_c.indexAddr, count << indexShift)) {
					// Bad index range. Let's give up the fast loop.
					goto bail;
				}
				inds = Memory::GetPointerUnchecked(indexAddr);
			} else {
				// We can extend again after submitting a normal draw.
				canExtend = isTriangle;
			}

			bool passCulling = onePassed || PASSES_CULLING;
			if (!passCulling) {
				// Do software culling.
				_dbg_assert_((vertexType & GE_VTYPE_IDX_MASK) == GE_VTYPE_IDX_NONE);
				if (drawEngineCommon_->TestBoundingBoxFast(verts, count, decoder, vertexType)) {
					passCulling = true;
				} else {
					gpuStats.numCulledDraws++;
				}
			}
			if (passCulling) {
				if (!drawEngineCommon_->SubmitPrim(verts, inds, newPrim, count, decoder, vertTypeID, clockwise, &bytesRead)) {
					canExtend = false;
				}
				// As soon as one passes, assume we don't need to check the rest of this batch.
				onePassed = true;
			} else {
				// Still need to advance bytesRead.
				drawEngineCommon_->SkipPrim(newPrim, count, decoder, vertTypeID, &bytesRead);
				canExtend = false;
			}
			AdvanceVerts(vertexType, count, bytesRead);
			totalVertCount += count;
			break;
		}
		case GE_CMD_VERTEXTYPE:
		{
			uint32_t diff = data ^ vertexType;
			// don't mask upper bits, vertexType is unmasked
			if (diff) {
				if (diff & vtypeCheckMask)
					goto bail;
				drawEngineCommon_->FlushSkin();
				canExtend = false;  // TODO: Might support extending between some vertex types in the future.
				vertexType = data;
				vertTypeID = GetVertTypeID(vertexType, gstate.getUVGenMode(), g_Config.bSoftwareSkinning);
				decoder = drawEngineCommon_->GetVertexDecoder(vertTypeID);
			}
			break;
		}
		case GE_CMD_VADDR:
		{
			gstate.cmdmem[GE_CMD_VADDR] = data;
			uint32_t newAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF);
			if (gstate_c.vertexAddr != newAddr) {
				canExtend = false;
				gstate_c.vertexAddr = newAddr;
			}
			break;
		}
		case GE_CMD_IADDR:
			gstate.cmdmem[GE_CMD_IADDR] = data;
			gstate_c.indexAddr = gstate_c.getRelativeAddress(data & 0x00FFFFFF);
			break;
		case GE_CMD_OFFSETADDR:
			gstate.cmdmem[GE_CMD_OFFSETADDR] = data;
			gstate_c.offsetAddr = data << 8;
			break;
		case GE_CMD_BASE:
			gstate.cmdmem[GE_CMD_BASE] = data;
			break;
		case GE_CMD_CULLFACEENABLE:
			// Earth Defence Force 2
			if (gstate.cmdmem[GE_CMD_CULLFACEENABLE] != data) {
				goto bail;
			}
			break;
		case GE_CMD_CULL:
			// flip face by indices for triangles
			cullMode = data & 1;
			break;
		case GE_CMD_TEXFLUSH:
		case GE_CMD_NOP:
		case GE_CMD_NOP_FF:
			gstate.cmdmem[data >> 24] = data;
			break;
		case GE_CMD_BONEMATRIXNUMBER:
			gstate.cmdmem[GE_CMD_BONEMATRIXNUMBER] = data;
			break;
		case GE_CMD_TEXSCALEU:
			// We don't "dirty-check" - we could avoid getFloat24 and setting canExtend=false, but usually
			// when texscale commands are in line with the prims like this, they actually have an effect
			// and requires us to stop extending strips anyway.
			gstate.cmdmem[GE_CMD_TEXSCALEU] = data;
			gstate_c.uv.uScale = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXSCALEV:
			gstate.cmdmem[GE_CMD_TEXSCALEV] = data;
			gstate_c.uv.vScale = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXOFFSETU:
			gstate.cmdmem[GE_CMD_TEXOFFSETU] = data;
			gstate_c.uv.uOff = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXOFFSETV:
			gstate.cmdmem[GE_CMD_TEXOFFSETV] = data;
			gstate_c.uv.vOff = getFloat24(data);
			canExtend = false;
			break;
		case GE_CMD_TEXLEVEL:
			// Same Gran Turismo hack from Execute_TexLevel
			if ((data & 3) != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & data) != 0) {
				goto bail;
			}
			gstate.cmdmem[GE_CMD_TEXLEVEL] = data;
			break;
		case GE_CMD_CALL:
		{
			// A bone matrix probably. If not we bail.
			const u32 target = gstate_c.getRelativeAddress(data & 0x00FFFFFC);
			if ((Memory::ReadUnchecked_U32(target) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 11 * 4) >> 24) == GE_CMD_BONEMATRIXDATA &&
				(Memory::ReadUnchecked_U32(target + 12 * 4) >> 24) == GE_CMD_RET &&
				(target > currentList->stall || target + 12 * 4 < currentList->stall) &&
				(gstate.boneMatrixNumber & 0x00FFFFFF) <= 96 - 12) {
				drawEngineCommon_->FlushSkin();
				canExtend = false;
				FastLoadBoneMatrix(target);
			} else {
				goto bail;
			}
			break;
		}

		// Keep going if these commands don't change state.
		case GE_CMD_TEXBUFWIDTH0:
		case GE_CMD_TEXADDR0:
			if (data != gstate.cmdmem[data >> 24])
				goto bail;
			break;

		default:
			// All other commands might need a flush or something, stop this inner loop.
			goto bail;
		}
		src++;
	}

bail:
	drawEngineCommon_->FlushSkin();
	// Write back the possibly-updated vertex type register.
	gstate.cmdmem[GE_CMD_VERTEXTYPE] = vertexType;
	const int cmdCount = src - start;
	// Skip over the commands we just read out manually.
	if (cmdCount > 0) {
		UpdatePC(currentList->pc, currentList->pc + cmdCount * 4);
		currentList->pc += cmdCount * 4;
		// flush back cull mode
		if (cullMode != gstate.getCullMode()) {
			// We rewrote everything to the old cull mode, so flush first.
			drawEngineCommon_->Flush();

			// Now update things for next time.
			gstate.cmdmem[GE_CMD_CULL] ^= 1;
			gstate_c.Dirty(DIRTY_RASTER_STATE);
		}
	}

	int cycles = vertexCost_ * totalVertCount;
	gpuStats.vertexGPUCycles += cycles;
	cyclesExecuted += cycles;
}
1295
1296
// Handles GE_CMD_BEZIER: tessellates and submits a bezier patch surface.
// Control point grid size comes from the low bits of op; hardware tessellation
// is used when the draw engine supports it for this patch primitive.
void GPUCommonHW::Execute_Bezier(u32 op, u32 diff) {
	// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// This also make skipping drawing very effective.
	bool changed;
	VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
	if (changed) {
		drawEngineCommon_->FlushQueuedDepth();
	}
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(vfb);

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *indices = NULL;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		DEBUG_LOG_REPORT(Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
	if (flushOnParams_)
		drawEngineCommon_->Flush();

	Spline::BezierSurface surface;
	surface.tess_u = gstate.getPatchDivisionU();
	surface.tess_v = gstate.getPatchDivisionV();
	surface.num_points_u = op & 0xFF;
	surface.num_points_v = (op >> 8) & 0xFF;
	// Bezier patches share edge control points: 3 new points per patch.
	surface.num_patches_u = (surface.num_points_u - 1) / 3;
	surface.num_patches_v = (surface.num_points_v - 1) / 3;
	surface.primType = gstate.getPatchPrimitiveType();
	surface.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_BEZIER, PatchPrimToPrim(surface.primType));

	// We need to dirty UVSCALEOFFSET here because we look at the submit type when setting that uniform.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) {
		gstate_c.submitType = SubmitType::HW_BEZIER;
		if (gstate_c.spline_num_points_u != surface.num_points_u) {
			gstate_c.Dirty(DIRTY_BEZIERSPLINE);
			gstate_c.spline_num_points_u = surface.num_points_u;
		}
	} else {
		gstate_c.submitType = SubmitType::BEZIER;
	}

	int bytesRead = 0;
	gstate_c.UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "bezier");

	// Restore normal-draw submit type and re-dirty the state we perturbed above.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	gstate_c.submitType = SubmitType::DRAW;

	// After drawing, we advance pointers - see SubmitPrim which does the same.
	const int count = surface.num_points_u * surface.num_points_v;
	AdvanceVerts(gstate.vertType, count, bytesRead);
}
1371
1372
// Handles GE_CMD_SPLINE: tessellates and submits a spline patch surface.
// Mirrors Execute_Bezier, but with spline knot types and patch counts.
void GPUCommonHW::Execute_Spline(u32 op, u32 diff) {
	// We don't dirty on normal changes anymore as we prescale, but it's needed for splines/bezier.
	gstate_c.framebufFormat = gstate.FrameBufFormat();

	// This also make skipping drawing very effective.
	bool changed;
	VirtualFramebuffer *vfb = framebufferManager_->SetRenderFrameBuffer(gstate_c.IsDirty(DIRTY_FRAMEBUF), gstate_c.skipDrawReason, &changed);
	if (changed) {
		drawEngineCommon_->FlushQueuedDepth();
	}
	if (gstate_c.skipDrawReason & (SKIPDRAW_SKIPFRAME | SKIPDRAW_NON_DISPLAYED_FB)) {
		// TODO: Should this eat some cycles? Probably yes. Not sure if important.
		return;
	}

	CheckDepthUsage(vfb);

	if (!Memory::IsValidAddress(gstate_c.vertexAddr)) {
		ERROR_LOG(Log::G3D, "Bad vertex address %08x!", gstate_c.vertexAddr);
		return;
	}

	const void *control_points = Memory::GetPointerUnchecked(gstate_c.vertexAddr);
	const void *indices = NULL;
	if ((gstate.vertType & GE_VTYPE_IDX_MASK) != GE_VTYPE_IDX_NONE) {
		if (!Memory::IsValidAddress(gstate_c.indexAddr)) {
			ERROR_LOG(Log::G3D, "Bad index address %08x!", gstate_c.indexAddr);
			return;
		}
		indices = Memory::GetPointerUnchecked(gstate_c.indexAddr);
	}

	if (vertTypeIsSkinningEnabled(gstate.vertType)) {
		WARN_LOG_ONCE(unusualcurve, Log::G3D, "Unusual bezier/spline vtype: %08x, morph: %d, bones: %d", gstate.vertType, (gstate.vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT, vertTypeGetNumBoneWeights(gstate.vertType));
	}

	// Can't flush after setting gstate_c.submitType below since it'll be a mess - it must be done already.
	if (flushOnParams_)
		drawEngineCommon_->Flush();

	Spline::SplineSurface surface;
	surface.tess_u = gstate.getPatchDivisionU();
	surface.tess_v = gstate.getPatchDivisionV();
	// Knot types (open/closed) for each direction.
	surface.type_u = (op >> 16) & 0x3;
	surface.type_v = (op >> 18) & 0x3;
	surface.num_points_u = op & 0xFF;
	surface.num_points_v = (op >> 8) & 0xFF;
	surface.num_patches_u = surface.num_points_u - 3;
	surface.num_patches_v = surface.num_points_v - 3;
	surface.primType = gstate.getPatchPrimitiveType();
	surface.patchFacing = gstate.patchfacing & 1;

	SetDrawType(DRAW_SPLINE, PatchPrimToPrim(surface.primType));

	// We need to dirty UVSCALEOFFSET here because we look at the submit type when setting that uniform.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	if (drawEngineCommon_->CanUseHardwareTessellation(surface.primType)) {
		gstate_c.submitType = SubmitType::HW_SPLINE;
		if (gstate_c.spline_num_points_u != surface.num_points_u) {
			gstate_c.Dirty(DIRTY_BEZIERSPLINE);
			gstate_c.spline_num_points_u = surface.num_points_u;
		}
	} else {
		gstate_c.submitType = SubmitType::SPLINE;
	}

	int bytesRead = 0;
	gstate_c.UpdateUVScaleOffset();
	drawEngineCommon_->SubmitCurve(control_points, indices, surface, gstate.vertType, &bytesRead, "spline");

	// Restore normal-draw submit type and re-dirty the state we perturbed above.
	gstate_c.Dirty(DIRTY_RASTER_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE | DIRTY_UVSCALEOFFSET);
	gstate_c.submitType = SubmitType::DRAW;

	// After drawing, we advance pointers - see SubmitPrim which does the same.
	int count = surface.num_points_u * surface.num_points_v;
	AdvanceVerts(gstate.vertType, count, bytesRead);
}
1449
1450
void GPUCommonHW::Execute_BlockTransferStart(u32 op, u32 diff) {
1451
drawEngineCommon_->FlushQueuedDepth();
1452
Flush();
1453
1454
PROFILE_THIS_SCOPE("block"); // don't include the flush in the profile, would be misleading.
1455
1456
gstate_c.framebufFormat = gstate.FrameBufFormat();
1457
1458
// and take appropriate action. This is a block transfer between RAM and VRAM, or vice versa.
1459
// Can we skip this on SkipDraw?
1460
DoBlockTransfer(gstate_c.skipDrawReason);
1461
}
1462
1463
void GPUCommonHW::Execute_TexSize0(u32 op, u32 diff) {
1464
// Render to texture may have overridden the width/height.
1465
// Don't reset it unless the size is different / the texture has changed.
1466
if (diff || gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS)) {
1467
gstate_c.curTextureWidth = gstate.getTextureWidth(0);
1468
gstate_c.curTextureHeight = gstate.getTextureHeight(0);
1469
// We will need to reset the texture now.
1470
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
1471
}
1472
}
1473
1474
void GPUCommonHW::Execute_TexLevel(u32 op, u32 diff) {
1475
// TODO: If you change the rules here, don't forget to update the inner interpreter in Execute_Prim.
1476
if (diff == 0xFFFFFFFF)
1477
return;
1478
1479
gstate.texlevel ^= diff;
1480
1481
if (diff & 0xFF0000) {
1482
// Piggyback on this flag for 3D textures.
1483
gstate_c.Dirty(DIRTY_MIPBIAS);
1484
}
1485
if (gstate.getTexLevelMode() != GE_TEXLEVEL_MODE_AUTO && (0x00FF0000 & gstate.texlevel) != 0) {
1486
Flush();
1487
}
1488
1489
gstate.texlevel ^= diff;
1490
1491
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS | DIRTY_FRAGMENTSHADER_STATE);
1492
}
1493
1494
void GPUCommonHW::Execute_LoadClut(u32 op, u32 diff) {
1495
gstate_c.Dirty(DIRTY_TEXTURE_PARAMS);
1496
textureCache_->LoadClut(gstate.getClutAddress(), gstate.getClutLoadBytes(), &recorder_);
1497
}
1498
1499
void GPUCommonHW::Execute_WorldMtxNum(u32 op, u32 diff) {
1500
if (!currentList) {
1501
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (op & 0xF);
1502
return;
1503
}
1504
1505
// This is almost always followed by GE_CMD_WORLDMATRIXDATA.
1506
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
1507
u32 *dst = (u32 *)(gstate.worldMatrix + (op & 0xF));
1508
const int end = 12 - (op & 0xF);
1509
int i = 0;
1510
1511
// We must record the individual data commands while debugRecording_.
1512
bool fastLoad = !debugRecording_ && end > 0;
1513
// Stalling in the middle of a matrix would be stupid, I doubt this check is necessary.
1514
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
1515
fastLoad = false;
1516
}
1517
1518
if (fastLoad) {
1519
while ((src[i] >> 24) == GE_CMD_WORLDMATRIXDATA) {
1520
const u32 newVal = src[i] << 8;
1521
if (dst[i] != newVal) {
1522
Flush();
1523
dst[i] = newVal;
1524
gstate_c.Dirty(DIRTY_WORLDMATRIX);
1525
}
1526
if (++i >= end) {
1527
break;
1528
}
1529
}
1530
}
1531
1532
const int count = i;
1533
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | ((op & 0xF) + count);
1534
1535
// Skip over the loaded data, it's done now.
1536
UpdatePC(currentList->pc, currentList->pc + count * 4);
1537
currentList->pc += count * 4;
1538
}
1539
1540
void GPUCommonHW::Execute_WorldMtxData(u32 op, u32 diff) {
1541
// Note: it's uncommon to get here now, see above.
1542
int num = gstate.worldmtxnum & 0x00FFFFFF;
1543
u32 newVal = op << 8;
1544
if (num < 12 && newVal != ((const u32 *)gstate.worldMatrix)[num]) {
1545
Flush();
1546
((u32 *)gstate.worldMatrix)[num] = newVal;
1547
gstate_c.Dirty(DIRTY_WORLDMATRIX);
1548
}
1549
num++;
1550
gstate.worldmtxnum = (GE_CMD_WORLDMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
1551
gstate.worldmtxdata = GE_CMD_WORLDMATRIXDATA << 24;
1552
}
1553
1554
void GPUCommonHW::Execute_ViewMtxNum(u32 op, u32 diff) {
1555
if (!currentList) {
1556
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (op & 0xF);
1557
return;
1558
}
1559
1560
// This is almost always followed by GE_CMD_VIEWMATRIXDATA.
1561
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
1562
u32 *dst = (u32 *)(gstate.viewMatrix + (op & 0xF));
1563
const int end = 12 - (op & 0xF);
1564
int i = 0;
1565
1566
bool fastLoad = !debugRecording_ && end > 0;
1567
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
1568
fastLoad = false;
1569
}
1570
1571
if (fastLoad) {
1572
while ((src[i] >> 24) == GE_CMD_VIEWMATRIXDATA) {
1573
const u32 newVal = src[i] << 8;
1574
if (dst[i] != newVal) {
1575
Flush();
1576
dst[i] = newVal;
1577
gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
1578
}
1579
if (++i >= end) {
1580
break;
1581
}
1582
}
1583
}
1584
1585
const int count = i;
1586
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | ((op & 0xF) + count);
1587
1588
// Skip over the loaded data, it's done now.
1589
UpdatePC(currentList->pc, currentList->pc + count * 4);
1590
currentList->pc += count * 4;
1591
}
1592
1593
void GPUCommonHW::Execute_ViewMtxData(u32 op, u32 diff) {
1594
// Note: it's uncommon to get here now, see above.
1595
int num = gstate.viewmtxnum & 0x00FFFFFF;
1596
u32 newVal = op << 8;
1597
if (num < 12 && newVal != ((const u32 *)gstate.viewMatrix)[num]) {
1598
Flush();
1599
((u32 *)gstate.viewMatrix)[num] = newVal;
1600
gstate_c.Dirty(DIRTY_VIEWMATRIX | DIRTY_CULL_PLANES);
1601
}
1602
num++;
1603
gstate.viewmtxnum = (GE_CMD_VIEWMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
1604
gstate.viewmtxdata = GE_CMD_VIEWMATRIXDATA << 24;
1605
}
1606
1607
void GPUCommonHW::Execute_ProjMtxNum(u32 op, u32 diff) {
1608
if (!currentList) {
1609
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (op & 0xF);
1610
return;
1611
}
1612
1613
// This is almost always followed by GE_CMD_PROJMATRIXDATA.
1614
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
1615
u32 *dst = (u32 *)(gstate.projMatrix + (op & 0xF));
1616
const int end = 16 - (op & 0xF);
1617
int i = 0;
1618
1619
bool fastLoad = !debugRecording_;
1620
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
1621
fastLoad = false;
1622
}
1623
1624
if (fastLoad) {
1625
while ((src[i] >> 24) == GE_CMD_PROJMATRIXDATA) {
1626
const u32 newVal = src[i] << 8;
1627
if (dst[i] != newVal) {
1628
Flush();
1629
dst[i] = newVal;
1630
gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
1631
}
1632
if (++i >= end) {
1633
break;
1634
}
1635
}
1636
}
1637
1638
const int count = i;
1639
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | ((op & 0xF) + count);
1640
1641
// Skip over the loaded data, it's done now.
1642
UpdatePC(currentList->pc, currentList->pc + count * 4);
1643
currentList->pc += count * 4;
1644
}
1645
1646
void GPUCommonHW::Execute_ProjMtxData(u32 op, u32 diff) {
1647
// Note: it's uncommon to get here now, see above.
1648
int num = gstate.projmtxnum & 0x00FFFFFF;
1649
u32 newVal = op << 8;
1650
if (num < 16 && newVal != ((const u32 *)gstate.projMatrix)[num]) {
1651
Flush();
1652
((u32 *)gstate.projMatrix)[num] = newVal;
1653
gstate_c.Dirty(DIRTY_PROJMATRIX | DIRTY_CULL_PLANES);
1654
}
1655
num++;
1656
if (num <= 16)
1657
gstate.projmtxnum = (GE_CMD_PROJMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
1658
gstate.projmtxdata = GE_CMD_PROJMATRIXDATA << 24;
1659
}
1660
1661
void GPUCommonHW::Execute_TgenMtxNum(u32 op, u32 diff) {
1662
if (!currentList) {
1663
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (op & 0xF);
1664
return;
1665
}
1666
1667
// This is almost always followed by GE_CMD_TGENMATRIXDATA.
1668
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
1669
u32 *dst = (u32 *)(gstate.tgenMatrix + (op & 0xF));
1670
const int end = 12 - (op & 0xF);
1671
int i = 0;
1672
1673
bool fastLoad = !debugRecording_ && end > 0;
1674
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
1675
fastLoad = false;
1676
}
1677
1678
if (fastLoad) {
1679
while ((src[i] >> 24) == GE_CMD_TGENMATRIXDATA) {
1680
const u32 newVal = src[i] << 8;
1681
if (dst[i] != newVal) {
1682
Flush();
1683
dst[i] = newVal;
1684
// We check the matrix to see if we need projection.
1685
gstate_c.Dirty(DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE);
1686
}
1687
if (++i >= end) {
1688
break;
1689
}
1690
}
1691
}
1692
1693
const int count = i;
1694
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | ((op & 0xF) + count);
1695
1696
// Skip over the loaded data, it's done now.
1697
UpdatePC(currentList->pc, currentList->pc + count * 4);
1698
currentList->pc += count * 4;
1699
}
1700
1701
void GPUCommonHW::Execute_TgenMtxData(u32 op, u32 diff) {
1702
// Note: it's uncommon to get here now, see above.
1703
int num = gstate.texmtxnum & 0x00FFFFFF;
1704
u32 newVal = op << 8;
1705
if (num < 12 && newVal != ((const u32 *)gstate.tgenMatrix)[num]) {
1706
Flush();
1707
((u32 *)gstate.tgenMatrix)[num] = newVal;
1708
gstate_c.Dirty(DIRTY_TEXMATRIX | DIRTY_FRAGMENTSHADER_STATE); // We check the matrix to see if we need projection
1709
}
1710
num++;
1711
gstate.texmtxnum = (GE_CMD_TGENMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
1712
gstate.texmtxdata = GE_CMD_TGENMATRIXDATA << 24;
1713
}
1714
1715
void GPUCommonHW::Execute_BoneMtxNum(u32 op, u32 diff) {
1716
if (!currentList) {
1717
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (op & 0x7F);
1718
return;
1719
}
1720
1721
// This is almost always followed by GE_CMD_BONEMATRIXDATA.
1722
const u32_le *src = (const u32_le *)Memory::GetPointerUnchecked(currentList->pc + 4);
1723
u32 *dst = (u32 *)(gstate.boneMatrix + (op & 0x7F));
1724
const int end = 12 * 8 - (op & 0x7F);
1725
int i = 0;
1726
1727
bool fastLoad = !debugRecording_ && end > 0;
1728
if (currentList->pc < currentList->stall && currentList->pc + end * 4 >= currentList->stall) {
1729
fastLoad = false;
1730
}
1731
1732
if (fastLoad) {
1733
// If we can't use software skinning, we have to flush and dirty.
1734
if (!g_Config.bSoftwareSkinning) {
1735
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
1736
const u32 newVal = src[i] << 8;
1737
if (dst[i] != newVal) {
1738
Flush();
1739
dst[i] = newVal;
1740
}
1741
if (++i >= end) {
1742
break;
1743
}
1744
}
1745
1746
const unsigned int numPlusCount = (op & 0x7F) + i;
1747
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
1748
gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
1749
}
1750
} else {
1751
while ((src[i] >> 24) == GE_CMD_BONEMATRIXDATA) {
1752
dst[i] = src[i] << 8;
1753
if (++i >= end) {
1754
break;
1755
}
1756
}
1757
1758
const unsigned int numPlusCount = (op & 0x7F) + i;
1759
for (unsigned int num = op & 0x7F; num < numPlusCount; num += 12) {
1760
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
1761
}
1762
}
1763
}
1764
1765
const int count = i;
1766
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | ((op & 0x7F) + count);
1767
1768
// Skip over the loaded data, it's done now.
1769
UpdatePC(currentList->pc, currentList->pc + count * 4);
1770
currentList->pc += count * 4;
1771
}
1772
1773
void GPUCommonHW::Execute_BoneMtxData(u32 op, u32 diff) {
1774
// Note: it's uncommon to get here now, see above.
1775
int num = gstate.boneMatrixNumber & 0x00FFFFFF;
1776
u32 newVal = op << 8;
1777
if (num < 96 && newVal != ((const u32 *)gstate.boneMatrix)[num]) {
1778
// Bone matrices should NOT flush when software skinning is enabled!
1779
if (!g_Config.bSoftwareSkinning) {
1780
Flush();
1781
gstate_c.Dirty(DIRTY_BONEMATRIX0 << (num / 12));
1782
} else {
1783
gstate_c.deferredVertTypeDirty |= DIRTY_BONEMATRIX0 << (num / 12);
1784
}
1785
((u32 *)gstate.boneMatrix)[num] = newVal;
1786
}
1787
num++;
1788
gstate.boneMatrixNumber = (GE_CMD_BONEMATRIXNUMBER << 24) | (num & 0x00FFFFFF);
1789
gstate.boneMatrixData = GE_CMD_BONEMATRIXDATA << 24;
1790
}
1791
1792
void GPUCommonHW::Execute_TexFlush(u32 op, u32 diff) {
1793
// Games call this when they need the effect of drawing to be visible to texturing.
1794
// And for a bunch of other reasons, but either way, this is what we need to do.
1795
// It's possible we could also use this as a hint for the texture cache somehow.
1796
framebufferManager_->DiscardFramebufferCopy();
1797
}
1798
1799
u32 GPUCommonHW::DrawSync(int mode) {
1800
drawEngineCommon_->FlushQueuedDepth();
1801
return GPUCommon::DrawSync(mode);
1802
}
1803
1804
int GPUCommonHW::ListSync(int listid, int mode) {
1805
drawEngineCommon_->FlushQueuedDepth();
1806
return GPUCommon::ListSync(listid, mode);
1807
}
1808
1809
size_t GPUCommonHW::FormatGPUStatsCommon(char *buffer, size_t size) {
1810
float vertexAverageCycles = gpuStats.numVertsSubmitted > 0 ? (float)gpuStats.vertexGPUCycles / (float)gpuStats.numVertsSubmitted : 0.0f;
1811
return snprintf(buffer, size,
1812
"DL processing time: %0.2f ms, %d drawsync, %d listsync\n"
1813
"Draw: %d (%d dec, %d culled), flushes %d, clears %d, bbox jumps %d (%d updates)\n"
1814
"Vertices: %d dec: %d drawn: %d\n"
1815
"FBOs active: %d (evaluations: %d, created %d)\n"
1816
"Textures: %d, dec: %d, invalidated: %d, hashed: %d kB, clut %d\n"
1817
"readbacks %d (%d non-block), upload %d (cached %d), depal %d\n"
1818
"block transfers: %d\n"
1819
"replacer: tracks %d references, %d unique textures\n"
1820
"Cpy: depth %d, color %d, reint %d, blend %d, self %d\n"
1821
"GPU cycles: %d (%0.1f per vertex)\n"
1822
"Z-rast: %0.2f+%0.2f+%0.2f (total %0.2f/%0.2f) ms\n"
1823
"Z-rast: %d prim, %d nopix, %d small, %d earlysize, %d zcull, %d box\n%s",
1824
gpuStats.msProcessingDisplayLists * 1000.0f,
1825
gpuStats.numDrawSyncs,
1826
gpuStats.numListSyncs,
1827
gpuStats.numDrawCalls,
1828
gpuStats.numVertexDecodes,
1829
gpuStats.numCulledDraws,
1830
gpuStats.numFlushes,
1831
gpuStats.numClears,
1832
gpuStats.numBBOXJumps,
1833
gpuStats.numPlaneUpdates,
1834
gpuStats.numVertsSubmitted,
1835
gpuStats.numVertsDecoded,
1836
gpuStats.numUncachedVertsDrawn,
1837
(int)framebufferManager_->NumVFBs(),
1838
gpuStats.numFramebufferEvaluations,
1839
gpuStats.numFBOsCreated,
1840
(int)textureCache_->NumLoadedTextures(),
1841
gpuStats.numTexturesDecoded,
1842
gpuStats.numTextureInvalidations,
1843
gpuStats.numTextureDataBytesHashed / 1024,
1844
gpuStats.numClutTextures,
1845
gpuStats.numBlockingReadbacks,
1846
gpuStats.numReadbacks,
1847
gpuStats.numUploads,
1848
gpuStats.numCachedUploads,
1849
gpuStats.numDepal,
1850
gpuStats.numBlockTransfers,
1851
gpuStats.numReplacerTrackedTex,
1852
gpuStats.numCachedReplacedTextures,
1853
gpuStats.numDepthCopies,
1854
gpuStats.numColorCopies,
1855
gpuStats.numReinterpretCopies,
1856
gpuStats.numCopiesForShaderBlend,
1857
gpuStats.numCopiesForSelfTex,
1858
gpuStats.vertexGPUCycles + gpuStats.otherGPUCycles,
1859
vertexAverageCycles,
1860
gpuStats.msPrepareDepth * 1000.0,
1861
gpuStats.msCullDepth * 1000.0,
1862
gpuStats.msRasterizeDepth * 1000.0,
1863
(gpuStats.msPrepareDepth + gpuStats.msCullDepth + gpuStats.msRasterizeDepth) * 1000.0,
1864
gpuStats.msRasterTimeAvailable * 1000.0,
1865
gpuStats.numDepthRasterPrims,
1866
gpuStats.numDepthRasterNoPixels,
1867
gpuStats.numDepthRasterTooSmall,
1868
gpuStats.numDepthRasterEarlySize,
1869
gpuStats.numDepthRasterZCulled,
1870
gpuStats.numDepthEarlyBoxCulled,
1871
debugRecording_ ? "(debug-recording)" : ""
1872
);
1873
}
1874
1875