CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GPUState.h
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#pragma once
19
20
#include "ppsspp_config.h"
21
22
#include "Common/CommonTypes.h"
23
#include "Common/Swap.h"
24
#include "GPU/GPU.h"
25
#include "GPU/ge_constants.h"
26
#include "GPU/Common/ShaderCommon.h"
27
28
#if defined(_M_SSE)
29
#include <emmintrin.h>
30
#endif
31
#if PPSSPP_ARCH(ARM_NEON)
32
#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)
33
#include <arm64_neon.h>
34
#else
35
#include <arm_neon.h>
36
#endif
37
#endif
38
39
class PointerWrap;
40
41
struct GPUgstate {
42
// Getting rid of this ugly union in favor of the accessor functions
43
// might be a good idea....
44
union {
45
u32 cmdmem[256];
46
struct {
47
u32 nop,
48
vaddr,
49
iaddr,
50
pad00,
51
prim,
52
bezier,
53
spline,
54
boundBox,
55
jump,
56
bjump,
57
call,
58
ret,
59
end,
60
pad01,
61
signal,
62
finish,
63
base,
64
pad02,
65
vertType,
66
offsetAddr,
67
origin,
68
region1,
69
region2,
70
lightingEnable,
71
lightEnable[4],
72
depthClampEnable,
73
cullfaceEnable,
74
textureMapEnable, // 0x1E GE_CMD_TEXTUREMAPENABLE
75
fogEnable,
76
ditherEnable,
77
alphaBlendEnable,
78
alphaTestEnable,
79
zTestEnable,
80
stencilTestEnable,
81
antiAliasEnable,
82
patchCullEnable,
83
colorTestEnable,
84
logicOpEnable,
85
pad03,
86
boneMatrixNumber,
87
boneMatrixData,
88
morphwgt[8], //dont use
89
pad04[2],
90
patchdivision,
91
patchprimitive,
92
patchfacing,
93
pad04_a,
94
95
worldmtxnum, // 0x3A
96
worldmtxdata, // 0x3B
97
viewmtxnum, // 0x3C
98
viewmtxdata, // 0x3D
99
projmtxnum, // 0x3E
100
projmtxdata, // 0x3F
101
texmtxnum, // 0x40
102
texmtxdata, // 0x41
103
104
viewportxscale, // 0x42
105
viewportyscale, // 0x43
106
viewportzscale, // 0x44
107
viewportxcenter, // 0x45
108
viewportycenter, // 0x46
109
viewportzcenter, // 0x47
110
texscaleu, // 0x48
111
texscalev, // 0x49
112
texoffsetu, // 0x4A
113
texoffsetv, // 0x4B
114
offsetx, // 0x4C
115
offsety, // 0x4D
116
pad111[2],
117
shademodel, // 0x50
118
reversenormals, // 0x51
119
pad222,
120
materialupdate, // 0x53
121
materialemissive, // 0x54
122
materialambient, // 0x55
123
materialdiffuse, // 0x56
124
materialspecular, // 0x57
125
materialalpha, // 0x58
126
pad333[2],
127
materialspecularcoef, // 0x5B
128
ambientcolor, // 0x5C
129
ambientalpha, // 0x5D
130
lmode, // 0x5E GE_CMD_LIGHTMODE
131
ltype[4], // 0x5F-0x62 GE_CMD_LIGHTTYPEx
132
lpos[12], // 0x63-0x6E
133
ldir[12], // 0x6F-0x7A
134
latt[12], // 0x7B-0x86
135
lconv[4], // 0x87-0x8A
136
lcutoff[4], // 0x8B-0x8E
137
lcolor[12], // 0x8F-0x9A
138
cullmode, // 0x9B
139
fbptr, // 0x9C
140
fbwidth, // 0x9D
141
zbptr, // 0x9E
142
zbwidth, // 0x9F
143
texaddr[8], // 0xA0-0xA7
144
texbufwidth[8], // 0xA8-0xAF
145
clutaddr, // 0xB0
146
clutaddrupper, // 0xB1
147
transfersrc, // 0xB2
148
transfersrcw, // 0xB3
149
transferdst, // 0xB4
150
transferdstw, // 0xB5
151
padxxx[2],
152
texsize[8], // 0xB8-BF
153
texmapmode, // 0xC0
154
texshade, // 0xC1
155
texmode, // 0xC2 GE_CMD_TEXMODE
156
texformat, // 0xC3
157
loadclut, // 0xC4
158
clutformat, // 0xC5
159
texfilter, // 0xC6
160
texwrap, // 0xC7
161
texlevel, // 0xC8
162
texfunc, // 0xC9
163
texenvcolor, // 0xCA
164
texflush, // 0xCB
165
texsync, // 0xCC
166
fog1, // 0xCD
167
fog2, // 0xCE
168
fogcolor, // 0xCF
169
texlodslope, // 0xD0
170
padxxxxxx, // 0xD1
171
framebufpixformat, // 0xD2
172
clearmode, // 0xD3 GE_CMD_CLEARMODE
173
scissor1,
174
scissor2,
175
minz,
176
maxz,
177
colortest,
178
colorref,
179
colortestmask,
180
alphatest,
181
stenciltest,
182
stencilop,
183
ztestfunc,
184
blend,
185
blendfixa,
186
blendfixb,
187
dithmtx[4],
188
lop, // 0xE6
189
zmsk,
190
pmskc,
191
pmska,
192
transferstart,
193
transfersrcpos,
194
transferdstpos,
195
pad99,
196
transfersize, // 0xEE
197
pad100, // 0xEF
198
imm_vscx, // 0xF0
199
imm_vscy,
200
imm_vscz,
201
imm_vtcs,
202
imm_vtct,
203
imm_vtcq,
204
imm_cv,
205
imm_ap,
206
imm_fc,
207
imm_scv; // 0xF9
208
// In the unlikely case we ever add anything else here, don't forget to update the padding on the next line!
209
u32 pad05[0xFF- 0xF9];
210
};
211
};
212
213
// These are not directly mapped, instead these are loaded one-by-one through special commands.
214
// However, these are actual state, and can be read back.
215
float worldMatrix[12]; // 4x3
216
float viewMatrix[12]; // 4x3
217
float projMatrix[16]; // 4x4
218
float tgenMatrix[12]; // 4x3
219
float boneMatrix[12 * 8]; // Eight 4x3 bone matrices.
220
221
// We ignore the high bits of the framebuffer in fbwidth - even 0x08000000 renders to vRAM.
222
// The top bits of mirroring are also not respected, so we mask them away.
223
u32 getFrameBufRawAddress() const { return fbptr & 0x1FFFF0; }
224
// 0x44000000 is uncached VRAM.
225
u32 getFrameBufAddress() const { return 0x44000000 | getFrameBufRawAddress(); }
226
GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); }
227
int FrameBufStride() const { return fbwidth&0x7FC; }
228
u32 getDepthBufRawAddress() const { return zbptr & 0x1FFFF0; }
229
u32 getDepthBufAddress() const { return 0x44600000 | getDepthBufRawAddress(); }
230
int DepthBufStride() const { return zbwidth&0x7FC; }
231
232
// Pixel Pipeline
233
bool isModeClear() const { return clearmode & 1; }
234
bool isFogEnabled() const { return fogEnable & 1; }
235
float getFogCoef1() const { return getFloat24(fog1); }
236
float getFogCoef2() const { return getFloat24(fog2); }
237
238
// Cull
239
bool isCullEnabled() const { return cullfaceEnable & 1; }
240
int getCullMode() const { return cullmode & 1; }
241
242
// Color Mask
243
bool isClearModeColorMask() const { return (clearmode&0x100) != 0; }
244
bool isClearModeAlphaMask() const { return (clearmode&0x200) != 0; }
245
bool isClearModeDepthMask() const { return (clearmode&0x400) != 0; }
246
// Builds a 32-bit mask from the clear mode register: the low 24 bits are set
// unless clearmode bit 0x100 is set, and the top 8 bits are set unless
// clearmode bit 0x200 is set.
u32 getClearModeColorMask() const {
	u32 mask = 0;
	if ((clearmode & 0x100) == 0)
		mask |= 0x00FFFFFF;
	if ((clearmode & 0x200) == 0)
		mask |= 0xFF000000;
	return mask;
}
247
248
// Blend
249
GEBlendSrcFactor getBlendFuncA() const { return (GEBlendSrcFactor)(blend & 0xF); }
250
GEBlendDstFactor getBlendFuncB() const { return (GEBlendDstFactor)((blend >> 4) & 0xF); }
251
u32 getFixA() const { return blendfixa & 0xFFFFFF; }
252
u32 getFixB() const { return blendfixb & 0xFFFFFF; }
253
GEBlendMode getBlendEq() const { return static_cast<GEBlendMode>((blend >> 8) & 0x7); }
254
bool isAlphaBlendEnabled() const { return alphaBlendEnable & 1; }
255
256
// AntiAlias
257
bool isAntiAliasEnabled() const { return antiAliasEnable & 1; }
258
259
// Dither
260
bool isDitherEnabled() const { return ditherEnable & 1; }
261
int getDitherValue(int x, int y) const {
262
u8 raw = (dithmtx[y & 3] >> ((x & 3) * 4)) & 0xF;
263
// Apply sign extension to make 8-F negative, 0-7 positive.
264
return ((s8)(raw << 4)) >> 4;
265
}
266
267
// Color Mask
268
u32 getColorMask() const { return (pmskc & 0xFFFFFF) | ((pmska & 0xFF) << 24); }
269
u8 getStencilWriteMask() const { return pmska & 0xFF; }
270
bool isLogicOpEnabled() const { return logicOpEnable & 1; }
271
GELogicOp getLogicOp() const { return static_cast<GELogicOp>(lop & 0xF); }
272
273
// Depth Test
274
bool isDepthTestEnabled() const { return zTestEnable & 1; }
275
bool isDepthWriteEnabled() const { return !(zmsk & 1); }
276
GEComparison getDepthTestFunction() const { return static_cast<GEComparison>(ztestfunc & 0x7); }
277
u16 getDepthRangeMin() const { return minz & 0xFFFF; }
278
u16 getDepthRangeMax() const { return maxz & 0xFFFF; }
279
280
// Stencil Test
281
bool isStencilTestEnabled() const { return stencilTestEnable & 1; }
282
GEComparison getStencilTestFunction() const { return static_cast<GEComparison>(stenciltest & 0x7); }
283
int getStencilTestRef() const { return (stenciltest>>8) & 0xFF; }
284
int getStencilTestMask() const { return (stenciltest>>16) & 0xFF; }
285
GEStencilOp getStencilOpSFail() const { return static_cast<GEStencilOp>(stencilop & 0x7); }
286
GEStencilOp getStencilOpZFail() const { return static_cast<GEStencilOp>((stencilop>>8) & 0x7); }
287
GEStencilOp getStencilOpZPass() const { return static_cast<GEStencilOp>((stencilop>>16) & 0x7); }
288
289
// Alpha Test
290
bool isAlphaTestEnabled() const { return alphaTestEnable & 1; }
291
GEComparison getAlphaTestFunction() const { return static_cast<GEComparison>(alphatest & 0x7); }
292
int getAlphaTestRef() const { return (alphatest >> 8) & 0xFF; }
293
int getAlphaTestMask() const { return (alphatest >> 16) & 0xFF; }
294
295
// Color Test
296
bool isColorTestEnabled() const { return colorTestEnable & 1; }
297
GEComparison getColorTestFunction() const { return static_cast<GEComparison>(colortest & 0x3); }
298
u32 getColorTestRef() const { return colorref & 0xFFFFFF; }
299
u32 getColorTestMask() const { return colortestmask & 0xFFFFFF; }
300
301
// Texturing
302
// TODO: Verify getTextureAddress() alignment?
303
u32 getTextureAddress(int level) const { return (texaddr[level] & 0xFFFFF0) | ((texbufwidth[level] << 8) & 0x0F000000); }
304
int getTextureWidth(int level) const { return 1 << (texsize[level] & 0xf);}
305
int getTextureHeight(int level) const { return 1 << ((texsize[level] >> 8) & 0xf);}
306
u16 getTextureDimension(int level) const { return texsize[level] & 0xf0f;}
307
GETexLevelMode getTexLevelMode() const { return static_cast<GETexLevelMode>(texlevel & 0x3); }
308
int getTexLevelOffset16() const { return (int)(s8)((texlevel >> 16) & 0xFF); }
309
bool isTextureMapEnabled() const { return textureMapEnable & 1; }
310
GETexFunc getTextureFunction() const { return static_cast<GETexFunc>(texfunc & 0x7); }
311
bool isColorDoublingEnabled() const { return (texfunc & 0x10000) != 0; }
312
bool isTextureAlphaUsed() const { return (texfunc & 0x100) != 0; }
313
GETextureFormat getTextureFormat() const { return static_cast<GETextureFormat>(texformat & 0xF); }
314
bool isTextureFormatIndexed() const { return (texformat & 4) != 0; } // GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx.
315
int getTextureEnvColRGB() const { return texenvcolor & 0x00FFFFFF; }
316
u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); }
317
int getClutLoadBytes() const { return getClutLoadBlocks() * 32; }
318
int getClutLoadBlocks() const {
319
// The PSP only supports 0x3F, but Misshitsu no Sacrifice has extra color data (see #15727.)
320
// 0x40 would be 0, which would be a no-op, so we allow it.
321
if ((loadclut & 0x7F) == 0x40)
322
return 0x40;
323
return loadclut & 0x3F;
324
}
325
GEPaletteFormat getClutPaletteFormat() const { return static_cast<GEPaletteFormat>(clutformat & 3); }
326
int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; }
327
int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; }
328
int getClutIndexStartPos() const { return ((clutformat >> 16) & 0x1F) << 4; }
329
// Applies the CLUT shift, mask and start position from clutformat to a raw index.
u32 transformClutIndex(u32 index) const {
	// Entries beyond the first 1024 bytes must wrap, so the start position is
	// limited to 0xFF for 32-bit palettes and 0x1FF for every other format.
	const bool is32BitPalette = getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888;
	const u32 wrapMask = is32BitPalette ? 0xFF : 0x1FF;

	const u32 shifted = index >> getClutIndexShift();
	const u32 masked = shifted & getClutIndexMask();
	const u32 start = getClutIndexStartPos() & wrapMask;
	return masked | start;
}
334
bool isClutIndexSimple() const { return (clutformat & ~3) == 0xC500FF00; } // Meaning, no special mask, shift, or start pos.
335
bool isTextureSwizzled() const { return texmode & 1; }
336
bool isClutSharedForMipmaps() const { return (texmode & 0x100) == 0; }
337
bool isMipmapEnabled() const { return (texfilter & 4) != 0; }
338
bool isMipmapFilteringEnabled() const { return (texfilter & 2) != 0; }
339
bool isMinifyFilteringEnabled() const { return (texfilter & 1) != 0; }
340
bool isMagnifyFilteringEnabled() const { return (texfilter >> 8) & 1; }
341
int getTextureMaxLevel() const { return (texmode >> 16) & 0x7; }
342
float getTextureLodSlope() const { return getFloat24(texlodslope); }
343
344
// Lighting
345
bool isLightingEnabled() const { return lightingEnable & 1; }
346
bool isLightChanEnabled(int chan) const { return lightEnable[chan] & 1; }
347
GELightComputation getLightComputation(int chan) const { return static_cast<GELightComputation>(ltype[chan] & 0x3); }
348
bool isUsingPoweredDiffuseLight(int chan) const { return getLightComputation(chan) == GE_LIGHTCOMP_ONLYPOWDIFFUSE; }
349
bool isUsingSpecularLight(int chan) const { return getLightComputation(chan) == GE_LIGHTCOMP_BOTH; }
350
bool isUsingSecondaryColor() const { return lmode & 1; }
351
GELightType getLightType(int chan) const { return static_cast<GELightType>((ltype[chan] >> 8) & 3); }
352
bool isDirectionalLight(int chan) const { return getLightType(chan) == GE_LIGHTTYPE_DIRECTIONAL; }
353
bool isPointLight(int chan) const { return getLightType(chan) == GE_LIGHTTYPE_POINT; }
354
bool isSpotLight(int chan) const { return getLightType(chan) >= GE_LIGHTTYPE_SPOT; }
355
GEShadeMode getShadeMode() const { return static_cast<GEShadeMode>(shademodel & 1); }
356
unsigned int getAmbientR() const { return ambientcolor&0xFF; }
357
unsigned int getAmbientG() const { return (ambientcolor>>8)&0xFF; }
358
unsigned int getAmbientB() const { return (ambientcolor>>16)&0xFF; }
359
unsigned int getAmbientA() const { return ambientalpha&0xFF; }
360
unsigned int getAmbientRGBA() const { return (ambientcolor&0xFFFFFF) | ((ambientalpha&0xFF)<<24); }
361
unsigned int getMaterialUpdate() const { return materialupdate & 7; }
362
unsigned int getMaterialAmbientR() const { return materialambient&0xFF; }
363
unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; }
364
unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; }
365
unsigned int getMaterialAmbientA() const { return materialalpha&0xFF; }
366
unsigned int getMaterialAmbientRGBA() const { return (materialambient & 0x00FFFFFF) | (materialalpha << 24); }
367
unsigned int getMaterialDiffuseR() const { return materialdiffuse&0xFF; }
368
unsigned int getMaterialDiffuseG() const { return (materialdiffuse>>8)&0xFF; }
369
unsigned int getMaterialDiffuseB() const { return (materialdiffuse>>16)&0xFF; }
370
unsigned int getMaterialDiffuse() const { return materialdiffuse & 0xffffff; }
371
unsigned int getMaterialEmissiveR() const { return materialemissive&0xFF; }
372
unsigned int getMaterialEmissiveG() const { return (materialemissive>>8)&0xFF; }
373
unsigned int getMaterialEmissiveB() const { return (materialemissive>>16)&0xFF; }
374
unsigned int getMaterialEmissive() const { return materialemissive & 0xffffff; }
375
unsigned int getMaterialSpecularR() const { return materialspecular&0xFF; }
376
unsigned int getMaterialSpecularG() const { return (materialspecular>>8)&0xFF; }
377
unsigned int getMaterialSpecularB() const { return (materialspecular>>16)&0xFF; }
378
unsigned int getMaterialSpecular() const { return materialspecular & 0xffffff; }
379
float getMaterialSpecularCoef() const { return getFloat24(materialspecularcoef); }
380
unsigned int getLightAmbientColorR(int chan) const { return lcolor[chan*3]&0xFF; }
381
unsigned int getLightAmbientColorG(int chan) const { return (lcolor[chan*3]>>8)&0xFF; }
382
unsigned int getLightAmbientColorB(int chan) const { return (lcolor[chan*3]>>16)&0xFF; }
383
unsigned int getLightAmbientColor(int chan) const { return lcolor[chan*3]&0xFFFFFF; }
384
unsigned int getDiffuseColorR(int chan) const { return lcolor[1+chan*3]&0xFF; }
385
unsigned int getDiffuseColorG(int chan) const { return (lcolor[1+chan*3]>>8)&0xFF; }
386
unsigned int getDiffuseColorB(int chan) const { return (lcolor[1+chan*3]>>16)&0xFF; }
387
unsigned int getDiffuseColor(int chan) const { return lcolor[1+chan*3]&0xFFFFFF; }
388
unsigned int getSpecularColorR(int chan) const { return lcolor[2+chan*3]&0xFF; }
389
unsigned int getSpecularColorG(int chan) const { return (lcolor[2+chan*3]>>8)&0xFF; }
390
unsigned int getSpecularColorB(int chan) const { return (lcolor[2+chan*3]>>16)&0xFF; }
391
unsigned int getSpecularColor(int chan) const { return lcolor[2+chan*3]&0xFFFFFF; }
392
393
int getPatchDivisionU() const { return patchdivision & 0x7F; }
394
int getPatchDivisionV() const { return (patchdivision >> 8) & 0x7F; }
395
396
// UV gen
397
GETexMapMode getUVGenMode() const { return static_cast<GETexMapMode>(texmapmode & 3);} // 2 bits
398
GETexProjMapMode getUVProjMode() const { return static_cast<GETexProjMapMode>((texmapmode >> 8) & 3);} // 2 bits
399
int getUVLS0() const { return texshade & 0x3; } // 2 bits
400
int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits
401
402
bool isTexCoordClampedS() const { return texwrap & 1; }
403
bool isTexCoordClampedT() const { return (texwrap >> 8) & 1; }
404
405
int getScissorX1() const { return scissor1 & 0x3FF; }
406
int getScissorY1() const { return (scissor1 >> 10) & 0x3FF; }
407
int getScissorX2() const { return scissor2 & 0x3FF; }
408
int getScissorY2() const { return (scissor2 >> 10) & 0x3FF; }
409
int getRegionRateX() const { return 0x100 + (region1 & 0x3FF); }
410
int getRegionRateY() const { return 0x100 + ((region1 >> 10) & 0x3FF); }
411
int getRegionX2() const { return (region2 & 0x3FF); }
412
int getRegionY2() const { return (region2 >> 10) & 0x3FF; }
413
414
bool isDepthClampEnabled() const { return depthClampEnable & 1; }
415
416
// Note that the X/Y/Z scale values are not the upper-left corner, but half the viewport dimensions. The X/Y/Z center values are the viewport center.
417
float getViewportXScale() const { return getFloat24(viewportxscale); }
418
float getViewportYScale() const { return getFloat24(viewportyscale); }
419
float getViewportZScale() const { return getFloat24(viewportzscale); }
420
float getViewportXCenter() const { return getFloat24(viewportxcenter); }
421
float getViewportYCenter() const { return getFloat24(viewportycenter); }
422
float getViewportZCenter() const { return getFloat24(viewportzcenter); }
423
424
// Fixed 12.4 point.
425
int getOffsetX16() const { return offsetx & 0xFFFF; }
426
int getOffsetY16() const { return offsety & 0xFFFF; }
427
float getOffsetX() const { return (float)getOffsetX16() / 16.0f; }
428
float getOffsetY() const { return (float)getOffsetY16() / 16.0f; }
429
430
// Vertex type
431
bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; }
432
bool areNormalsReversed() const { return reversenormals & 1; }
433
bool isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); }
434
int getNumMorphWeights() const { return ((vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT) + 1; }
435
436
GEPatchPrimType getPatchPrimitiveType() const { return static_cast<GEPatchPrimType>(patchprimitive & 3); }
437
bool isPatchNormalsReversed() const { return patchfacing & 1; }
438
439
// Transfers
440
u32 getTransferSrcAddress() const { return (transfersrc & 0xFFFFF0) | ((transfersrcw & 0xFF0000) << 8); }
441
// Bits 0xf800 are ignored, > 0x400 is treated as 0.
442
// Source stride for block transfers, taken from transfersrcw masked with 0x7F8.
// Any value above 0x400 is reported as 0.
u32 getTransferSrcStride() const {
	const u32 rawStride = transfersrcw & 0x7F8;
	if (rawStride > 0x400)
		return 0;
	return rawStride;
}
443
int getTransferSrcX() const { return (transfersrcpos >> 0) & 0x3FF; }
444
int getTransferSrcY() const { return (transfersrcpos >> 10) & 0x3FF; }
445
u32 getTransferDstAddress() const { return (transferdst & 0xFFFFF0) | ((transferdstw & 0xFF0000) << 8); }
446
// Bits 0xf800 are ignored, > 0x400 is treated as 0.
447
// Destination stride for block transfers, taken from transferdstw masked with 0x7F8.
// Any value above 0x400 is reported as 0.
u32 getTransferDstStride() const {
	const u32 rawStride = transferdstw & 0x7F8;
	if (rawStride > 0x400)
		return 0;
	return rawStride;
}
448
int getTransferDstX() const { return (transferdstpos >> 0) & 0x3FF; }
449
int getTransferDstY() const { return (transferdstpos >> 10) & 0x3FF; }
450
int getTransferWidth() const { return ((transfersize >> 0) & 0x3FF) + 1; }
451
int getTransferHeight() const { return ((transfersize >> 10) & 0x3FF) + 1; }
452
int getTransferBpp() const { return (transferstart & 1) ? 4 : 2; }
453
454
455
void FastLoadBoneMatrix(u32 addr);
456
457
// Real data in the context ends here
458
459
static void Reset();
460
void Save(u32_le *ptr);
461
void Restore(const u32_le *ptr);
462
};
463
464
bool vertTypeIsSkinningEnabled(u32 vertType);
465
466
// Returns the bone weight count encoded in a vertex type word. The field selected by
// GE_VTYPE_WEIGHTCOUNT_MASK stores (count - 1), hence the +1.
inline int vertTypeGetNumBoneWeights(u32 vertType) {
	const auto encodedCount = (vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT;
	return 1 + encodedCount;
}
467
// Returns only the bits of a vertex type word selected by GE_VTYPE_WEIGHT_MASK.
inline int vertTypeGetWeightMask(u32 vertType) {
	const int weightBits = vertType & GE_VTYPE_WEIGHT_MASK;
	return weightBits;
}
468
469
// The rest is cached simplified/converted data for fast access.
470
// Does not need to be saved when saving/restoring context.
471
//
472
// Lots of this, however, is actual emulator state which must be saved when savestating.
473
// vertexAddr, indexAddr, offsetAddr for example.
474
475
// Texture coordinate scale and offset.
// The field order and tight packing matter: GPUStateCache::UpdateUVScaleOffset()
// may write all four floats with a single 128-bit store.
struct UVScale {
	float uScale;
	float vScale;
	float uOff;
	float vOff;
};
479
480
// Builds a single-bit mask for flag index x.
// The argument is parenthesized so that compound expressions expand correctly, and the
// shifted constant is unsigned so that bit 31 does not shift into the sign bit of an int.
#define FLAG_BIT(x) (1u << (x))

// These flags are mainly to make sure that we make decisions on code path in a single
// location. Sometimes we need to take things into account in multiple places, it helps
// to centralize into flags like this. They're also fast to check since the cache line
// will be hot.
// NOTE: Do not forget to update the string array at the end of GPUState.cpp!
enum {
	GPU_USE_DUALSOURCE_BLEND = FLAG_BIT(0),
	GPU_USE_LIGHT_UBERSHADER = FLAG_BIT(1),
	GPU_USE_FRAGMENT_TEST_CACHE = FLAG_BIT(2),
	GPU_USE_VS_RANGE_CULLING = FLAG_BIT(3),
	GPU_USE_BLEND_MINMAX = FLAG_BIT(4),
	GPU_USE_LOGIC_OP = FLAG_BIT(5),
	GPU_USE_FRAGMENT_UBERSHADER = FLAG_BIT(6),
	GPU_USE_TEXTURE_NPOT = FLAG_BIT(7),
	GPU_USE_ANISOTROPY = FLAG_BIT(8),
	GPU_USE_CLEAR_RAM_HACK = FLAG_BIT(9),
	GPU_USE_INSTANCE_RENDERING = FLAG_BIT(10),
	GPU_USE_VERTEX_TEXTURE_FETCH = FLAG_BIT(11),
	GPU_USE_TEXTURE_FLOAT = FLAG_BIT(12),
	GPU_USE_16BIT_FORMATS = FLAG_BIT(13),
	GPU_USE_DEPTH_CLAMP = FLAG_BIT(14),
	GPU_USE_TEXTURE_LOD_CONTROL = FLAG_BIT(15),
	GPU_USE_DEPTH_TEXTURE = FLAG_BIT(16),
	GPU_USE_ACCURATE_DEPTH = FLAG_BIT(17),
	GPU_USE_GS_CULLING = FLAG_BIT(18),  // Geometry shader
	GPU_USE_FRAMEBUFFER_ARRAYS = FLAG_BIT(19),
	GPU_USE_FRAMEBUFFER_FETCH = FLAG_BIT(20),
	GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),
	GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),
	GPU_ROUND_DEPTH_TO_16BIT = FLAG_BIT(23),  // Can be disabled either per game or if we use a real 16-bit depth buffer
	GPU_USE_CLIP_DISTANCE = FLAG_BIT(24),
	GPU_USE_CULL_DISTANCE = FLAG_BIT(25),

	// VR flags (reserved or in-use)
	GPU_USE_VIRTUAL_REALITY = FLAG_BIT(29),
	GPU_USE_SINGLE_PASS_STEREO = FLAG_BIT(30),
	GPU_USE_SIMPLE_STEREO_PERSPECTIVE = FLAG_BIT(31),
};
520
521
// Note that this takes a flag index, not the bit value.
522
const char *GpuUseFlagToString(int useFlag);
523
524
struct KnownVertexBounds {
525
u16 minU;
526
u16 minV;
527
u16 maxU;
528
u16 maxV;
529
};
530
531
// Kind of primitive submission currently in progress (stored in GPUStateCache::submitType).
// NOTE(review): the HW_ variants presumably select the hardware bezier/spline path - confirm.
enum class SubmitType {
	DRAW = 0,
	BEZIER = 1,
	SPLINE = 2,
	HW_BEZIER = 3,
	HW_SPLINE = 4,
};
538
539
extern GPUgstate gstate;
540
541
struct GPUStateCache {
542
bool Use(u32 flags) const { return (useFlags_ & flags) != 0; } // Return true if ANY of flags are true.
543
bool UseAll(u32 flags) const { return (useFlags_ & flags) == flags; } // Return true if ALL flags are true.
544
545
u32 UseFlags() const { return useFlags_; }
546
547
// Returns the subset of the dirty bits covered by DIRTY_ALL_UNIFORMS.
// Read-only, so it is const like IsDirty() and can be called on a const cache.
uint64_t GetDirtyUniforms() const { return dirty & DIRTY_ALL_UNIFORMS; }
548
void Dirty(u64 what) {
549
dirty |= what;
550
}
551
void CleanUniforms() {
552
dirty &= ~DIRTY_ALL_UNIFORMS;
553
}
554
void Clean(u64 what) {
555
dirty &= ~what;
556
}
557
bool IsDirty(u64 what) const {
558
return (dirty & what) != 0ULL;
559
}
560
// Stores the shader depal mode and marks the fragment shader state dirty,
// but only when the mode actually changes.
void SetUseShaderDepal(ShaderDepalMode mode) {
	if (mode == shaderDepalMode)
		return;
	shaderDepalMode = mode;
	Dirty(DIRTY_FRAGMENTSHADER_STATE);
}
566
void SetTextureFullAlpha(bool fullAlpha) {
567
if (fullAlpha != textureFullAlpha) {
568
textureFullAlpha = fullAlpha;
569
Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEX_ALPHA_MUL);
570
}
571
}
572
void SetNeedShaderTexclamp(bool need) {
573
if (need != needShaderTexClamp) {
574
needShaderTexClamp = need;
575
Dirty(DIRTY_FRAGMENTSHADER_STATE);
576
if (need)
577
Dirty(DIRTY_TEXCLAMP);
578
}
579
}
580
void SetTextureIs3D(bool is3D) {
581
if (is3D != curTextureIs3D) {
582
curTextureIs3D = is3D;
583
Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0));
584
}
585
}
586
void SetTextureIsArray(bool isArrayTexture) { // VK only
587
if (textureIsArray != isArrayTexture) {
588
textureIsArray = isArrayTexture;
589
Dirty(DIRTY_FRAGMENTSHADER_STATE);
590
}
591
}
592
void SetTextureIsVideo(bool isVideo) {
593
textureIsVideo = isVideo;
594
}
595
void SetTextureIsBGRA(bool isBGRA) {
596
if (bgraTexture != isBGRA) {
597
bgraTexture = isBGRA;
598
Dirty(DIRTY_FRAGMENTSHADER_STATE);
599
}
600
}
601
void SetTextureIsFramebuffer(bool isFramebuffer) {
602
if (textureIsFramebuffer != isFramebuffer) {
603
textureIsFramebuffer = isFramebuffer;
604
Dirty(DIRTY_UVSCALEOFFSET);
605
} else if (isFramebuffer) {
606
// Always dirty if it's a framebuffer, since the uniform value depends both
607
// on the specified texture size and the bound texture size. Makes things easier.
608
// TODO: Look at this again later.
609
Dirty(DIRTY_UVSCALEOFFSET);
610
}
611
}
612
// Replaces the use flags. useFlagsChanged is raised only when a previous
// non-zero flag set is replaced; the first assignment from 0 does not count.
void SetUseFlags(u32 newFlags) {
	if (newFlags == useFlags_)
		return;
	const bool hadFlags = useFlags_ != 0;
	if (hadFlags)
		useFlagsChanged = true;
	useFlags_ = newFlags;
}
619
620
// When checking for a single flag, use Use()/UseAll().
621
u32 GetUseFlags() const {
622
return useFlags_;
623
}
624
625
void UpdateUVScaleOffset() {
626
#if defined(_M_SSE)
627
__m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8);
628
_mm_storeu_si128((__m128i *)&uv, values);
629
#elif PPSSPP_ARCH(ARM_NEON)
630
const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8);
631
vst1q_u32((u32 *)&uv, values);
632
#else
633
uv.uScale = getFloat24(gstate.texscaleu);
634
uv.vScale = getFloat24(gstate.texscalev);
635
uv.uOff = getFloat24(gstate.texoffsetu);
636
uv.vOff = getFloat24(gstate.texoffsetv);
637
#endif
638
}
639
640
private:
641
u32 useFlags_;
642
public:
643
u32 vertexAddr;
644
u32 indexAddr;
645
u32 offsetAddr;
646
647
uint64_t dirty;
648
649
bool usingDepth; // For deferred depth copies.
650
bool clearingDepth;
651
652
bool textureFullAlpha;
653
bool vertexFullAlpha;
654
655
int skipDrawReason;
656
657
UVScale uv;
658
659
bool bgraTexture;
660
bool needShaderTexClamp;
661
bool textureIsArray;
662
bool textureIsFramebuffer;
663
bool textureIsVideo;
664
bool useFlagsChanged;
665
666
float morphWeights[8];
667
u32 deferredVertTypeDirty;
668
669
u32 curTextureWidth;
670
u32 curTextureHeight;
671
u32 actualTextureHeight;
672
// Only applied when needShaderTexClamp = true.
673
int curTextureXOffset;
674
int curTextureYOffset;
675
bool curTextureIs3D;
676
677
float vpWidth;
678
float vpHeight;
679
680
float vpXOffset;
681
float vpYOffset;
682
float vpZOffset;
683
float vpWidthScale;
684
float vpHeightScale;
685
float vpDepthScale;
686
687
KnownVertexBounds vertBounds;
688
689
GEBufferFormat framebufFormat;
690
// Some games use a very specific masking setup to draw into the alpha channel of a 4444 target using the blue channel of a 565 target.
691
// This is done because on PSP you can't write to destination alpha, other than stencil values, which can't be set from a texture.
692
// Examples of games that do this: Outrun, Split/Second.
693
// We detect this case and go into a special drawing mode.
694
bool blueToAlpha;
695
696
// U/V is 1:1 to pixels. Can influence texture sampling.
697
bool pixelMapped;
698
699
// TODO: These should be accessed from the current VFB object directly.
700
u32 curRTWidth;
701
u32 curRTHeight;
702
u32 curRTRenderWidth;
703
u32 curRTRenderHeight;
704
705
void SetCurRTOffset(int xoff, int yoff) {
706
if (xoff != curRTOffsetX || yoff != curRTOffsetY) {
707
curRTOffsetX = xoff;
708
curRTOffsetY = yoff;
709
Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_PROJTHROUGHMATRIX);
710
}
711
}
712
int curRTOffsetX;
713
int curRTOffsetY;
714
715
// Set if we are doing hardware bezier/spline.
716
SubmitType submitType;
717
int spline_num_points_u;
718
719
ShaderDepalMode shaderDepalMode;
720
GEBufferFormat depalFramebufferFormat;
721
722
u32 getRelativeAddress(u32 data) const;
723
static void Reset();
724
void DoState(PointerWrap &p);
725
};
726
727
class GPUInterface;
728
class GPUDebugInterface;
729
730
extern GPUStateCache gstate_c;
731
732
// Resolves a relative address from a command into an absolute one, using the
// global gstate.base register and the global gstate_c.offsetAddr.
inline u32 GPUStateCache::getRelativeAddress(u32 data) const {
	// Bits 16-19 of the base register become bits 24-27 of the address.
	const u32 baseHighBits = (gstate.base & 0x000F0000) << 8;
	const u32 combined = baseHighBits | data;
	// Apply the current offset, then keep only the low 28 bits.
	return (gstate_c.offsetAddr + combined) & 0x0FFFFFFF;
}
736
737