CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/GPUStateUtils.h
Views: 1401
1
#pragma once
2
3
#include <cstdint>
4
#include "Common/CommonTypes.h"
5
6
#include "GPU/ge_constants.h"
7
#include "GPU/GPUState.h"
8
9
// TODO: Replace enums and structs with same from thin3d.h, for convenient mapping.
10
11
// Return type of ReplaceAlphaWithStencilType().
// Enumerators take the implicit values 0..8 in declaration order.
// NOTE(review): the _4 / _8 suffixes presumably refer to the stencil bit depth of the
// target buffer format - confirm against the .cpp file.
enum StencilValueType {
    STENCIL_VALUE_UNIFORM,
    STENCIL_VALUE_ZERO,
    STENCIL_VALUE_ONE,
    STENCIL_VALUE_KEEP,
    STENCIL_VALUE_INVERT,
    STENCIL_VALUE_INCR_4,
    STENCIL_VALUE_INCR_8,
    STENCIL_VALUE_DECR_4,
    STENCIL_VALUE_DECR_8,
};
22
23
// Return type of ReplaceAlphaWithStencil(); also stored in
// GenericBlendState::replaceAlphaWithStencil as part of the shader generation state.
// The numeric values are spelled out explicitly, so keep them stable.
enum ReplaceAlphaType {
    REPLACE_ALPHA_NO = 0,
    REPLACE_ALPHA_YES = 1,
    REPLACE_ALPHA_DUALSOURCE = 2,
};
28
29
// Return type of ReplaceBlendWithShader(); describes how much of the blend function is
// moved out of the fixed-function blend state. Enumerators take implicit values 0..7.
enum ReplaceBlendType {
    REPLACE_BLEND_NO,  // Blend function handled directly with blend states.

    REPLACE_BLEND_STANDARD,

    // SRC part of blend function handled in-shader.
    REPLACE_BLEND_PRE_SRC,
    REPLACE_BLEND_PRE_SRC_2X_ALPHA,
    REPLACE_BLEND_2X_ALPHA,
    REPLACE_BLEND_2X_SRC,

    // Full blend equation runs in shader.
    // We might have to make a copy of the framebuffer target to read from.
    REPLACE_BLEND_READ_FRAMEBUFFER,

    // Color blend mode and color gets copied to alpha blend mode.
    REPLACE_BLEND_BLUE_TO_ALPHA,
};
47
48
// Return type of SimulateLogicOpShaderTypeIfNeeded(); also stored in
// GenericBlendState::simulateLogicOpType. Enumerators take implicit values 0..2.
enum SimulateLogicOpType {
    LOGICOPTYPE_NORMAL,
    LOGICOPTYPE_ONE,
    LOGICOPTYPE_INVERT,
};
53
54
// Argument-less boolean queries about the alpha / color / depth / stencil test setup.
// Only the declarations live in this header; the definitions are elsewhere.
// NOTE(review): presumably these inspect the global gstate - confirm in the .cpp file.
bool IsAlphaTestTriviallyTrue();
bool IsColorTestAgainstZero();
bool IsColorTestTriviallyTrue();
bool IsAlphaTestAgainstZero();
bool NeedsTestDiscard();
bool IsDepthTestEffectivelyDisabled();
bool IsStencilTestOutputDisabled();
61
62
StencilValueType ReplaceAlphaWithStencilType();
63
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend);
64
ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat);
65
66
// This is for the fallback path if real logic ops are not available.
67
SimulateLogicOpType SimulateLogicOpShaderTypeIfNeeded();
68
69
// Common representation, should be able to set this directly with any modern API.
// Plain aggregate without default member initializers: a default-initialized instance
// has indeterminate field values. It is filled via the `out` parameter of
// ConvertViewportAndScissor().
struct ViewportAndScissor {
    // Scissor rectangle (integer).
    int scissorX;
    int scissorY;
    int scissorW;
    int scissorH;

    // Viewport rectangle (float).
    float viewportX;
    float viewportY;
    float viewportW;
    float viewportH;

    float depthRangeMin;
    float depthRangeMax;

    // NOTE(review): presumably per-axis scale/offset corrections to apply when the viewport
    // cannot be expressed directly - confirm against ConvertViewportAndScissor in the .cpp.
    float widthScale;
    float heightScale;
    float depthScale;
    float xOffset;
    float yOffset;
    float zOffset;

    bool throughMode;
};
89
90
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
91
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor);
92
93
// NOTE: See the .cpp file for detailed comment about how the use flags are interpreted.
//
// Immutable (offset, scale) pair converting between depth-buffer values and the
// 16-bit depth range:
//     u16 = (z - offset) * scale
//     z   = u16 / scale + offset
class DepthScaleFactors {
public:
    // This should only be used from GetDepthScaleFactors.
    DepthScaleFactors(double offset, double scale) : offset_(offset), scale_(scale) {}

    // Decodes a value from a depth buffer to the 16-bit depth range (0..65535, matching
    // the divisor used by Scale()). Computed in double, then narrowed to float.
    float DecodeToU16(float z) const {
        return static_cast<float>((z - offset_) * scale_);
    }

    // Encodes a value from the 16-bit depth range to a normalized depth value (0-1), in the
    // range that we write to the depth buffer. Inverse of DecodeToU16.
    // No guard against a zero scale is performed here.
    float EncodeFromU16(float z_u16) const {
        return static_cast<float>((static_cast<double>(z_u16) / scale_) + offset_);
    }

    // The offset, narrowed to float.
    float Offset() const { return static_cast<float>(offset_); }

    // The raw scale (depth-buffer units -> u16 units), narrowed to float.
    float ScaleU16() const { return static_cast<float>(scale_); }
    // The scale divided by 65535, i.e. relative to a 0..1 range instead of 0..65535.
    float Scale() const { return static_cast<float>(scale_ / 65535.0); }

private:
    // Doubles hardly cost anything these days, and precision matters here.
    double offset_;
    double scale_;
};
120
121
// Returns the DepthScaleFactors for the given use flags. Per the note on the
// DepthScaleFactors constructor, this is the intended way to obtain an instance.
// How useFlags is interpreted is documented in the .cpp file.
DepthScaleFactors GetDepthScaleFactors(u32 useFlags);
122
123
// These are common to all modern APIs and can be easily converted with a lookup table.
// Stored in one byte. Values are implicit and contiguous from 0, with INVALID and COUNT
// as the last two entries, so COUNT equals the number of entries before it (INVALID included).
enum class BlendFactor : uint8_t {
    ZERO,
    ONE,
    SRC_COLOR,
    ONE_MINUS_SRC_COLOR,
    DST_COLOR,
    ONE_MINUS_DST_COLOR,
    SRC_ALPHA,
    ONE_MINUS_SRC_ALPHA,
    DST_ALPHA,
    ONE_MINUS_DST_ALPHA,
    CONSTANT_COLOR,
    ONE_MINUS_CONSTANT_COLOR,
    CONSTANT_ALPHA,
    ONE_MINUS_CONSTANT_ALPHA,
    SRC1_COLOR,
    ONE_MINUS_SRC1_COLOR,
    SRC1_ALPHA,
    ONE_MINUS_SRC1_ALPHA,
    INVALID,
    COUNT,
};
146
147
// Blend equation selector, stored in one byte. Values are implicit and contiguous from 0;
// COUNT is the number of real equations.
enum class BlendEq : uint8_t {
    ADD,
    SUBTRACT,
    REVERSE_SUBTRACT,
    MIN,
    MAX,
    COUNT
};
155
156
// Computed blend setup, including shader stuff.
157
struct GenericBlendState {
158
bool applyFramebufferRead;
159
bool dirtyShaderBlendFixValues;
160
161
// Shader generation state
162
ReplaceAlphaType replaceAlphaWithStencil;
163
ReplaceBlendType replaceBlend;
164
SimulateLogicOpType simulateLogicOpType;
165
166
// Resulting hardware blend state
167
bool blendEnabled;
168
169
BlendFactor srcColor;
170
BlendFactor dstColor;
171
BlendFactor srcAlpha;
172
BlendFactor dstAlpha;
173
174
BlendEq eqColor;
175
BlendEq eqAlpha;
176
177
bool useBlendColor;
178
u32 blendColor;
179
180
void setFactors(BlendFactor srcC, BlendFactor dstC, BlendFactor srcA, BlendFactor dstA) {
181
srcColor = srcC;
182
dstColor = dstC;
183
srcAlpha = srcA;
184
dstAlpha = dstA;
185
}
186
void setEquation(BlendEq eqC, BlendEq eqA) {
187
eqColor = eqC;
188
eqAlpha = eqA;
189
}
190
void setBlendColor(uint32_t color, uint8_t alpha) {
191
blendColor = color | ((uint32_t)alpha << 24);
192
useBlendColor = true;
193
}
194
void defaultBlendColor(uint8_t alpha) {
195
blendColor = 0xFFFFFF | ((uint32_t)alpha << 24);
196
useBlendColor = true;
197
}
198
199
void Log();
200
};
201
202
// Declaration only; defined outside this header. blendState is taken by non-const
// reference, so it can be modified by the call.
// NOTE(review): presumably sets up the hardware blend state for stencil-to-alpha / logic-op
// handling when regular blending is not in use - confirm in the .cpp file.
void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState);
203
204
// Computed color write-mask setup. Plain aggregate without default member initializers.
struct GenericMaskState {
    // Set by ConvertToShaderBlend() when masking is moved into the shader.
    bool applyFramebufferRead;
    uint32_t uniformMask;  // For each bit, opposite to the PSP.

    // The hardware channel masks, 1 bit per color component. From bit 0, order is RGBA like in all APIs!
    uint8_t channelMask;

    // Switches masking over to the shader: all four hardware channels are enabled
    // and applyFramebufferRead is set. uniformMask is left untouched.
    void ConvertToShaderBlend() {
        // If we have to do it in the shader, we simply pass through all channels but mask only in the shader instead.
        // Some GPUs have minor penalties for masks that are not all-channels-on or all-channels-off.
        channelMask = 0xF;
        applyFramebufferRead = true;
    }

    // Defined outside this header.
    void Log();
};
220
221
struct GenericStencilFuncState {
222
bool enabled;
223
GEComparison testFunc;
224
u8 testRef;
225
u8 testMask;
226
u8 writeMask;
227
GEStencilOp sFail;
228
GEStencilOp zFail;
229
GEStencilOp zPass;
230
};
231
// Declaration only; defined outside this header. stencilFuncState is an output
// parameter (non-const reference).
// NOTE(review): presumably populated from the current gstate - confirm in the .cpp file.
void ConvertStencilFuncState(GenericStencilFuncState &stencilFuncState);
232
233
struct GenericLogicState {
234
// If set, logic op is applied in the shader INSTEAD of in hardware.
235
// In this case, simulateLogicOpType and all that should be off.
236
bool applyFramebufferRead;
237
238
// Hardware
239
bool logicOpEnabled;
240
241
// Hardware and shader generation
242
GELogicOp logicOp;
243
244
void ApplyToBlendState(GenericBlendState &blendState);
245
void ConvertToShaderBlend() {
246
if (logicOp != GE_LOGIC_COPY) {
247
logicOpEnabled = false;
248
applyFramebufferRead = true;
249
// Same logicOp is kept.
250
}
251
}
252
};
253
254
struct ComputedPipelineState {
255
GenericBlendState blendState;
256
GenericMaskState maskState;
257
GenericLogicState logicState;
258
259
void Convert(bool shaderBitOpsSupported);
260
261
bool FramebufferRead() const {
262
// If blending is off, its applyFramebufferRead can be false even after state propagation.
263
// So it's not enough to check just that one.
264
return blendState.applyFramebufferRead || maskState.applyFramebufferRead || logicState.applyFramebufferRead;
265
}
266
};
267
268
// See issue #15898
269
inline bool SpongebobDepthInverseConditions(const GenericStencilFuncState &stencilState) {
270
// Check that the depth/stencil state matches the conditions exactly.
271
// Always with a depth test that's not writing to the depth buffer (only stencil.)
272
if (!gstate.isDepthTestEnabled() || gstate.isDepthWriteEnabled())
273
return false;
274
// Always GREATER_EQUAL, which we flip to LESS.
275
if (gstate.getDepthTestFunction() != GE_COMP_GEQUAL)
276
return false;
277
278
// The whole purpose here is a depth fail that we need to write to alpha.
279
if (stencilState.zFail != GE_STENCILOP_ZERO || stencilState.sFail != GE_STENCILOP_KEEP || stencilState.zPass != GE_STENCILOP_KEEP)
280
return false;
281
if (stencilState.testFunc != GE_COMP_ALWAYS || stencilState.writeMask != 0xFF)
282
return false;
283
284
// Lastly, verify no color is written. Natural way is a mask, in case another game uses it.
285
// Note that the PSP masks are reversed compared to typical APIs.
286
if (gstate.getColorMask() == 0xFFFFFF00)
287
return true;
288
289
// These games specifically use simple alpha blending with a constant zero alpha.
290
if (!gstate.isAlphaBlendEnabled() || gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA || gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA)
291
return false;
292
293
// Also make sure there's no texture, in case its alpha gets involved.
294
if (gstate.isTextureMapEnabled())
295
return false;
296
297
// Spongebob uses material alpha.
298
if (gstate.getMaterialAmbientA() == 0x00 && gstate.getMaterialUpdate() == 0)
299
return true;
300
// MX vs ATV : Reflex uses vertex colors, should really check them...
301
if (gstate.getMaterialUpdate() == 1)
302
return true;
303
304
// Okay, color is most likely being used if we didn't hit the above.
305
return false;
306
}
307
308