CoCalc -- GPUStateUtils.h

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/GPUStateUtils.h
Views: 1401
1
#pragma once
2

3
#include <cstdint>
4
#include "Common/CommonTypes.h"
5

6
#include "GPU/ge_constants.h"
7
#include "GPU/GPUState.h"
8

9
// TODO: Replace enums and structs with same from thin3d.h, for convenient mapping.
10

11
// Result type of ReplaceAlphaWithStencilType().
// Enumerator values are written out explicitly; they are exactly the values
// that implicit numbering would assign.
enum StencilValueType {
	STENCIL_VALUE_UNIFORM = 0,
	STENCIL_VALUE_ZERO = 1,
	STENCIL_VALUE_ONE = 2,
	STENCIL_VALUE_KEEP = 3,
	STENCIL_VALUE_INVERT = 4,
	STENCIL_VALUE_INCR_4 = 5,
	STENCIL_VALUE_INCR_8 = 6,
	STENCIL_VALUE_DECR_4 = 7,
	STENCIL_VALUE_DECR_8 = 8,
};
22

23
// Result type of ReplaceAlphaWithStencil(); also stored in
// GenericBlendState::replaceAlphaWithStencil as shader generation state.
// The numeric values are fixed explicitly.
enum ReplaceAlphaType {
	REPLACE_ALPHA_NO         = 0,
	REPLACE_ALPHA_YES        = 1,
	REPLACE_ALPHA_DUALSOURCE = 2,
};
28

29
// Result type of ReplaceBlendWithShader(); also stored in
// GenericBlendState::replaceBlend as shader generation state.
// Enumerator values are written out explicitly; they are exactly the values
// that implicit numbering would assign.
enum ReplaceBlendType {
	// Blend function handled directly with blend states.
	REPLACE_BLEND_NO = 0,

	REPLACE_BLEND_STANDARD = 1,

	// SRC part of blend function handled in-shader.
	REPLACE_BLEND_PRE_SRC = 2,
	REPLACE_BLEND_PRE_SRC_2X_ALPHA = 3,
	REPLACE_BLEND_2X_ALPHA = 4,
	REPLACE_BLEND_2X_SRC = 5,

	// Full blend equation runs in shader.
	// We might have to make a copy of the framebuffer target to read from.
	REPLACE_BLEND_READ_FRAMEBUFFER = 6,

	// Color blend mode and color gets copied to alpha blend mode.
	REPLACE_BLEND_BLUE_TO_ALPHA = 7,
};
47

48
// Result type of SimulateLogicOpShaderTypeIfNeeded(); also stored in
// GenericBlendState::simulateLogicOpType as shader generation state.
// Enumerator values are written out explicitly; they are exactly the values
// that implicit numbering would assign.
enum SimulateLogicOpType {
	LOGICOPTYPE_NORMAL = 0,
	LOGICOPTYPE_ONE = 1,
	LOGICOPTYPE_INVERT = 2,
};
53

54
bool IsAlphaTestTriviallyTrue();
55
bool IsColorTestAgainstZero();
56
bool IsColorTestTriviallyTrue();
57
bool IsAlphaTestAgainstZero();
58
bool NeedsTestDiscard();
59
bool IsDepthTestEffectivelyDisabled();
60
bool IsStencilTestOutputDisabled();
61

62
StencilValueType ReplaceAlphaWithStencilType();
63
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend);
64
ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat);
65

66
// This is for the fallback path if real logic ops are not available.
67
SimulateLogicOpType SimulateLogicOpShaderTypeIfNeeded();
68

69
// Common representation, should be able to set this directly with any modern API.
// Plain aggregate with no member functions. It is passed to
// ConvertViewportAndScissor() as its `out` reference parameter and to
// UpdateCachedViewportState() by const reference.
struct ViewportAndScissor {
	// Scissor rectangle (integers).
	int scissorX;
	int scissorY;
	int scissorW;
	int scissorH;

	// Viewport rectangle (floats).
	float viewportX;
	float viewportY;
	float viewportW;
	float viewportH;

	// Depth range.
	float depthRangeMin;
	float depthRangeMax;

	// Per-axis scale factors.
	float widthScale;
	float heightScale;
	float depthScale;

	// Per-axis offsets.
	float xOffset;
	float yOffset;
	float zOffset;

	bool throughMode;
};
89

90
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);
91
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor);
92

93
// NOTE: See the .cpp file for detailed comment about how the use flags are interpreted.
//
// Immutable (offset, scale) pair describing an affine mapping between values
// stored in the depth buffer and values on the U16 depth scale:
//   u16 = (z - offset) * scale        z = u16 / scale + offset
// Both factors are kept as doubles; results are narrowed to float on return.
class DepthScaleFactors {
public:
	// This should only be used from GetDepthScaleFactors.
	DepthScaleFactors(double offset, double scale)
		: offset_{offset}, scale_{scale} {}

	// Maps a depth buffer value to the U16 depth scale.
	// The subtraction and multiplication are carried out in double precision.
	float DecodeToU16(float z) const {
		return static_cast<float>((z - offset_) * scale_);
	}

	// Inverse of DecodeToU16: maps a value on the U16 depth scale to the
	// normalized depth value (0-1) in the range that we write to the depth buffer.
	float EncodeFromU16(float z_u16) const {
		return static_cast<float>((static_cast<double>(z_u16) / scale_) + offset_);
	}

	// The offset, narrowed to float.
	float Offset() const {
		return static_cast<float>(offset_);
	}

	// The scale as stored (U16 units per depth buffer unit), narrowed to float.
	float ScaleU16() const {
		return static_cast<float>(scale_);
	}

	// The stored scale divided by 65535.0, narrowed to float.
	float Scale() const {
		return static_cast<float>(scale_ / 65535.0);
	}

private:
	// Doubles hardly cost anything these days, and precision matters here.
	double offset_;
	double scale_;
};
120

121
// Returns the DepthScaleFactors for the given use flags. Defined out of line;
// see the .cpp file for how the use flags are interpreted.
DepthScaleFactors GetDepthScaleFactors(u32 useFlags);
122

123
// These are common to all modern APIs and can be easily converted with a lookup table.
// Stored in one byte. Values are written out explicitly (identical to implicit
// numbering) since they serve as table indices; COUNT stays implicit so it keeps
// trailing the last real entry.
enum class BlendFactor : uint8_t {
	ZERO = 0,
	ONE = 1,
	SRC_COLOR = 2,
	ONE_MINUS_SRC_COLOR = 3,
	DST_COLOR = 4,
	ONE_MINUS_DST_COLOR = 5,
	SRC_ALPHA = 6,
	ONE_MINUS_SRC_ALPHA = 7,
	DST_ALPHA = 8,
	ONE_MINUS_DST_ALPHA = 9,
	CONSTANT_COLOR = 10,
	ONE_MINUS_CONSTANT_COLOR = 11,
	CONSTANT_ALPHA = 12,
	ONE_MINUS_CONSTANT_ALPHA = 13,
	SRC1_COLOR = 14,
	ONE_MINUS_SRC1_COLOR = 15,
	SRC1_ALPHA = 16,
	ONE_MINUS_SRC1_ALPHA = 17,
	INVALID = 18,
	COUNT,
};
146

147
// Blend equation selector, stored in one byte. Used by GenericBlendState for
// both the color and the alpha equation.
// Values are written out explicitly (identical to implicit numbering); COUNT
// stays implicit so it keeps trailing the last real entry.
enum class BlendEq : uint8_t {
	ADD = 0,
	SUBTRACT = 1,
	REVERSE_SUBTRACT = 2,
	MIN = 3,
	MAX = 4,
	COUNT
};
155

156
// Computed blend setup, including shader stuff.
157
struct GenericBlendState {
158
	bool applyFramebufferRead;
159
	bool dirtyShaderBlendFixValues;
160

161
	// Shader generation state
162
	ReplaceAlphaType replaceAlphaWithStencil;
163
	ReplaceBlendType replaceBlend;
164
	SimulateLogicOpType simulateLogicOpType;
165

166
	// Resulting hardware blend state
167
	bool blendEnabled;
168

169
	BlendFactor srcColor;
170
	BlendFactor dstColor;
171
	BlendFactor srcAlpha;
172
	BlendFactor dstAlpha;
173

174
	BlendEq eqColor;
175
	BlendEq eqAlpha;
176

177
	bool useBlendColor;
178
	u32 blendColor;
179

180
	void setFactors(BlendFactor srcC, BlendFactor dstC, BlendFactor srcA, BlendFactor dstA) {
181
		srcColor = srcC;
182
		dstColor = dstC;
183
		srcAlpha = srcA;
184
		dstAlpha = dstA;
185
	}
186
	void setEquation(BlendEq eqC, BlendEq eqA) {
187
		eqColor = eqC;
188
		eqAlpha = eqA;
189
	}
190
	void setBlendColor(uint32_t color, uint8_t alpha) {
191
		blendColor = color | ((uint32_t)alpha << 24);
192
		useBlendColor = true;
193
	}
194
	void defaultBlendColor(uint8_t alpha) {
195
		blendColor = 0xFFFFFF | ((uint32_t)alpha << 24);
196
		useBlendColor = true;
197
	}
198

199
	void Log();
200
};
201

202
void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState);
203

204
// Computed color write mask setup.
// Plain struct: no constructor, so members hold indeterminate values until assigned.
struct GenericMaskState {
	bool applyFramebufferRead;
	uint32_t uniformMask;  // For each bit, opposite to the PSP.

	// The hardware channel masks, 1 bit per color component. From bit 0, order is RGBA like in all APIs!
	uint8_t channelMask;

	// Switches masking over to the shader: flags the framebuffer read and opens
	// all four hardware channels (0xF). uniformMask is left untouched.
	void ConvertToShaderBlend() {
		applyFramebufferRead = true;
		// If we have to do it in the shader, we simply pass through all channels but mask only in the shader instead.
		// Some GPUs have minor penalties for masks that are not all-channels-on or all-channels-off.
		channelMask = 0xF;
	}

	// Defined out of line.
	void Log();
};
220

221
struct GenericStencilFuncState {
222
	bool enabled;
223
	GEComparison testFunc;
224
	u8 testRef;
225
	u8 testMask;
226
	u8 writeMask;
227
	GEStencilOp sFail;
228
	GEStencilOp zFail;
229
	GEStencilOp zPass;
230
};
231
void ConvertStencilFuncState(GenericStencilFuncState &stencilFuncState);
232

233
struct GenericLogicState {
234
	// If set, logic op is applied in the shader INSTEAD of in hardware.
235
	// In this case, simulateLogicOpType and all that should be off.
236
	bool applyFramebufferRead;
237

238
	// Hardware
239
	bool logicOpEnabled;
240

241
	// Hardware and shader generation
242
	GELogicOp logicOp;
243

244
	void ApplyToBlendState(GenericBlendState &blendState);
245
	void ConvertToShaderBlend() {
246
		if (logicOp != GE_LOGIC_COPY) {
247
			logicOpEnabled = false;
248
			applyFramebufferRead = true;
249
			// Same logicOp is kept.
250
		}
251
	}
252
};
253

254
struct ComputedPipelineState {
255
	GenericBlendState blendState;
256
	GenericMaskState maskState;
257
	GenericLogicState logicState;
258

259
	void Convert(bool shaderBitOpsSupported);
260

261
	bool FramebufferRead() const {
262
		// If blending is off, its applyFramebufferRead can be false even after state propagation.
263
		// So it's not enough to check just that one.
264
		return blendState.applyFramebufferRead || maskState.applyFramebufferRead || logicState.applyFramebufferRead;
265
	}
266
};
267

268
// See issue #15898
269
inline bool SpongebobDepthInverseConditions(const GenericStencilFuncState &stencilState) {
270
	// Check that the depth/stencil state matches the conditions exactly.
271
	// Always with a depth test that's not writing to the depth buffer (only stencil.)
272
	if (!gstate.isDepthTestEnabled() || gstate.isDepthWriteEnabled())
273
		return false;
274
	// Always GREATER_EQUAL, which we flip to LESS.
275
	if (gstate.getDepthTestFunction() != GE_COMP_GEQUAL)
276
		return false;
277

278
	// The whole purpose here is a depth fail that we need to write to alpha.
279
	if (stencilState.zFail != GE_STENCILOP_ZERO || stencilState.sFail != GE_STENCILOP_KEEP || stencilState.zPass != GE_STENCILOP_KEEP)
280
		return false;
281
	if (stencilState.testFunc != GE_COMP_ALWAYS || stencilState.writeMask != 0xFF)
282
		return false;
283

284
	// Lastly, verify no color is written.  Natural way is a mask, in case another game uses it.
285
	// Note that the PSP masks are reversed compared to typical APIs.
286
	if (gstate.getColorMask() == 0xFFFFFF00)
287
		return true;
288

289
	// These games specifically use simple alpha blending with a constant zero alpha.
290
	if (!gstate.isAlphaBlendEnabled() || gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA || gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA)
291
		return false;
292

293
	// Also make sure there's no texture, in case its alpha gets involved.
294
	if (gstate.isTextureMapEnabled())
295
		return false;
296

297
	// Spongebob uses material alpha.
298
	if (gstate.getMaterialAmbientA() == 0x00 && gstate.getMaterialUpdate() == 0)
299
		return true;
300
	// MX vs ATV : Reflex uses vertex colors, should really check them...
301
	if (gstate.getMaterialUpdate() == 1)
302
		return true;
303

304
	// Okay, color is most likely being used if we didn't hit the above.
305
	return false;
306
}
307

308
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

Product

Resources

Company