CoCalc -- ShaderUniforms.cpp

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/ShaderUniforms.cpp
Views: ¹⁴⁰¹
1
#include <algorithm>
2
#include <cmath>
3

4
#include "ShaderUniforms.h"
5
#include "Common/System/Display.h"
6
#include "Common/Data/Convert/SmallDataConvert.h"
7
#include "Common/Math/lin/matrix4x4.h"
8
#include "Common/Math/math_util.h"
9
#include "Common/Math/lin/vec3.h"
10
#include "GPU/GPUState.h"
11
#include "GPU/Common/FramebufferManagerCommon.h"
12
#include "GPU/Common/GPUStateUtils.h"
13
#include "GPU/Math3D.h"
14

15
using namespace Lin;
16

17
// Applies the viewport offset/scale stored in gstate_c to the projection matrix in place.
// X and Y are used as-is; the Z scale is halved and the Z offset is halved and biased by 0.5.
static void ConvertProjMatrixToVulkan(Matrix4x4 &in) {
	const float offsetZ = gstate_c.vpZOffset * 0.5f + 0.5f;
	const float scaleZ = gstate_c.vpDepthScale * 0.5f;
	in.translateAndScale(
		Vec3(gstate_c.vpXOffset, gstate_c.vpYOffset, offsetZ),
		Vec3(gstate_c.vpWidthScale, gstate_c.vpHeightScale, scaleZ));
}
22

23
// Applies the viewport offset/scale stored in gstate_c to the projection matrix in place,
// with both the Y offset and the Y scale negated. The Z scale is halved and the Z offset
// is halved and biased by 0.5.
static void ConvertProjMatrixToD3D11(Matrix4x4 &in) {
	const float offsetZ = gstate_c.vpZOffset * 0.5f + 0.5f;
	const float scaleZ = gstate_c.vpDepthScale * 0.5f;
	in.translateAndScale(
		Vec3(gstate_c.vpXOffset, -gstate_c.vpYOffset, offsetZ),
		Vec3(gstate_c.vpWidthScale, -gstate_c.vpHeightScale, scaleZ));
}
28

29
void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ) {
30
	// Account for the projection viewport adjustment when viewport is too large.
31
	auto reverseViewportX = [](float x) {
32
		float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());
33
		return (pspViewport * gstate_c.vpWidthScale) - gstate_c.vpXOffset;
34
	};
35
	auto reverseViewportY = [flipViewport](float y) {
36
		float heightScale = gstate_c.vpHeightScale;
37
		float yOffset = gstate_c.vpYOffset;
38
		if (flipViewport) {
39
			// For D3D11 and GLES non-buffered.
40
			heightScale = -heightScale;
41
			yOffset = -yOffset;
42
		}
43
		float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());
44
		return (pspViewport * heightScale) - yOffset;
45
	};
46
	auto transformZ = [hasNegZ](float z) {
47
		// Z culling ignores the viewport, so we just redo the projection matrix adjustments.
48
		if (hasNegZ) {
49
			return (z * gstate_c.vpDepthScale) + gstate_c.vpZOffset;
50
		}
51
		return (z * gstate_c.vpDepthScale * 0.5f) + gstate_c.vpZOffset * 0.5f + 0.5f;
52
	};
53
	auto sortPair = [](float a, float b) {
54
		return a > b ? std::make_pair(b, a) : std::make_pair(a, b);
55
	};
56

57
	// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.
58
	// Any vertex outside this range (unless depth clamp enabled) is discarded.
59
	auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));
60
	auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));
61
	auto z = sortPair(transformZ(-1.000030517578125f), transformZ(1.000030517578125f));
62
	// Since we have space in w, use it to pass the depth clamp flag.  We also pass NAN for w "discard".
63
	float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;
64

65
	minValues[0] = x.first;
66
	minValues[1] = y.first;
67
	minValues[2] = z.first;
68
	minValues[3] = clampEnable;
69
	maxValues[0] = x.second;
70
	maxValues[1] = y.second;
71
	maxValues[2] = z.second;
72
	maxValues[3] = NAN;
73
}
74

75
// Refreshes the sections of the shared vertex/fragment uniform block selected by dirtyUniforms.
//   ub                   - uniform block to write; sections whose DIRTY_* bit is clear are untouched.
//   dirtyUniforms        - mask of DIRTY_* bits choosing which sections to recompute.
//   flipViewport         - picks the D3D11 projection conversion / ortho setup instead of the Vulkan one.
//   useBufferedRendering - when false and the display is rotated, the rotation matrix is multiplied
//                          into both projection matrices.
void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering) {
	const auto isDirty = [dirtyUniforms](uint64_t flags) {
		return (dirtyUniforms & flags) != 0;
	};

	if (isDirty(DIRTY_TEXENV)) {
		Uint8x3ToFloat3(ub->texEnvColor, gstate.texenvcolor);
	}
	if (isDirty(DIRTY_ALPHACOLORREF)) {
		// Color ref goes in the low bits, the masked alpha ref in the top byte.
		const auto maskedAlphaRef = gstate.getAlphaTestRef() & gstate.getAlphaTestMask();
		ub->alphaColorRef = gstate.getColorTestRef() | (maskedAlphaRef << 24);
	}
	if (isDirty(DIRTY_ALPHACOLORMASK)) {
		ub->colorTestMask = gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24);
	}
	if (isDirty(DIRTY_FOGCOLOR)) {
		Uint8x3ToFloat3(ub->fogColor, gstate.fogcolor);
	}
	if (isDirty(DIRTY_SHADERBLEND)) {
		Uint8x3ToFloat3(ub->blendFixA, gstate.getFixA());
		Uint8x3ToFloat3(ub->blendFixB, gstate.getFixB());
	}
	if (isDirty(DIRTY_TEXCLAMP)) {
		const float texelW = 1.0f / static_cast<float>(gstate_c.curTextureWidth);
		const float texelH = 1.0f / static_cast<float>(gstate_c.curTextureHeight);
		const int stateW = gstate.getTextureWidth(0);
		const int stateH = gstate.getTextureHeight(0);

		// First wrap xy, then half texel xy (for clamp.)
		ub->texClamp[0] = static_cast<float>(stateW) * texelW;
		ub->texClamp[1] = static_cast<float>(stateH) * texelH;
		ub->texClamp[2] = texelW * 0.5f;
		ub->texClamp[3] = texelH * 0.5f;
		ub->texClampOffset[0] = gstate_c.curTextureXOffset * texelW;
		ub->texClampOffset[1] = gstate_c.curTextureYOffset * texelH;
	}

	if (isDirty(DIRTY_MIPBIAS)) {
		// The level offset is in 1/16 steps; the result is divided by the number of mip levels.
		const float levelOffset = static_cast<float>(gstate.getTexLevelOffset16()) * (1.0 / 16.0f);
		ub->mipBias = (levelOffset + 0.5f) / static_cast<float>(gstate.getTextureMaxLevel() + 1);
	}

	if (isDirty(DIRTY_PROJMATRIX)) {
		Matrix4x4 proj;
		memcpy(&proj, gstate.projMatrix, 16 * sizeof(float));

		// A negative viewport height negates entries 1, 5, 9 and 13;
		// a negative viewport width negates entries 0, 4, 8 and 12.
		if (gstate_c.vpHeight < 0) {
			for (int idx = 1; idx < 16; idx += 4) {
				proj[idx] = -proj[idx];
			}
		}
		if (gstate_c.vpWidth < 0) {
			for (int idx = 0; idx < 16; idx += 4) {
				proj[idx] = -proj[idx];
			}
		}

		if (flipViewport) {
			ConvertProjMatrixToD3D11(proj);
		} else {
			ConvertProjMatrixToVulkan(proj);
		}

		const bool rotateOutput = !useBufferedRendering && g_display.rotation != DisplayRotation::ROTATE_0;
		if (rotateOutput) {
			proj = proj * g_display.rot_matrix;
		}
		CopyMatrix4x4(ub->proj, proj.getReadPtr());

		ub->rotation = useBufferedRendering ? 0.0f : static_cast<float>(g_display.rotation);
	}

	if (isDirty(DIRTY_PROJTHROUGHMATRIX)) {
		Matrix4x4 ortho;
		if (flipViewport) {
			ortho.setOrthoD3D(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);
		} else {
			ortho.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);
		}

		const bool rotateOutput = !useBufferedRendering && g_display.rotation != DisplayRotation::ROTATE_0;
		if (rotateOutput) {
			ortho = ortho * g_display.rot_matrix;
		}

		// Negative RT offsets come from split framebuffers (Killzone)
		const bool hasNegativeOffset = gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0;
		if (hasNegativeOffset) {
			ortho.wx += 2.0f * static_cast<float>(gstate_c.curRTOffsetX) / static_cast<float>(gstate_c.curRTWidth);
			ortho.wy += 2.0f * static_cast<float>(gstate_c.curRTOffsetY) / static_cast<float>(gstate_c.curRTHeight);
		}

		CopyMatrix4x4(ub->proj_through, ortho.getReadPtr());
	}

	// Transform matrices.
	if (isDirty(DIRTY_WORLDMATRIX)) {
		ConvertMatrix4x3To3x4Transposed(ub->world, gstate.worldMatrix);
	}
	if (isDirty(DIRTY_VIEWMATRIX)) {
		ConvertMatrix4x3To3x4Transposed(ub->view, gstate.viewMatrix);
	}
	if (isDirty(DIRTY_TEXMATRIX)) {
		ConvertMatrix4x3To3x4Transposed(ub->tex, gstate.tgenMatrix);
	}

	if (isDirty(DIRTY_FOGCOEF)) {
		float fogcoef[2] = {
			getFloat24(gstate.fog1),
			getFloat24(gstate.fog2),
		};
		// The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.
		// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
		// Not really sure what a sensible value might be, but let's try 64k (sign preserved).
		for (float &coef : fogcoef) {
			if (my_isnanorinf(coef)) {
				coef = std::signbit(coef) ? -65535.0f : 65535.0f;
			}
		}
		CopyFloat2(ub->fogCoef, fogcoef);
	}

	if (isDirty(DIRTY_TEX_ALPHA_MUL)) {
		const bool alphaUsed = gstate.isTextureAlphaUsed();
		// Texture alpha is treated as unused when the texture is flagged full-alpha and the
		// texture function is anything but REPLACE.
		const bool fullAlphaNonReplace = gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE;
		ub->texNoAlpha = (alphaUsed && !fullAlphaNonReplace) ? 0.0f : 1.0f;
		ub->texMul = gstate.isColorDoublingEnabled() ? 2.0f : 1.0f;
	}

	if (isDirty(DIRTY_STENCILREPLACEVALUE)) {
		ub->stencilReplaceValue = static_cast<float>(gstate.getStencilTestRef()) * (1.0 / 255.0);
	}

	// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting
	if (isDirty(DIRTY_MATAMBIENTALPHA)) {
		Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());
	}

	if (isDirty(DIRTY_COLORWRITEMASK)) {
		// pmska supplies the top byte and pmskc the low 24 bits; the uniform holds the complement.
		const auto pixelMask = (gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF);
		ub->colorWriteMask = ~pixelMask;
	}

	// Texturing
	if (isDirty(DIRTY_UVSCALEOFFSET)) {
		float widthFactor = 1.0f;
		float heightFactor = 1.0f;
		if (gstate_c.textureIsFramebuffer) {
			const float texelW = 1.0f / static_cast<float>(gstate_c.curTextureWidth);
			const float texelH = 1.0f / static_cast<float>(gstate_c.curTextureHeight);
			const int stateW = gstate.getTextureWidth(0);
			const int stateH = gstate.getTextureHeight(0);
			widthFactor = static_cast<float>(stateW) * texelW;
			heightFactor = static_cast<float>(stateH) * texelH;
		}

		// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
		// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
		const bool hwTessellation = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE;
		ub->uvScaleOffset[0] = hwTessellation ? gstate_c.uv.uScale * widthFactor : widthFactor;
		ub->uvScaleOffset[1] = hwTessellation ? gstate_c.uv.vScale * heightFactor : heightFactor;
		ub->uvScaleOffset[2] = hwTessellation ? gstate_c.uv.uOff * widthFactor : 0.0f;
		ub->uvScaleOffset[3] = hwTessellation ? gstate_c.uv.vOff * heightFactor : 0.0f;
	}

	if (isDirty(DIRTY_DEPTHRANGE)) {
		// Same formulas as D3D9 now. Should work for both Vulkan and D3D11.
		// Depth is [0, 1] mapping to [minz, maxz], not too hard.
		const float vpZScale = gstate.getViewportZScale();
		const float vpZCenter = gstate.getViewportZCenter();

		// These are just the reverse of the formulas in GPUStateUtils.
		// A zero depth scale yields 0 for both terms instead of dividing by zero.
		const bool hasDepthScale = gstate_c.vpDepthScale != 0.0f;
		const float halfActualZRange = InfToZero(hasDepthScale ? vpZScale / gstate_c.vpDepthScale : 0.0f);
		const float inverseDepthScale = InfToZero(hasDepthScale ? 1.0f / gstate_c.vpDepthScale : 0.0f);

		const float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;

		ub->depthRange[0] = halfActualZRange * 2.0f;
		ub->depthRange[1] = minz;
		ub->depthRange[2] = gstate_c.vpZOffset * 0.5f + 0.5f;
		ub->depthRange[3] = 2.0f * inverseDepthScale;
	}

	if (isDirty(DIRTY_CULLRANGE)) {
		CalcCullRange(ub->cullRangeMin, ub->cullRangeMax, flipViewport, false);
	}

	if (isDirty(DIRTY_BEZIERSPLINE)) {
		ub->spline_counts = gstate_c.spline_num_points_u;
	}

	if (isDirty(DIRTY_DEPAL)) {
		const int clutMask = gstate.getClutIndexMask();
		const int clutShift = gstate.getClutIndexShift();
		const int clutOffset = gstate.getClutIndexStartPos() >> 4;
		const int fbFormat = gstate_c.depalFramebufferFormat;
		const uint32_t packed = BytesToUint32(clutMask, clutShift, clutOffset, fbFormat);
		// Poke in a bilinear filter flag in the top bit.
		const uint32_t filterFlag = gstate.isMagnifyFilteringEnabled() ? 0x80000000u : 0u;
		ub->depal_mask_shift_off_fmt = packed | filterFlag;
	}
}
284

285
// For "light ubershader" bits.
286
// TODO: We pack these bits even when not using ubershader lighting. Maybe not bother.
287
uint32_t PackLightControlBits() {
288
	// Bit organization
289
	// Bottom 4 bits are enable bits for each light.
290
	// Then, for each light, comes 2 bits for "comp" and 2 bits for "type".
291
	// At the end, at bit 20, we put the three material update bits.
292

293
	uint32_t lightControl = 0;
294
	for (int i = 0; i < 4; i++) {
295
		if (gstate.isLightChanEnabled(i)) {
296
			lightControl |= 1 << i;
297
		}
298

299
		u32 computation = (u32)gstate.getLightComputation(i);  // 2 bits
300
		u32 type = (u32)gstate.getLightType(i);  // 2 bits
301
		if (type == 3) { type = 0; }  // Don't want to handle this degenerate case in the shader.
302
		lightControl |= computation << (4 + i * 4);
303
		lightControl |= type << (4 + i * 4 + 2);
304
	}
305

306
	// Material update is 3 bits.
307
	lightControl |= gstate.getMaterialUpdate() << 20;
308
	return lightControl;
309
}
310

311
// Refreshes the lighting uniform block for every section flagged in dirtyUniforms.
//   ub            - lighting uniform block to write; sections whose DIRTY_* bit is clear are untouched.
//   dirtyUniforms - mask of DIRTY_* bits; DIRTY_LIGHT0 shifted left by i selects light i.
void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {
	// Global ambient and material colors.
	if (dirtyUniforms & DIRTY_AMBIENT) {
		Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());
	}
	if (dirtyUniforms & DIRTY_MATDIFFUSE) {
		Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);
	}
	if (dirtyUniforms & DIRTY_MATSPECULAR) {
		// The specular coefficient is clamped to be non-negative and passed as the alpha argument.
		const float specularCoef = std::max(0.0f, getFloat24(gstate.materialspecularcoef));
		Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, specularCoef);
	}
	if (dirtyUniforms & DIRTY_MATEMISSIVE) {
		// We're not touching the fourth f32 here, because we store an u32 of control bits in it.
		Uint8x3ToFloat3(ub->materialEmissive, gstate.materialemissive);
	}
	if (dirtyUniforms & DIRTY_LIGHT_CONTROL) {
		ub->lightControl = PackLightControlBits();
	}

	for (int light = 0; light < 4; ++light) {
		const bool lightDirty = (dirtyUniforms & (DIRTY_LIGHT0 << light)) != 0;
		if (!lightDirty) {
			continue;
		}

		// Each light owns three consecutive entries in the gstate light arrays.
		const int base = light * 3;

		// Directional lights get their position prenormalized.
		if (gstate.isDirectionalLight(light)) {
			ExpandFloat24x3ToFloat4AndNormalize(ub->lpos[light], &gstate.lpos[base]);
		} else {
			ExpandFloat24x3ToFloat4(ub->lpos[light], &gstate.lpos[base]);
		}
		// ldir is only used for spotlights. Prenormalize it.
		ExpandFloat24x3ToFloat4AndNormalize(ub->ldir[light], &gstate.ldir[base]);
		ExpandFloat24x3ToFloat4(ub->latt[light], &gstate.latt[base]);

		const float angleAndSpotCoef[2] = {
			getFloat24(gstate.lcutoff[light]),
			getFloat24(gstate.lconv[light]),
		};
		CopyFloat2To4(ub->lightAngle_SpotCoef[light], angleAndSpotCoef);

		// lcolor holds ambient, diffuse and specular back to back for each light.
		Uint8x3ToFloat4(ub->lightAmbient[light], gstate.lcolor[base]);
		Uint8x3ToFloat4(ub->lightDiffuse[light], gstate.lcolor[base + 1]);
		Uint8x3ToFloat4(ub->lightSpecular[light], gstate.lcolor[base + 2]);
	}
}
348

349
// Re-uploads each of the 8 bone matrices whose dirty bit (DIRTY_BONEMATRIX0 shifted left by
// the bone index) is set in dirtyUniforms. Bone i is read from 12 values starting at
// gstate.boneMatrix + 12 * i.
void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {
	for (int bone = 0; bone < 8; ++bone) {
		const bool boneDirty = (dirtyUniforms & (DIRTY_BONEMATRIX0 << bone)) != 0;
		if (!boneDirty) {
			continue;
		}
		ConvertMatrix4x3To3x4Transposed(ub->bones[bone], gstate.boneMatrix + 12 * bone);
	}
}
356

357
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

Product

Resources

Company