CoCalc Logo Icon
Store · Features · Docs · Share · Support · News · About · Sign Up · Sign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/FragmentShaderGenerator.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include <cstdio>
19
#include <sstream>
20
21
#include "Common/Log.h"
22
#include "Common/StringUtils.h"
23
#include "Common/GPU/OpenGL/GLFeatures.h"
24
#include "Common/GPU/ShaderWriter.h"
25
#include "Common/GPU/thin3d.h"
26
#include "Core/Compatibility.h"
27
#include "Core/Config.h"
28
#include "Core/System.h"
29
#include "GPU/Common/GPUStateUtils.h"
30
#include "GPU/Common/ShaderId.h"
31
#include "GPU/Common/ShaderUniforms.h"
32
#include "GPU/Common/FragmentShaderGenerator.h"
33
#include "GPU/Vulkan/DrawEngineVulkan.h"
34
#include "GPU/ge_constants.h"
35
#include "GPU/GPUState.h"
36
37
// Convenience macro: WRITE(p, fmt, ...) forwards to ShaderWriter::F(), which
// appends printf-style formatted text to the shader source being generated.
#define WRITE(p, ...) p.F(__VA_ARGS__)
38
39
// Sampler metadata for non-stereo rendering. Only the framebuffer copy
// ("fbotex") is bound as an array texture on Vulkan; the main texture and
// the palette are plain 2D samplers.
static const SamplerDef samplersMono[3] = {
	{ 0, "tex" },                                    // main texture
	{ 1, "fbotex", SamplerFlags::ARRAY_ON_VULKAN },  // framebuffer read-back texture
	{ 2, "pal" },                                    // depal palette (CLUT)
};
44
45
// Sampler metadata for stereo rendering. Here the main texture is also an
// array texture on Vulkan (one layer per eye), in addition to the
// framebuffer copy; the palette stays a plain 2D sampler.
static const SamplerDef samplersStereo[3] = {
	{ 0, "tex", SamplerFlags::ARRAY_ON_VULKAN },     // main texture (layered per eye)
	{ 1, "fbotex", SamplerFlags::ARRAY_ON_VULKAN },  // framebuffer read-back texture
	{ 2, "pal" },                                    // depal palette (CLUT)
};
50
51
bool GenerateFragmentShader(const FShaderID &id, char *buffer, const ShaderLanguageDesc &compat, Draw::Bugs bugs, uint64_t *uniformMask, FragmentShaderFlags *fragmentShaderFlags, std::string *errorString) {
52
*uniformMask = 0;
53
*fragmentShaderFlags = (FragmentShaderFlags)0;
54
errorString->clear();
55
56
bool useStereo = id.Bit(FS_BIT_STEREO);
57
bool highpFog = false;
58
bool highpTexcoord = false;
59
bool enableFragmentTestCache = gstate_c.Use(GPU_USE_FRAGMENT_TEST_CACHE);
60
61
if (compat.gles) {
62
// PowerVR needs highp to do the fog in MHU correctly.
63
// Others don't, and some can't handle highp in the fragment shader.
64
highpFog = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? true : false;
65
highpTexcoord = highpFog;
66
}
67
68
bool texture3D = id.Bit(FS_BIT_3D_TEXTURE);
69
bool arrayTexture = id.Bit(FS_BIT_SAMPLE_ARRAY_TEXTURE);
70
71
ReplaceAlphaType stencilToAlpha = static_cast<ReplaceAlphaType>(id.Bits(FS_BIT_STENCIL_TO_ALPHA, 2));
72
73
std::vector<const char*> extensions;
74
if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
75
if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE && gl_extensions.EXT_blend_func_extended) {
76
extensions.push_back("#extension GL_EXT_blend_func_extended : require");
77
}
78
if (gl_extensions.EXT_gpu_shader4) {
79
extensions.push_back("#extension GL_EXT_gpu_shader4 : enable");
80
}
81
if (compat.framebufferFetchExtension) {
82
extensions.push_back(compat.framebufferFetchExtension);
83
}
84
if (gl_extensions.OES_texture_3D && texture3D) {
85
extensions.push_back("#extension GL_OES_texture_3D: enable");
86
}
87
}
88
89
ShaderWriterFlags flags = ShaderWriterFlags::NONE;
90
if (useStereo) {
91
flags |= ShaderWriterFlags::FS_AUTO_STEREO;
92
}
93
94
ShaderWriter p(buffer, compat, ShaderStage::Fragment, extensions, flags);
95
p.F("// %s\n", FragmentShaderDesc(id).c_str());
96
97
p.ApplySamplerMetadata(arrayTexture ? samplersStereo : samplersMono);
98
99
bool lmode = id.Bit(FS_BIT_LMODE);
100
bool doTexture = id.Bit(FS_BIT_DO_TEXTURE);
101
bool enableFog = id.Bit(FS_BIT_ENABLE_FOG);
102
bool enableAlphaTest = id.Bit(FS_BIT_ALPHA_TEST);
103
104
bool alphaTestAgainstZero = id.Bit(FS_BIT_ALPHA_AGAINST_ZERO);
105
bool testForceToZero = id.Bit(FS_BIT_TEST_DISCARD_TO_ZERO);
106
bool enableColorTest = id.Bit(FS_BIT_COLOR_TEST);
107
bool colorTestAgainstZero = id.Bit(FS_BIT_COLOR_AGAINST_ZERO);
108
bool doTextureProjection = id.Bit(FS_BIT_DO_TEXTURE_PROJ);
109
110
bool ubershader = id.Bit(FS_BIT_UBERSHADER);
111
// ubershader-controlled bits. If ubershader is on, these will not be used below (and will be false).
112
bool useTexAlpha = id.Bit(FS_BIT_TEXALPHA);
113
bool enableColorDouble = id.Bit(FS_BIT_DOUBLE_COLOR);
114
115
if (texture3D && arrayTexture) {
116
*errorString = "Invalid combination of 3D texture and array texture, shouldn't happen";
117
return false;
118
}
119
if (compat.shaderLanguage != ShaderLanguage::GLSL_VULKAN && arrayTexture) {
120
*errorString = "We only do array textures for framebuffers in Vulkan.";
121
return false;
122
}
123
124
bool flatBug = bugs.Has(Draw::Bugs::BROKEN_FLAT_IN_SHADER) && g_Config.bVendorBugChecksEnabled;
125
126
bool doFlatShading = id.Bit(FS_BIT_FLATSHADE) && !flatBug;
127
if (doFlatShading) {
128
*fragmentShaderFlags |= FragmentShaderFlags::USES_FLAT_SHADING;
129
}
130
131
ShaderDepalMode shaderDepalMode = (ShaderDepalMode)id.Bits(FS_BIT_SHADER_DEPAL_MODE, 2);
132
if (texture3D) {
133
shaderDepalMode = ShaderDepalMode::OFF;
134
}
135
if (!compat.bitwiseOps && shaderDepalMode != ShaderDepalMode::OFF) {
136
*errorString = "depal requires bitwise ops";
137
return false;
138
}
139
bool bgraTexture = id.Bit(FS_BIT_BGRA_TEXTURE);
140
bool colorWriteMask = id.Bit(FS_BIT_COLOR_WRITEMASK) && compat.bitwiseOps;
141
142
GEComparison alphaTestFunc = (GEComparison)id.Bits(FS_BIT_ALPHA_TEST_FUNC, 3);
143
GEComparison colorTestFunc = (GEComparison)id.Bits(FS_BIT_COLOR_TEST_FUNC, 2);
144
bool needShaderTexClamp = id.Bit(FS_BIT_SHADER_TEX_CLAMP);
145
146
GETexFunc texFunc = (GETexFunc)id.Bits(FS_BIT_TEXFUNC, 3);
147
148
ReplaceBlendType replaceBlend = static_cast<ReplaceBlendType>(id.Bits(FS_BIT_REPLACE_BLEND, 3));
149
150
bool blueToAlpha = false;
151
if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) {
152
blueToAlpha = true;
153
}
154
155
bool isModeClear = id.Bit(FS_BIT_CLEARMODE);
156
157
const char *shading = "";
158
if (compat.glslES30 || compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
159
shading = doFlatShading ? "flat" : "";
160
}
161
162
bool forceDepthWritesOff = id.Bit(FS_BIT_DEPTH_TEST_NEVER);
163
164
bool useDiscardStencilBugWorkaround = id.Bit(FS_BIT_NO_DEPTH_CANNOT_DISCARD_STENCIL) && !forceDepthWritesOff;
165
166
GEBlendSrcFactor replaceBlendFuncA = (GEBlendSrcFactor)id.Bits(FS_BIT_BLENDFUNC_A, 4);
167
GEBlendDstFactor replaceBlendFuncB = (GEBlendDstFactor)id.Bits(FS_BIT_BLENDFUNC_B, 4);
168
GEBlendMode replaceBlendEq = (GEBlendMode)id.Bits(FS_BIT_BLENDEQ, 3);
169
StencilValueType replaceAlphaWithStencilType = (StencilValueType)id.Bits(FS_BIT_REPLACE_ALPHA_WITH_STENCIL_TYPE, 4);
170
171
// Distinct from the logic op simulation support.
172
GELogicOp replaceLogicOpType = isModeClear ? GE_LOGIC_COPY : (GELogicOp)id.Bits(FS_BIT_REPLACE_LOGIC_OP, 4);
173
bool replaceLogicOp = replaceLogicOpType != GE_LOGIC_COPY && compat.bitwiseOps;
174
175
bool needFramebufferRead = replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER || colorWriteMask || replaceLogicOp;
176
177
bool fetchFramebuffer = needFramebufferRead && id.Bit(FS_BIT_USE_FRAMEBUFFER_FETCH);
178
bool readFramebufferTex = needFramebufferRead && !id.Bit(FS_BIT_USE_FRAMEBUFFER_FETCH);
179
180
if (fetchFramebuffer && (compat.shaderLanguage != GLSL_3xx || !compat.lastFragData)) {
181
*errorString = "framebuffer fetch requires GLSL 3xx";
182
return false;
183
}
184
185
bool needFragCoord = readFramebufferTex || gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT);
186
bool writeDepth = gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT) && !forceDepthWritesOff;
187
188
// TODO: We could have a separate mechanism to support more ops using the shader blending mechanism,
189
// on hardware that can do proper bit math in fragment shaders.
190
SimulateLogicOpType simulateLogicOpType = (SimulateLogicOpType)id.Bits(FS_BIT_SIMULATE_LOGIC_OP_TYPE, 2);
191
192
if (shaderDepalMode != ShaderDepalMode::OFF && !doTexture) {
193
*errorString = "depal requires a texture";
194
return false;
195
}
196
197
// Currently only used by Vulkan.
198
std::vector<SamplerDef> samplers;
199
200
if (compat.shaderLanguage == ShaderLanguage::GLSL_VULKAN) {
201
if (useDiscardStencilBugWorkaround && !writeDepth) {
202
WRITE(p, "layout (depth_unchanged) out float gl_FragDepth;\n");
203
}
204
205
WRITE(p, "layout (std140, set = 0, binding = %d) uniform baseUBO {\n%s};\n", DRAW_BINDING_DYNUBO_BASE, ub_baseStr);
206
if (doTexture) {
207
WRITE(p, "layout (set = 0, binding = %d) uniform %s%s tex;\n", DRAW_BINDING_TEXTURE, texture3D ? "sampler3D" : "sampler2D", arrayTexture ? "Array" : "");
208
}
209
210
if (readFramebufferTex) {
211
// The framebuffer texture is always bound as an array.
212
p.F("layout (set = 0, binding = %d) uniform sampler2DArray fbotex;\n", DRAW_BINDING_2ND_TEXTURE);
213
}
214
215
if (shaderDepalMode != ShaderDepalMode::OFF) {
216
WRITE(p, "layout (set = 0, binding = %d) uniform sampler2D pal;\n", DRAW_BINDING_DEPAL_TEXTURE);
217
}
218
219
// Note: the precision qualifiers must match the vertex shader!
220
WRITE(p, "layout (location = 1) %s in lowp vec4 v_color0;\n", shading);
221
if (lmode) {
222
WRITE(p, "layout (location = 2) %s in lowp vec3 v_color1;\n", shading);
223
}
224
WRITE(p, "layout (location = 3) in highp float v_fogdepth;\n");
225
if (doTexture) {
226
WRITE(p, "layout (location = 0) in highp vec3 v_texcoord;\n");
227
}
228
229
if (enableAlphaTest && !alphaTestAgainstZero) {
230
WRITE(p, "int roundAndScaleTo255i(in highp float x) { return int(floor(x * 255.0 + 0.5)); }\n");
231
}
232
if (enableColorTest && !colorTestAgainstZero) {
233
WRITE(p, "uint roundAndScaleTo8x4(in highp vec3 x) { uvec3 u = uvec3(floor(x * 255.0 + 0.5)); return u.r | (u.g << 8) | (u.b << 16); }\n");
234
WRITE(p, "uint packFloatsTo8x4(in vec3 x) { uvec3 u = uvec3(x); return u.r | (u.g << 8) | (u.b << 16); }\n");
235
}
236
237
WRITE(p, "layout (location = 0, index = 0) out vec4 fragColor0;\n");
238
if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
239
WRITE(p, "layout (location = 0, index = 1) out vec4 fragColor1;\n");
240
}
241
} else if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) {
242
if (compat.shaderLanguage == HLSL_D3D9) {
243
if (doTexture)
244
WRITE(p, "sampler tex : register(s0);\n");
245
246
if (readFramebufferTex) {
247
WRITE(p, "vec2 u_fbotexSize : register(c%i);\n", CONST_PS_FBOTEXSIZE);
248
WRITE(p, "sampler fbotex : register(s1);\n");
249
}
250
251
if (replaceBlend > REPLACE_BLEND_STANDARD) {
252
if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) {
253
WRITE(p, "float3 u_blendFixA : register(c%i);\n", CONST_PS_BLENDFIXA);
254
}
255
if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) {
256
WRITE(p, "float3 u_blendFixB : register(c%i);\n", CONST_PS_BLENDFIXB);
257
}
258
}
259
if (needShaderTexClamp && doTexture) {
260
WRITE(p, "vec4 u_texclamp : register(c%i);\n", CONST_PS_TEXCLAMP);
261
WRITE(p, "vec2 u_texclampoff : register(c%i);\n", CONST_PS_TEXCLAMPOFF);
262
}
263
264
if (enableAlphaTest || enableColorTest) {
265
WRITE(p, "vec4 u_alphacolorref : register(c%i);\n", CONST_PS_ALPHACOLORREF);
266
WRITE(p, "vec4 u_alphacolormask : register(c%i);\n", CONST_PS_ALPHACOLORMASK);
267
}
268
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
269
WRITE(p, "float u_stencilReplaceValue : register(c%i);\n", CONST_PS_STENCILREPLACE);
270
}
271
if (doTexture) {
272
if (texFunc == GE_TEXFUNC_BLEND) {
273
WRITE(p, "float3 u_texenv : register(c%i);\n", CONST_PS_TEXENV);
274
}
275
if (ubershader) {
276
WRITE(p, "float2 u_texNoAlphaMul : register(c%i);\n", CONST_PS_TEX_NO_ALPHA_MUL);
277
}
278
}
279
if (enableFog) {
280
WRITE(p, "float3 u_fogcolor : register(c%i);\n", CONST_PS_FOGCOLOR);
281
}
282
if (texture3D) {
283
WRITE(p, "float u_mipBias : register(c%i);\n", CONST_PS_MIPBIAS);
284
}
285
} else {
286
WRITE(p, "SamplerState texSamp : register(s0);\n");
287
if (texture3D) {
288
WRITE(p, "Texture3D<vec4> tex : register(t0);\n");
289
} else {
290
WRITE(p, "Texture2D<vec4> tex : register(t0);\n");
291
}
292
if (readFramebufferTex) {
293
// No sampler required, we Load
294
WRITE(p, "Texture2D<vec4> fbotex : register(t1);\n");
295
}
296
297
if (shaderDepalMode != ShaderDepalMode::OFF) {
298
WRITE(p, "SamplerState palSamp : register(s3);\n");
299
WRITE(p, "Texture2D<vec4> pal : register(t3);\n");
300
WRITE(p, "float2 textureSize(Texture2D<float4> tex, int mip) { float2 size; tex.GetDimensions(size.x, size.y); return size; }\n");
301
}
302
303
WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr);
304
}
305
306
if (enableAlphaTest) {
307
if (compat.shaderLanguage == HLSL_D3D11) {
308
WRITE(p, "int roundAndScaleTo255i(float x) { return int(floor(x * 255.0f + 0.5f)); }\n");
309
} else {
310
// D3D11 level 9 gets to take this path.
311
WRITE(p, "float roundAndScaleTo255f(float x) { return floor(x * 255.0f + 0.5f); }\n");
312
}
313
}
314
if (enableColorTest) {
315
if (compat.shaderLanguage == HLSL_D3D11) {
316
WRITE(p, "uint roundAndScaleTo8x4(float3 x) { uvec3 u = (floor(x * 255.0f + 0.5f)); return u.r | (u.g << 8) | (u.b << 16); }\n");
317
WRITE(p, "uint packFloatsTo8x4(in vec3 x) { uvec3 u = uvec3(x); return u.r | (u.g << 8) | (u.b << 16); }\n");
318
} else {
319
WRITE(p, "vec3 roundAndScaleTo255v(float3 x) { return floor(x * 255.0f + 0.5f); }\n");
320
}
321
}
322
323
WRITE(p, "struct PS_IN {\n");
324
if (doTexture || compat.shaderLanguage == HLSL_D3D11) {
325
// In D3D11, if we always have a texcoord in the VS, we always need it in the PS too for the structs to match.
326
WRITE(p, " vec3 v_texcoord: TEXCOORD0;\n");
327
}
328
const char *colorInterpolation = doFlatShading && compat.shaderLanguage == HLSL_D3D11 ? "nointerpolation " : "";
329
WRITE(p, " %svec4 v_color0: COLOR0;\n", colorInterpolation);
330
if (lmode) {
331
WRITE(p, " vec3 v_color1: COLOR1;\n");
332
}
333
WRITE(p, " float v_fogdepth: TEXCOORD1;\n");
334
if (needFragCoord) {
335
if (compat.shaderLanguage == HLSL_D3D11) {
336
WRITE(p, " vec4 pixelPos : SV_POSITION;\n");
337
} else if (compat.shaderLanguage == HLSL_D3D9) {
338
WRITE(p, " vec4 pixelPos : VPOS;\n"); // VPOS is only supported for Shader Model 3.0, but we can probably forget about D3D9 SM2.0 at this point...
339
}
340
}
341
WRITE(p, "};\n");
342
343
if (compat.shaderLanguage == HLSL_D3D11) {
344
WRITE(p, "struct PS_OUT {\n");
345
if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
346
WRITE(p, " vec4 target : SV_Target0;\n");
347
WRITE(p, " vec4 target1 : SV_Target1;\n");
348
} else {
349
WRITE(p, " vec4 target : SV_Target;\n");
350
}
351
if (writeDepth) {
352
WRITE(p, " float depth : SV_Depth;\n");
353
}
354
WRITE(p, "};\n");
355
} else if (compat.shaderLanguage == HLSL_D3D9) {
356
WRITE(p, "struct PS_OUT {\n");
357
WRITE(p, " vec4 target : COLOR;\n");
358
if (writeDepth) {
359
WRITE(p, " float depth : DEPTH;\n");
360
}
361
WRITE(p, "};\n");
362
}
363
} else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
364
if ((shaderDepalMode != ShaderDepalMode::OFF || colorWriteMask) && gl_extensions.IsGLES) {
365
WRITE(p, "precision highp int;\n");
366
}
367
368
if (doTexture) {
369
if (texture3D) {
370
// For whatever reason, a precision specifier is required here.
371
WRITE(p, "uniform lowp sampler3D tex;\n");
372
} else {
373
WRITE(p, "uniform sampler2D tex;\n");
374
}
375
*uniformMask |= DIRTY_TEX_ALPHA_MUL;
376
if (ubershader) {
377
WRITE(p, "uniform vec2 u_texNoAlphaMul;\n");
378
}
379
}
380
381
if (readFramebufferTex) {
382
if (!compat.texelFetch) {
383
WRITE(p, "uniform vec2 u_fbotexSize;\n");
384
}
385
WRITE(p, "uniform sampler2D fbotex;\n");
386
}
387
388
if (!isModeClear && replaceBlend > REPLACE_BLEND_STANDARD) {
389
*uniformMask |= DIRTY_SHADERBLEND;
390
if (replaceBlendFuncA >= GE_SRCBLEND_FIXA) {
391
WRITE(p, "uniform vec3 u_blendFixA;\n");
392
}
393
if (replaceBlendFuncB >= GE_DSTBLEND_FIXB) {
394
WRITE(p, "uniform vec3 u_blendFixB;\n");
395
}
396
}
397
398
if (needShaderTexClamp && doTexture) {
399
*uniformMask |= DIRTY_TEXCLAMP;
400
WRITE(p, "uniform vec4 u_texclamp;\n");
401
WRITE(p, "uniform vec2 u_texclampoff;\n");
402
}
403
404
// TODO: Can get rid of some of this in the != 0 cases.
405
if (enableAlphaTest || enableColorTest) {
406
if (enableFragmentTestCache) {
407
WRITE(p, "uniform sampler2D testtex;\n");
408
} else {
409
*uniformMask |= DIRTY_ALPHACOLORREF;
410
if (compat.bitwiseOps) {
411
WRITE(p, "uniform uint u_alphacolorref;\n");
412
} else {
413
WRITE(p, "uniform vec4 u_alphacolorref;\n");
414
}
415
if (compat.bitwiseOps && ((enableColorTest && !colorTestAgainstZero) || (enableAlphaTest && !alphaTestAgainstZero))) {
416
*uniformMask |= DIRTY_ALPHACOLORMASK;
417
WRITE(p, "uniform uint u_alphacolormask;\n");
418
}
419
}
420
}
421
422
if (shaderDepalMode != ShaderDepalMode::OFF) {
423
WRITE(p, "uniform sampler2D pal;\n");
424
WRITE(p, "uniform uint u_depal_mask_shift_off_fmt;\n");
425
*uniformMask |= DIRTY_DEPAL;
426
}
427
428
if (colorWriteMask) {
429
WRITE(p, "uniform uint u_colorWriteMask;\n");
430
*uniformMask |= DIRTY_COLORWRITEMASK;
431
}
432
433
if (stencilToAlpha && replaceAlphaWithStencilType == STENCIL_VALUE_UNIFORM) {
434
*uniformMask |= DIRTY_STENCILREPLACEVALUE;
435
WRITE(p, "uniform float u_stencilReplaceValue;\n");
436
}
437
if (doTexture && texFunc == GE_TEXFUNC_BLEND) {
438
*uniformMask |= DIRTY_TEXENV;
439
WRITE(p, "uniform vec3 u_texenv;\n");
440
}
441
442
if (texture3D) {
443
*uniformMask |= DIRTY_MIPBIAS;
444
WRITE(p, "uniform float u_mipBias;\n");
445
}
446
447
WRITE(p, "%s %s lowp vec4 v_color0;\n", shading, compat.varying_fs);
448
if (lmode) {
449
WRITE(p, "%s %s lowp vec3 v_color1;\n", shading, compat.varying_fs);
450
}
451
if (enableFog) {
452
*uniformMask |= DIRTY_FOGCOLOR;
453
WRITE(p, "uniform vec3 u_fogcolor;\n");
454
}
455
WRITE(p, "%s %s float v_fogdepth;\n", compat.varying_fs, highpFog ? "highp" : "mediump");
456
if (doTexture) {
457
WRITE(p, "%s %s vec3 v_texcoord;\n", compat.varying_fs, highpTexcoord ? "highp" : "mediump");
458
}
459
460
if (!enableFragmentTestCache) {
461
if (enableAlphaTest && !alphaTestAgainstZero) {
462
if (compat.bitwiseOps) {
463
WRITE(p, "int roundAndScaleTo255i(in float x) { return int(floor(x * 255.0 + 0.5)); }\n");
464
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
465
WRITE(p, "float roundTo255thf(in mediump float x) { mediump float y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
466
} else {
467
WRITE(p, "float roundAndScaleTo255f(in float x) { return floor(x * 255.0 + 0.5); }\n");
468
}
469
}
470
if (enableColorTest && !colorTestAgainstZero) {
471
if (compat.bitwiseOps) {
472
WRITE(p, "uint roundAndScaleTo8x4(in vec3 x) { uvec3 u = uvec3(floor(x * 255.92)); return u.r | (u.g << 0x8u) | (u.b << 0x10u); }\n");
473
WRITE(p, "uint packFloatsTo8x4(in vec3 x) { uvec3 u = uvec3(x); return u.r | (u.g << 0x8u) | (u.b << 0x10u); }\n");
474
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
475
WRITE(p, "vec3 roundTo255thv(in vec3 x) { vec3 y = x + (0.5/255.0); return y - fract(y * 255.0) * (1.0 / 255.0); }\n");
476
} else {
477
WRITE(p, "vec3 roundAndScaleTo255v(in vec3 x) { return floor(x * 255.0 + 0.5); }\n");
478
}
479
}
480
}
481
482
if (!strcmp(compat.fragColor0, "fragColor0")) {
483
const char *qualifierColor0 = "out";
484
if (fetchFramebuffer && compat.lastFragData && !strcmp(compat.lastFragData, compat.fragColor0)) {
485
qualifierColor0 = "inout";
486
}
487
// Output the output color definitions.
488
if (stencilToAlpha == REPLACE_ALPHA_DUALSOURCE) {
489
WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
490
WRITE(p, "out vec4 fragColor1;\n");
491
} else {
492
WRITE(p, "%s vec4 fragColor0;\n", qualifierColor0);
493
}
494
}
495
}
496
497
bool hasPackUnorm4x8 = false;
498
if (compat.shaderLanguage == GLSL_VULKAN) {
499
hasPackUnorm4x8 = true;
500
} else if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
501
if (compat.gles) {
502
hasPackUnorm4x8 = compat.glslVersionNumber >= 310;
503
} else {
504
hasPackUnorm4x8 = compat.glslVersionNumber >= 400;
505
}
506
}
507
508
const char *packSuffix = "";
509
if (!hasPackUnorm4x8) {
510
packSuffix = "R";
511
}
512
513
// Provide implementations of packUnorm4x8 and unpackUnorm4x8 if not available.
514
if ((colorWriteMask || replaceLogicOp) && !hasPackUnorm4x8) {
515
WRITE(p, "uint packUnorm4x8%s(%svec4 v) {\n", packSuffix, compat.shaderLanguage == GLSL_VULKAN ? "highp " : "");
516
WRITE(p, " highp vec4 f = clamp(v, 0.0, 1.0);\n");
517
WRITE(p, " uvec4 u = uvec4(255.0 * f);\n");
518
WRITE(p, " return u.x | (u.y << 0x8u) | (u.z << 0x10u) | (u.w << 0x18u);\n");
519
WRITE(p, "}\n");
520
521
WRITE(p, "vec4 unpackUnorm4x8%s(highp uint x) {\n", packSuffix);
522
WRITE(p, " highp uvec4 u = uvec4(x & 0xFFu, (x >> 0x8u) & 0xFFu, (x >> 0x10u) & 0xFFu, (x >> 0x18u) & 0xFFu);\n");
523
WRITE(p, " highp vec4 f = vec4(u);\n");
524
WRITE(p, " return f * (1.0 / 255.0);\n");
525
WRITE(p, "}\n");
526
}
527
528
if (compat.bitwiseOps && enableColorTest) {
529
p.C("uvec3 unpackUVec3(highp uint x) {\n");
530
p.C(" return uvec3(x & 0xFFu, (x >> 0x8u) & 0xFFu, (x >> 0x10u) & 0xFFu);\n");
531
p.C("}\n");
532
}
533
534
// PowerVR needs a custom modulo function. For some reason, this has far higher precision than the builtin one.
535
if ((gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) && needShaderTexClamp) {
536
WRITE(p, "float mymod(float a, float b) { return a - b * floor(a / b); }\n");
537
}
538
539
if (compat.shaderLanguage == HLSL_D3D11) {
540
WRITE(p, "PS_OUT main( PS_IN In ) {\n");
541
WRITE(p, " PS_OUT outfragment;\n");
542
if (needFragCoord) {
543
WRITE(p, " vec4 gl_FragCoord = In.pixelPos;\n");
544
}
545
if (writeDepth) {
546
WRITE(p, " float gl_FragDepth;\n");
547
}
548
} else if (compat.shaderLanguage == HLSL_D3D9) {
549
WRITE(p, "PS_OUT main( PS_IN In ) {\n");
550
WRITE(p, " PS_OUT outfragment;\n");
551
if (needFragCoord) {
552
WRITE(p, " vec4 gl_FragCoord = In.pixelPos;\n");
553
}
554
} else {
555
WRITE(p, "void main() {\n");
556
}
557
558
if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) {
559
WRITE(p, " vec4 v_color0 = In.v_color0;\n");
560
if (lmode) {
561
WRITE(p, " vec3 v_color1 = In.v_color1;\n");
562
}
563
if (enableFog) {
564
WRITE(p, " float v_fogdepth = In.v_fogdepth;\n");
565
}
566
if (doTexture) {
567
WRITE(p, " vec3 v_texcoord = In.v_texcoord;\n");
568
}
569
}
570
571
// Two things read from the old framebuffer - shader replacement blending and bit-level masking.
572
if (readFramebufferTex) {
573
if (compat.shaderLanguage == HLSL_D3D11) {
574
WRITE(p, " vec4 destColor = fbotex.Load(int3((int)gl_FragCoord.x, (int)gl_FragCoord.y, 0));\n");
575
} else if (compat.shaderLanguage == HLSL_D3D9) {
576
WRITE(p, " vec4 destColor = tex2D(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
577
} else if (compat.shaderLanguage == GLSL_VULKAN) {
578
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec3(gl_FragCoord.x, gl_FragCoord.y, %s), 0);\n", compat.texelFetch, useStereo ? "float(gl_ViewIndex)" : "0");
579
} else if (!compat.texelFetch) {
580
WRITE(p, " lowp vec4 destColor = %s(fbotex, gl_FragCoord.xy * u_fbotexSize.xy);\n", compat.texture);
581
} else {
582
WRITE(p, " lowp vec4 destColor = %s(fbotex, ivec2(gl_FragCoord.x, gl_FragCoord.y), 0);\n", compat.texelFetch);
583
}
584
} else if (fetchFramebuffer) {
585
// If we have EXT_shader_framebuffer_fetch / ARM_shader_framebuffer_fetch, we skip the blit.
586
// We can just read the prev value more directly.
587
if (compat.shaderLanguage == GLSL_3xx) {
588
WRITE(p, " lowp vec4 destColor = %s;\n", compat.lastFragData);
589
} else if (compat.shaderLanguage == GLSL_VULKAN) {
590
WRITE(p, " lowp vec4 destColor = subpassLoad(inputColor);\n");
591
} else {
592
_assert_msg_(false, "Need fetch destColor, but not a compatible language");
593
}
594
}
595
596
if (isModeClear) {
597
// Clear mode does not allow any fancy shading.
598
WRITE(p, " vec4 v = v_color0;\n");
599
} else {
600
const char *secondary = "";
601
// Secondary color for specular on top of texture
602
if (lmode) {
603
WRITE(p, " vec4 s = vec4(v_color1, 0.0);\n");
604
secondary = " + s";
605
}
606
607
if (doTexture) {
608
char texcoord[64] = "v_texcoord";
609
// TODO: Not sure the right way to do this for projection.
610
// This path destroys resolution on older PowerVR no matter what I do if projection is needed,
611
// so we disable it on SGX 540 and lesser, and live with the consequences.
612
bool terriblePrecision = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_TERRIBLE) != 0;
613
bool clampDisabled = doTextureProjection && terriblePrecision;
614
// Also with terrible precision we can't do wrapping without destroying the image. See #9189
615
if (terriblePrecision && (!id.Bit(FS_BIT_CLAMP_S) || !id.Bit(FS_BIT_CLAMP_T))) {
616
clampDisabled = true;
617
}
618
if (needShaderTexClamp && !clampDisabled) {
619
// We may be clamping inside a larger surface (tex = 64x64, buffer=480x272).
620
// We may also be wrapping in such a surface, or either one in a too-small surface.
621
// Obviously, clamping to a smaller surface won't work. But better to clamp to something.
622
std::string ucoord = "v_texcoord.x";
623
std::string vcoord = "v_texcoord.y";
624
if (doTextureProjection) {
625
ucoord = "(v_texcoord.x / v_texcoord.z)";
626
vcoord = "(v_texcoord.y / v_texcoord.z)";
627
}
628
629
std::string modulo = (gl_extensions.bugs & BUG_PVR_SHADER_PRECISION_BAD) ? "mymod" : "mod";
630
631
if (id.Bit(FS_BIT_CLAMP_S)) {
632
ucoord = "clamp(" + ucoord + ", u_texclamp.z, u_texclamp.x - u_texclamp.z)";
633
} else {
634
ucoord = modulo + "(" + ucoord + ", u_texclamp.x)";
635
}
636
if (id.Bit(FS_BIT_CLAMP_T)) {
637
vcoord = "clamp(" + vcoord + ", u_texclamp.w, u_texclamp.y - u_texclamp.w)";
638
} else {
639
vcoord = modulo + "(" + vcoord + ", u_texclamp.y)";
640
}
641
ucoord = "(" + ucoord + " + u_texclampoff.x)";
642
vcoord = "(" + vcoord + " + u_texclampoff.y)";
643
644
WRITE(p, " vec2 fixedcoord = vec2(%s, %s);\n", ucoord.c_str(), vcoord.c_str());
645
truncate_cpy(texcoord, "fixedcoord");
646
// We already projected it.
647
doTextureProjection = false;
648
}
649
650
switch (shaderDepalMode) {
651
case ShaderDepalMode::OFF:
652
if (compat.shaderLanguage == HLSL_D3D11) {
653
if (texture3D) {
654
if (doTextureProjection) {
655
WRITE(p, " vec4 t = tex.Sample(texSamp, vec3(v_texcoord.xy / v_texcoord.z, u_mipBias))%s;\n", bgraTexture ? ".bgra" : "");
656
} else {
657
WRITE(p, " vec4 t = tex.Sample(texSamp, vec3(%s.xy, u_mipBias))%s;\n", texcoord, bgraTexture ? ".bgra" : "");
658
}
659
} else {
660
if (doTextureProjection) {
661
WRITE(p, " vec4 t = tex.Sample(texSamp, v_texcoord.xy / v_texcoord.z)%s;\n", bgraTexture ? ".bgra" : "");
662
} else {
663
WRITE(p, " vec4 t = tex.Sample(texSamp, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : "");
664
}
665
}
666
} else if (compat.shaderLanguage == HLSL_D3D9) {
667
if (texture3D) {
668
if (doTextureProjection) {
669
WRITE(p, " vec4 t = tex3Dproj(tex, vec4(v_texcoord.x, v_texcoord.y, u_mipBias, v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : "");
670
} else {
671
WRITE(p, " vec4 t = tex3D(tex, vec3(%s.x, %s.y, u_mipBias))%s;\n", texcoord, texcoord, bgraTexture ? ".bgra" : "");
672
}
673
} else {
674
if (doTextureProjection) {
675
WRITE(p, " vec4 t = tex2Dproj(tex, vec4(v_texcoord.x, v_texcoord.y, 0.0, v_texcoord.z))%s;\n", bgraTexture ? ".bgra" : "");
676
} else {
677
WRITE(p, " vec4 t = tex2D(tex, %s.xy)%s;\n", texcoord, bgraTexture ? ".bgra" : "");
678
}
679
}
680
} else {
681
// Note that here we're relying on the filter to be linear. We would have to otherwise to do two samples and manually filter in Z.
682
// Let's add that if we run into a case...
683
if (texture3D) {
684
if (doTextureProjection) {
685
WRITE(p, " vec4 t = %sProj(tex, vec4(%s.xy, u_mipBias, %s.z));\n", compat.texture3D, texcoord, texcoord);
686
} else {
687
WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, u_mipBias));\n", compat.texture3D, texcoord);
688
}
689
} else if (arrayTexture) {
690
_dbg_assert_(compat.shaderLanguage == GLSL_VULKAN);
691
// Used for stereo rendering.
692
const char *arrayIndex = useStereo ? "float(gl_ViewIndex)" : "0.0";
693
if (doTextureProjection) {
694
// There's no textureProj for array textures, so we need to emulate it.
695
// Should be fine on any Vulkan-compatible hardware.
696
WRITE(p, " vec2 uv_proj = (%s.xy) / (%s.z);\n", texcoord, texcoord);
697
WRITE(p, " vec4 t = %s(tex, vec3(uv_proj, %s));\n", compat.texture, texcoord, arrayIndex);
698
} else {
699
WRITE(p, " vec4 t = %s(tex, vec3(%s.xy, %s));\n", compat.texture, texcoord, arrayIndex);
700
}
701
} else {
702
if (doTextureProjection) {
703
WRITE(p, " vec4 t = %sProj(tex, %s);\n", compat.texture, texcoord);
704
} else {
705
WRITE(p, " vec4 t = %s(tex, %s.xy);\n", compat.texture, texcoord);
706
}
707
}
708
}
709
break;
710
case ShaderDepalMode::SMOOTHED:
711
// Specific mode for Test Drive. Fixes the banding.
712
if (doTextureProjection) {
713
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
714
// However it is good for precision on older hardware like PowerVR.
715
p.F(" vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
716
} else {
717
p.F(" vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);
718
}
719
// Restrictions on this are checked before setting the smoothed flag.
720
// Only RGB565 and RGBA5551 are supported, and only the specific shifts hitting the
721
// channels directly.
722
// Also, since we know the CLUT is smooth, we do not need to do the bilinear filter manually, we can just
723
// lookup with the filtered value once.
724
p.F(" vec4 t = ").SampleTexture2D("tex", "uv").C(";\n");
725
p.C(" uint depalShift = (u_depal_mask_shift_off_fmt >> 0x8u) & 0xFFu;\n");
726
p.C(" uint depalOffset = ((u_depal_mask_shift_off_fmt >> 0x10u) & 0xFFu) << 0x4u;\n");
727
p.C(" uint depalFmt = (u_depal_mask_shift_off_fmt >> 0x18u) & 0x3u;\n");
728
p.C(" float index0 = t.r;\n");
729
p.C(" float factor = 31.0 / 256.0;\n");
730
p.C(" if (depalFmt == 0x0u) {\n"); // yes, different versions of Test Drive use different formats. Could do compile time by adding more compat flags but meh.
731
p.C(" if (depalShift == 0x5u) { index0 = t.g; factor = 63.0 / 256.0; }\n");
732
p.C(" else if (depalShift == 0xBu) { index0 = t.b; }\n");
733
p.C(" } else {\n");
734
p.C(" if (depalShift == 0x5u) { index0 = t.g; }\n");
735
p.C(" else if (depalShift == 0xAu) { index0 = t.b; }\n");
736
p.C(" }\n");
737
p.C(" float offset = float(depalOffset) / 256.0;\n");
738
p.F(" t = ").SampleTexture2D("pal", "vec2((index0 * factor + offset) * 0.5 + 0.5 / 512.0, 0.0)").C(";\n"); // 0.5 for 512-entry CLUT.
739
break;
740
case ShaderDepalMode::NORMAL:
741
if (doTextureProjection) {
742
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
743
// However it is good for precision on older hardware like PowerVR.
744
WRITE(p, " vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
745
} else {
746
WRITE(p, " vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);
747
}
748
WRITE(p, " vec2 tsize = vec2(textureSize(tex, 0).xy);\n");
749
WRITE(p, " vec2 fraction;\n");
750
WRITE(p, " bool bilinear = (u_depal_mask_shift_off_fmt >> 0x2Fu) != 0x0u;\n");
751
WRITE(p, " if (bilinear) {\n");
752
WRITE(p, " uv_round = uv * tsize - vec2(0.5, 0.5);\n");
753
WRITE(p, " fraction = fract(uv_round);\n");
754
WRITE(p, " uv_round = (uv_round - fraction + vec2(0.5, 0.5)) / tsize;\n"); // We want to take our four point samples at pixel centers.
755
WRITE(p, " } else {\n");
756
WRITE(p, " uv_round = uv;\n");
757
WRITE(p, " }\n");
758
p.C(" highp vec4 t = ").SampleTexture2D("tex", "uv_round").C(";\n");
759
p.C(" highp vec4 t1 = ").SampleTexture2DOffset("tex", "uv_round", 1, 0).C(";\n");
760
p.C(" highp vec4 t2 = ").SampleTexture2DOffset("tex", "uv_round", 0, 1).C(";\n");
761
p.C(" highp vec4 t3 = ").SampleTexture2DOffset("tex", "uv_round", 1, 1).C(";\n");
762
WRITE(p, " uint depalMask = (u_depal_mask_shift_off_fmt & 0xFFu);\n");
763
WRITE(p, " uint depalShift = (u_depal_mask_shift_off_fmt >> 0x8u) & 0xFFu;\n");
764
WRITE(p, " uint depalOffset = ((u_depal_mask_shift_off_fmt >> 0x10u) & 0xFFu) << 0x4u;\n");
765
WRITE(p, " uint depalFmt = (u_depal_mask_shift_off_fmt >> 0x18u) & 0x3u;\n");
766
WRITE(p, " uvec4 col; uint index0; uint index1; uint index2; uint index3;\n");
767
WRITE(p, " switch (int(depalFmt)) {\n"); // We might want to include fmt in the shader ID if this is a performance issue.
768
WRITE(p, " case 0:\n"); // 565
769
WRITE(p, " col = uvec4(t.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
770
WRITE(p, " index0 = (col.b << 0xBu) | (col.g << 0x5u) | (col.r);\n");
771
WRITE(p, " if (bilinear) {\n");
772
WRITE(p, " col = uvec4(t1.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
773
WRITE(p, " index1 = (col.b << 0xBu) | (col.g << 0x5u) | (col.r);\n");
774
WRITE(p, " col = uvec4(t2.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
775
WRITE(p, " index2 = (col.b << 0xBu) | (col.g << 0x5u) | (col.r);\n");
776
WRITE(p, " col = uvec4(t3.rgb * vec3(31.99, 63.99, 31.99), 0);\n");
777
WRITE(p, " index3 = (col.b << 0xBu) | (col.g << 0x5u) | (col.r);\n");
778
WRITE(p, " }\n");
779
WRITE(p, " break;\n");
780
WRITE(p, " case 1:\n"); // 5551
781
WRITE(p, " col = uvec4(t.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
782
WRITE(p, " index0 = (col.a << 0xFu) | (col.b << 0xAu) | (col.g << 0x5u) | (col.r);\n");
783
WRITE(p, " if (bilinear) {\n");
784
WRITE(p, " col = uvec4(t1.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
785
WRITE(p, " index1 = (col.a << 0xFu) | (col.b << 0xAu) | (col.g << 0x5u) | (col.r);\n");
786
WRITE(p, " col = uvec4(t2.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
787
WRITE(p, " index2 = (col.a << 0xFu) | (col.b << 0xAu) | (col.g << 0x5u) | (col.r);\n");
788
WRITE(p, " col = uvec4(t3.rgba * vec4(31.99, 31.99, 31.99, 1.0));\n");
789
WRITE(p, " index3 = (col.a << 0xFu) | (col.b << 0xAu) | (col.g << 0x5u) | (col.r);\n");
790
WRITE(p, " }\n");
791
WRITE(p, " break;\n");
792
WRITE(p, " case 2:\n"); // 4444
793
WRITE(p, " col = uvec4(t.rgba * 15.99);\n");
794
WRITE(p, " index0 = (col.a << 0xCu) | (col.b << 0x8u) | (col.g << 0x4u) | (col.r);\n");
795
WRITE(p, " if (bilinear) {\n");
796
WRITE(p, " col = uvec4(t1.rgba * 15.99);\n");
797
WRITE(p, " index1 = (col.a << 0xCu) | (col.b << 0x8u) | (col.g << 0x4u) | (col.r);\n");
798
WRITE(p, " col = uvec4(t2.rgba * 15.99);\n");
799
WRITE(p, " index2 = (col.a << 0xCu) | (col.b << 0x8u) | (col.g << 0x4u) | (col.r);\n");
800
WRITE(p, " col = uvec4(t3.rgba * 15.99);\n");
801
WRITE(p, " index3 = (col.a << 0xCu) | (col.b << 0x8u) | (col.g << 0x4u) | (col.r);\n");
802
WRITE(p, " }\n");
803
WRITE(p, " break;\n");
804
WRITE(p, " case 3:\n"); // 8888
805
WRITE(p, " col = uvec4(t.rgba * 255.99);\n");
806
WRITE(p, " index0 = (col.a << 0x18u) | (col.b << 0x10u) | (col.g << 0x8u) | (col.r);\n");
807
WRITE(p, " if (bilinear) {\n");
808
WRITE(p, " col = uvec4(t1.rgba * 255.99);\n");
809
WRITE(p, " index1 = (col.a << 0x18u) | (col.b << 0x10u) | (col.g << 0x8u) | (col.r);\n");
810
WRITE(p, " col = uvec4(t2.rgba * 255.99);\n");
811
WRITE(p, " index2 = (col.a << 0x18u) | (col.b << 0x10u) | (col.g << 0x8u) | (col.r);\n");
812
WRITE(p, " col = uvec4(t3.rgba * 255.99);\n");
813
WRITE(p, " index3 = (col.a << 0x18u) | (col.b << 0x10u) | (col.g << 0x8u) | (col.r);\n");
814
WRITE(p, " }\n");
815
WRITE(p, " break;\n");
816
WRITE(p, " };\n");
817
WRITE(p, " index0 = ((index0 >> depalShift) & depalMask) | depalOffset;\n");
818
p.C(" t = ").LoadTexture2D("pal", "ivec2(index0, 0)", 0).C(";\n");
819
WRITE(p, " if (bilinear && !(index0 == index1 && index1 == index2 && index2 == index3)) {\n");
820
WRITE(p, " index1 = ((index1 >> depalShift) & depalMask) | depalOffset;\n");
821
WRITE(p, " index2 = ((index2 >> depalShift) & depalMask) | depalOffset;\n");
822
WRITE(p, " index3 = ((index3 >> depalShift) & depalMask) | depalOffset;\n");
823
p.C(" t1 = ").LoadTexture2D("pal", "ivec2(index1, 0)", 0).C(";\n");
824
p.C(" t2 = ").LoadTexture2D("pal", "ivec2(index2, 0)", 0).C(";\n");
825
p.C(" t3 = ").LoadTexture2D("pal", "ivec2(index3, 0)", 0).C(";\n");
826
WRITE(p, " t = mix(t, t1, fraction.x);\n");
827
WRITE(p, " t2 = mix(t2, t3, fraction.x);\n");
828
WRITE(p, " t = mix(t, t2, fraction.y);\n");
829
WRITE(p, " }\n");
830
break;
831
case ShaderDepalMode::CLUT8_8888:
832
if (doTextureProjection) {
833
// We don't use textureProj because we need better control and it's probably not much of a savings anyway.
834
// However it is good for precision on older hardware like PowerVR.
835
p.F(" vec2 uv = %s.xy/%s.z;\n vec2 uv_round;\n", texcoord, texcoord);
836
} else {
837
p.F(" vec2 uv = %s.xy;\n vec2 uv_round;\n", texcoord);
838
}
839
p.C(" vec2 tsize = vec2(textureSize(tex, 0).xy);\n");
840
p.C(" uv_round = floor(uv * tsize);\n");
841
p.C(" int component = int(uv_round.x) & 3;\n");
842
p.C(" uv_round.x *= 0.25;\n");
843
p.C(" uv_round /= tsize;\n");
844
p.C(" vec4 t = ").SampleTexture2D("tex", "uv_round").C(";\n");
845
p.C(" int index;\n");
846
p.C(" switch (component) {\n");
847
p.C(" case 0: index = int(t.x * 254.99); break;\n"); // TODO: Not sure why 254.99 instead of 255.99, but it's currently needed.
848
p.C(" case 1: index = int(t.y * 254.99); break;\n");
849
p.C(" case 2: index = int(t.z * 254.99); break;\n");
850
p.C(" case 3: index = int(t.w * 254.99); break;\n");
851
p.C(" }\n");
852
p.C(" t = ").LoadTexture2D("pal", "ivec2(index, 0)", 0).C(";\n");
853
break;
854
}
855
856
WRITE(p, " vec4 p = v_color0;\n");
857
858
if (texFunc != GE_TEXFUNC_REPLACE) {
859
if (ubershader) {
860
WRITE(p, " t.a = max(t.a, u_texNoAlphaMul.x);\n");
861
} else if (!useTexAlpha) {
862
WRITE(p, " t.a = 1.0;\n");
863
}
864
}
865
866
switch (texFunc) {
867
case GE_TEXFUNC_MODULATE:
868
WRITE(p, " vec4 v = p * t%s;\n", secondary);
869
break;
870
case GE_TEXFUNC_DECAL:
871
WRITE(p, " vec4 v = vec4(mix(p.rgb, t.rgb, t.a), p.a)%s;\n", secondary);
872
break;
873
case GE_TEXFUNC_BLEND:
874
WRITE(p, " vec4 v = vec4(mix(p.rgb, u_texenv.rgb, t.rgb), p.a * t.a)%s;\n", secondary);
875
break;
876
case GE_TEXFUNC_REPLACE:
877
WRITE(p, " vec4 r = t;\n");
878
if (ubershader) {
879
WRITE(p, " r.a = mix(r.a, p.a, u_texNoAlphaMul.x);\n");
880
} else if (!useTexAlpha) {
881
WRITE(p, " r.a = p.a;\n");
882
}
883
WRITE(p, " vec4 v = r%s;\n", secondary);
884
break;
885
case GE_TEXFUNC_ADD:
886
case GE_TEXFUNC_UNKNOWN1:
887
case GE_TEXFUNC_UNKNOWN2:
888
case GE_TEXFUNC_UNKNOWN3:
889
WRITE(p, " vec4 v = vec4(p.rgb + t.rgb, p.a * t.a)%s;\n", secondary);
890
break;
891
default:
892
// Doesn't happen
893
WRITE(p, " vec4 v = p%s;\n", secondary); break;
894
break;
895
}
896
897
// This happens before fog is applied.
898
*uniformMask |= DIRTY_TEX_ALPHA_MUL;
899
900
// We only need a clamp if the color will be further processed. Otherwise the hardware color conversion will clamp for us.
901
if (ubershader) {
902
if (enableFog || enableColorTest || replaceBlend != REPLACE_BLEND_NO || simulateLogicOpType != LOGICOPTYPE_NORMAL || colorWriteMask || blueToAlpha) {
903
WRITE(p, " v.rgb = clamp(v.rgb * u_texNoAlphaMul.y, 0.0, 1.0);\n");
904
} else {
905
WRITE(p, " v.rgb *= u_texNoAlphaMul.y;\n");
906
}
907
} else if (enableColorDouble) {
908
p.C(" v.rgb = clamp(v.rgb * 2.0, 0.0, 1.0);\n");
909
}
910
} else {
911
// No texture mapping
912
WRITE(p, " vec4 v = v_color0%s;\n", secondary);
913
}
914
915
if (enableFog) {
916
WRITE(p, " float fogCoef = clamp(v_fogdepth, 0.0, 1.0);\n");
917
WRITE(p, " v = mix(vec4(u_fogcolor, v.a), v, fogCoef);\n");
918
}
919
920
// Texture access is at half texels [0.5/256, 255.5/256], but colors are normalized [0, 255].
921
// So we have to scale to account for the difference.
922
char alphaTestXCoord[64] = "0";
923
if (enableFragmentTestCache) {
924
if (enableColorTest && !colorTestAgainstZero) {
925
WRITE(p, " vec4 vScale256 = v * %f + %f;\n", 255.0 / 256.0, 0.5 / 256.0);
926
truncate_cpy(alphaTestXCoord, "vScale256.a");
927
} else if (enableAlphaTest && !alphaTestAgainstZero) {
928
snprintf(alphaTestXCoord, sizeof(alphaTestXCoord), "v.a * %f + %f", 255.0 / 256.0, 0.5 / 256.0);
929
}
930
}
931
932
const char *discardStatement = testForceToZero ? "v.a = 0.0;" : "DISCARD;";
933
if (enableAlphaTest) {
934
*fragmentShaderFlags |= FragmentShaderFlags::USES_DISCARD;
935
936
if (alphaTestAgainstZero) {
937
// When testing against 0 (extremely common), we can avoid some math.
938
// 0.002 is approximately half of 1.0 / 255.0.
939
if (alphaTestFunc == GE_COMP_NOTEQUAL || alphaTestFunc == GE_COMP_GREATER) {
940
WRITE(p, " if (v.a < 0.002) %s\n", discardStatement);
941
} else if (alphaTestFunc != GE_COMP_NEVER) {
942
// Anything else is a test for == 0. Happens sometimes, actually...
943
WRITE(p, " if (v.a > 0.002) %s\n", discardStatement);
944
} else {
945
// NEVER has been logged as used by games, although it makes little sense - statically failing.
946
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
947
WRITE(p, " %s\n", discardStatement);
948
}
949
} else if (enableFragmentTestCache) {
950
WRITE(p, " float aResult = %s(testtex, vec2(%s, 0)).a;\n", compat.texture, alphaTestXCoord);
951
WRITE(p, " if (aResult < 0.5) %s\n", discardStatement);
952
} else {
953
const char *alphaTestFuncs[] = { "#", "#", " != ", " == ", " >= ", " > ", " <= ", " < " };
954
if (alphaTestFuncs[alphaTestFunc][0] != '#') {
955
if (compat.bitwiseOps) {
956
WRITE(p, " if ((roundAndScaleTo255i(v.a) & int(u_alphacolormask >> 0x18u)) %s int(u_alphacolorref >> 0x18u)) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
957
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
958
// Work around bad PVR driver problem where equality check + discard just doesn't work.
959
if (alphaTestFunc != GE_COMP_NOTEQUAL) {
960
WRITE(p, " if (roundTo255thf(v.a) %s u_alphacolorref.a) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
961
}
962
} else {
963
WRITE(p, " if (roundAndScaleTo255f(v.a) %s u_alphacolorref.a) %s\n", alphaTestFuncs[alphaTestFunc], discardStatement);
964
}
965
} else {
966
// This means NEVER. See above.
967
WRITE(p, " %s\n", discardStatement);
968
}
969
}
970
}
971
972
if (enableColorTest) {
973
*fragmentShaderFlags |= FragmentShaderFlags::USES_DISCARD;
974
975
if (colorTestAgainstZero) {
976
// When testing against 0 (common), we can avoid some math.
977
// 0.002 is approximately half of 1.0 / 255.0.
978
if (colorTestFunc == GE_COMP_NOTEQUAL) {
979
if (compat.shaderLanguage == GLSL_VULKAN) {
980
// Old workaround for Adreno driver bug. We could make this the main path actually
981
// since the math is roughly equivalent given the non-negative inputs.
982
WRITE(p, " if (v.r + v.g + v.b < 0.002) %s\n", discardStatement);
983
} else {
984
WRITE(p, " if (v.r < 0.002 && v.g < 0.002 && v.b < 0.002) %s\n", discardStatement);
985
}
986
} else if (colorTestFunc != GE_COMP_NEVER) {
987
if (compat.shaderLanguage == GLSL_VULKAN) {
988
// See the GE_COMP_NOTEQUAL case.
989
WRITE(p, " if (v.r + v.g + v.b > 0.002) %s\n", discardStatement);
990
} else {
991
// Anything else is a test for == 0.
992
WRITE(p, " if (v.r > 0.002 || v.g > 0.002 || v.b > 0.002) %s\n", discardStatement);
993
}
994
} else {
995
// NEVER has been logged as used by games, although it makes little sense - statically failing.
996
// Maybe we could discard the drawcall, but it's pretty rare. Let's just statically discard here.
997
WRITE(p, " %s\n", discardStatement);
998
}
999
} else if (enableFragmentTestCache) {
1000
WRITE(p, " float rResult = %s(testtex, vec2(vScale256.r, 0)).r;\n", compat.texture);
1001
WRITE(p, " float gResult = %s(testtex, vec2(vScale256.g, 0)).g;\n", compat.texture);
1002
WRITE(p, " float bResult = %s(testtex, vec2(vScale256.b, 0)).b;\n", compat.texture);
1003
if (colorTestFunc == GE_COMP_EQUAL) {
1004
// Equal means all parts must be equal (so discard if any is not.)
1005
WRITE(p, " if (rResult < 0.5 || gResult < 0.5 || bResult < 0.5) %s\n", discardStatement);
1006
} else {
1007
// Not equal means any part must be not equal.
1008
WRITE(p, " if (rResult < 0.5 && gResult < 0.5 && bResult < 0.5) %s\n", discardStatement);
1009
}
1010
} else {
1011
const char *colorTestFuncs[] = { "#", "#", " != ", " == " };
1012
const char *test = colorTestFuncs[colorTestFunc];
1013
if (test[0] != '#') {
1014
// TODO: Unify these paths better.
1015
if (compat.shaderLanguage == HLSL_D3D9) {
1016
// TODO: Use a texture to lookup bitwise ops instead?
1017
WRITE(p, " vec3 colortest = roundAndScaleTo255v(v.rgb);\n");
1018
WRITE(p, " if ((colortest.r %s u_alphacolorref.r) && (colortest.g %s u_alphacolorref.g) && (colortest.b %s u_alphacolorref.b)) %s\n", test, test, test, discardStatement);
1019
} else if (compat.bitwiseOps) {
1020
WRITE(p, " uint v_uint = roundAndScaleTo8x4(v.rgb);\n");
1021
WRITE(p, " uint v_masked = v_uint & u_alphacolormask;\n");
1022
WRITE(p, " uint colorTestRef = (u_alphacolorref & u_alphacolormask) & 0xFFFFFFu;\n");
1023
WRITE(p, " if (v_masked %s colorTestRef) %s\n", test, discardStatement);
1024
} else if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
1025
WRITE(p, " if (roundTo255thv(v.rgb) %s u_alphacolorref.rgb) %s\n", test, discardStatement);
1026
} else {
1027
WRITE(p, " if (roundAndScaleTo255v(v.rgb) %s u_alphacolorref.rgb) %s\n", test, discardStatement);
1028
}
1029
} else {
1030
WRITE(p, " %s\n", discardStatement);
1031
}
1032
}
1033
}
1034
1035
if (replaceBlend == REPLACE_BLEND_2X_SRC) {
1036
WRITE(p, " v.rgb = v.rgb * 2.0;\n");
1037
}
1038
1039
// In some cases we need to replicate the first half of the blend equation here.
1040
// In case of blue-to-alpha, it's since we overwrite alpha with blue before the actual blend equation runs.
1041
if (replaceBlend == REPLACE_BLEND_PRE_SRC || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA || replaceBlend == REPLACE_BLEND_BLUE_TO_ALPHA) {
1042
const char *srcFactor = "ERROR";
1043
switch (replaceBlendFuncA) {
1044
case GE_SRCBLEND_DSTCOLOR: srcFactor = "ERROR"; break;
1045
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "ERROR"; break;
1046
case GE_SRCBLEND_SRCALPHA: srcFactor = "splat3(v.a)"; break;
1047
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break;
1048
case GE_SRCBLEND_DSTALPHA: srcFactor = "ERROR"; break;
1049
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "ERROR"; break;
1050
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "splat3(v.a * 2.0)"; break;
1051
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "splat3(1.0 - v.a * 2.0)"; break;
1052
// PRE_SRC for REPLACE_BLEND_PRE_SRC_2X_ALPHA means "double the src."
1053
// It's close to the same, but clamping can still be an issue.
1054
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "splat3(2.0)"; break;
1055
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "ERROR"; break;
1056
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
1057
default: srcFactor = "u_blendFixA"; break;
1058
}
1059
1060
if (!strcmp(srcFactor, "ERROR")) {
1061
*errorString = "Bad replaceblend src factor";
1062
return false;
1063
}
1064
1065
WRITE(p, " v.rgb = v.rgb * %s;\n", srcFactor);
1066
}
1067
1068
if (replaceBlend == REPLACE_BLEND_READ_FRAMEBUFFER) {
1069
const char *srcFactor = nullptr;
1070
const char *dstFactor = nullptr;
1071
1072
switch (replaceBlendFuncA) {
1073
case GE_SRCBLEND_DSTCOLOR: srcFactor = "destColor.rgb"; break;
1074
case GE_SRCBLEND_INVDSTCOLOR: srcFactor = "(splat3(1.0) - destColor.rgb)"; break;
1075
case GE_SRCBLEND_SRCALPHA: srcFactor = "v.aaa"; break;
1076
case GE_SRCBLEND_INVSRCALPHA: srcFactor = "splat3(1.0 - v.a)"; break;
1077
case GE_SRCBLEND_DSTALPHA: srcFactor = "destColor.aaa"; break;
1078
case GE_SRCBLEND_INVDSTALPHA: srcFactor = "(splat3(1.0) - destColor.aaa)"; break;
1079
case GE_SRCBLEND_DOUBLESRCALPHA: srcFactor = "v.aaa * 2.0"; break;
1080
case GE_SRCBLEND_DOUBLEINVSRCALPHA: srcFactor = "(splat3(1.0) - v.aaa * 2.0)"; break;
1081
case GE_SRCBLEND_DOUBLEDSTALPHA: srcFactor = "destColor.aaa * 2.0"; break;
1082
case GE_SRCBLEND_DOUBLEINVDSTALPHA: srcFactor = "(splat3(1.0) - destColor.aaa * 2.0)"; break;
1083
case GE_SRCBLEND_FIXA: srcFactor = "u_blendFixA"; break;
1084
default: srcFactor = "u_blendFixA"; break;
1085
}
1086
switch (replaceBlendFuncB) {
1087
case GE_DSTBLEND_SRCCOLOR: dstFactor = "v.rgb"; break;
1088
case GE_DSTBLEND_INVSRCCOLOR: dstFactor = "(splat3(1.0) - v.rgb)"; break;
1089
case GE_DSTBLEND_SRCALPHA: dstFactor = "v.aaa"; break;
1090
case GE_DSTBLEND_INVSRCALPHA: dstFactor = "(splat3(1.0) - v.aaa)"; break;
1091
case GE_DSTBLEND_DSTALPHA: dstFactor = "destColor.aaa"; break;
1092
case GE_DSTBLEND_INVDSTALPHA: dstFactor = "(splat3(1.0) - destColor.aaa)"; break;
1093
case GE_DSTBLEND_DOUBLESRCALPHA: dstFactor = "v.aaa * 2.0"; break;
1094
case GE_DSTBLEND_DOUBLEINVSRCALPHA: dstFactor = "(splat3(1.0) - v.aaa * 2.0)"; break;
1095
case GE_DSTBLEND_DOUBLEDSTALPHA: dstFactor = "destColor.aaa * 2.0"; break;
1096
case GE_DSTBLEND_DOUBLEINVDSTALPHA: dstFactor = "(splat3(1.0) - destColor.aaa * 2.0)"; break;
1097
case GE_DSTBLEND_FIXB: dstFactor = "u_blendFixB"; break;
1098
default: dstFactor = "u_blendFixB"; break;
1099
}
1100
1101
switch (replaceBlendEq) {
1102
case GE_BLENDMODE_MUL_AND_ADD:
1103
WRITE(p, " v.rgb = v.rgb * %s + destColor.rgb * %s;\n", srcFactor, dstFactor);
1104
break;
1105
case GE_BLENDMODE_MUL_AND_SUBTRACT:
1106
WRITE(p, " v.rgb = v.rgb * %s - destColor.rgb * %s;\n", srcFactor, dstFactor);
1107
break;
1108
case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
1109
WRITE(p, " v.rgb = destColor.rgb * %s - v.rgb * %s;\n", dstFactor, srcFactor);
1110
break;
1111
case GE_BLENDMODE_MIN:
1112
WRITE(p, " v.rgb = min(v.rgb, destColor.rgb);\n");
1113
break;
1114
case GE_BLENDMODE_MAX:
1115
WRITE(p, " v.rgb = max(v.rgb, destColor.rgb);\n");
1116
break;
1117
case GE_BLENDMODE_ABSDIFF:
1118
WRITE(p, " v.rgb = abs(v.rgb - destColor.rgb);\n");
1119
break;
1120
default:
1121
*errorString = "Bad replace blend eq";
1122
return false;
1123
}
1124
}
1125
1126
if (replaceBlend == REPLACE_BLEND_2X_ALPHA || replaceBlend == REPLACE_BLEND_PRE_SRC_2X_ALPHA) {
1127
WRITE(p, " v.a *= 2.0;\n");
1128
}
1129
}
1130
1131
char replacedAlpha[64] = "0.0";
1132
if (stencilToAlpha != REPLACE_ALPHA_NO) {
1133
switch (replaceAlphaWithStencilType) {
1134
case STENCIL_VALUE_UNIFORM:
1135
truncate_cpy(replacedAlpha, "u_stencilReplaceValue");
1136
break;
1137
1138
case STENCIL_VALUE_ZERO:
1139
truncate_cpy(replacedAlpha, "0.0");
1140
break;
1141
1142
case STENCIL_VALUE_ONE:
1143
case STENCIL_VALUE_INVERT:
1144
// In invert, we subtract by one, but we want to output one here.
1145
truncate_cpy(replacedAlpha, "1.0");
1146
break;
1147
1148
case STENCIL_VALUE_INCR_4:
1149
case STENCIL_VALUE_DECR_4:
1150
// We're adding/subtracting, just by the smallest value in 4-bit.
1151
snprintf(replacedAlpha, sizeof(replacedAlpha), "%f", 1.0 / 15.0);
1152
break;
1153
1154
case STENCIL_VALUE_INCR_8:
1155
case STENCIL_VALUE_DECR_8:
1156
// We're adding/subtracting, just by the smallest value in 8-bit.
1157
snprintf(replacedAlpha, sizeof(replacedAlpha), "%f", 1.0 / 255.0);
1158
break;
1159
1160
case STENCIL_VALUE_KEEP:
1161
// Do nothing. We'll mask out the alpha using color mask.
1162
break;
1163
}
1164
}
1165
1166
switch (stencilToAlpha) {
1167
case REPLACE_ALPHA_DUALSOURCE:
1168
WRITE(p, " %s = vec4(v.rgb, %s);\n", compat.fragColor0, replacedAlpha);
1169
WRITE(p, " %s = vec4(0.0, 0.0, 0.0, v.a);\n", compat.fragColor1);
1170
break;
1171
1172
case REPLACE_ALPHA_YES:
1173
WRITE(p, " %s = vec4(v.rgb, %s);\n", compat.fragColor0, replacedAlpha);
1174
break;
1175
1176
case REPLACE_ALPHA_NO:
1177
WRITE(p, " %s = v;\n", compat.fragColor0);
1178
break;
1179
1180
default:
1181
*errorString = "Bad stencil-to-alpha type, corrupt ID?";
1182
return false;
1183
}
1184
1185
switch (simulateLogicOpType) {
1186
case LOGICOPTYPE_ONE:
1187
WRITE(p, " %s.rgb = splat3(1.0);\n", compat.fragColor0);
1188
break;
1189
case LOGICOPTYPE_INVERT:
1190
WRITE(p, " %s.rgb = splat3(1.0) - %s.rgb;\n", compat.fragColor0, compat.fragColor0);
1191
break;
1192
case LOGICOPTYPE_NORMAL:
1193
break;
1194
1195
default:
1196
*errorString = "Bad logic op type, corrupt ID?";
1197
return false;
1198
}
1199
1200
// Final color computed - apply logic ops and bitwise color write mask, through shader blending, if specified.
1201
if (colorWriteMask || replaceLogicOp) {
1202
WRITE(p, " highp uint v32 = packUnorm4x8%s(%s);\n", packSuffix, compat.fragColor0);
1203
WRITE(p, " highp uint d32 = packUnorm4x8%s(destColor);\n", packSuffix);
1204
1205
// v32 is both the "s" to the logical operation, and the value that we'll merge to the destination with masking later.
1206
// d32 is the "d" to the logical operation.
1207
// NOTE: Alpha of v32 needs to be preserved. Same equations as in the software renderer.
1208
switch (replaceLogicOpType) {
1209
case GE_LOGIC_CLEAR: p.C(" v32 &= 0xFF000000u;\n"); break;
1210
case GE_LOGIC_AND: p.C(" v32 = v32 & (d32 | 0xFF000000u);\n"); break;
1211
case GE_LOGIC_AND_REVERSE: p.C(" v32 = v32 & (~d32 | 0xFF000000u);\n"); break;
1212
case GE_LOGIC_COPY: break; // source to dest, do nothing. Will be set to this, if not used.
1213
case GE_LOGIC_AND_INVERTED: p.C(" v32 = (~v32 & (d32 & 0x00FFFFFFu)) | (v32 & 0xFF000000u);\n"); break;
1214
case GE_LOGIC_NOOP: p.C(" v32 = (d32 & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
1215
case GE_LOGIC_XOR: p.C(" v32 = v32 ^ (d32 & 0x00FFFFFFu);\n"); break;
1216
case GE_LOGIC_OR: p.C(" v32 = v32 | (d32 & 0x00FFFFFFu);\n"); break;
1217
case GE_LOGIC_NOR: p.C(" v32 = (~(v32 | d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
1218
case GE_LOGIC_EQUIV: p.C(" v32 = (~(v32 ^ d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
1219
case GE_LOGIC_INVERTED: p.C(" v32 = (~d32 & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
1220
case GE_LOGIC_OR_REVERSE: p.C(" v32 = v32 | (~d32 & 0x00FFFFFFu);\n"); break;
1221
case GE_LOGIC_COPY_INVERTED: p.C(" v32 = (~v32 & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
1222
case GE_LOGIC_OR_INVERTED: p.C(" v32 = ((~v32 | d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
1223
case GE_LOGIC_NAND: p.C(" v32 = (~(v32 & d32) & 0x00FFFFFFu) | (v32 & 0xFF000000u);\n"); break;
1224
case GE_LOGIC_SET: p.C(" v32 |= 0x00FFFFFFu;\n"); break;
1225
}
1226
1227
// Note that the mask has already been flipped to the PC way - 1 means write.
1228
if (colorWriteMask) {
1229
if (stencilToAlpha != REPLACE_ALPHA_NO)
1230
WRITE(p, " v32 = (v32 & u_colorWriteMask) | (d32 & ~u_colorWriteMask);\n");
1231
else
1232
WRITE(p, " v32 = (v32 & u_colorWriteMask & 0x00FFFFFFu) | (d32 & (~u_colorWriteMask | 0xFF000000u));\n");
1233
}
1234
WRITE(p, " %s = unpackUnorm4x8%s(v32);\n", compat.fragColor0, packSuffix);
1235
}
1236
1237
if (blueToAlpha) {
1238
WRITE(p, " %s = vec4(0.0, 0.0, 0.0, %s.z); // blue to alpha\n", compat.fragColor0, compat.fragColor0);
1239
}
1240
1241
if (gstate_c.Use(GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT)) {
1242
DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());
1243
1244
const double scale = depthScale.ScaleU16();
1245
1246
WRITE(p, " highp float z = gl_FragCoord.z;\n");
1247
if (gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) {
1248
// We center the depth with an offset, but only its fraction matters.
1249
// When (DepthSliceFactor() - 1) is odd, it will be 0.5, otherwise 0.
1250
if (((int)(depthScale.Scale() - 1.0f) & 1) == 1) {
1251
WRITE(p, " z = (floor((z * %f) - (1.0 / 2.0)) + (1.0 / 2.0)) * (1.0 / %f);\n", scale, scale);
1252
} else {
1253
WRITE(p, " z = floor(z * %f) * (1.0 / %f);\n", scale, scale);
1254
}
1255
} else {
1256
WRITE(p, " z = (1.0 / 65535.0) * floor(z * 65535.0);\n");
1257
}
1258
WRITE(p, " gl_FragDepth = z;\n");
1259
} else if (useDiscardStencilBugWorkaround) {
1260
// Adreno and some Mali drivers apply early frag tests even with discard in the shader,
1261
// when only stencil is used. The exact situation seems to vary by driver.
1262
// Writing depth prevents the bug for both vendors, even with depth_unchanged specified.
1263
// This doesn't make a ton of sense, but empirically does work.
1264
WRITE(p, " gl_FragDepth = gl_FragCoord.z;\n");
1265
}
1266
1267
if (compat.shaderLanguage == HLSL_D3D11 || compat.shaderLanguage == HLSL_D3D9) {
1268
if (writeDepth) {
1269
WRITE(p, " outfragment.depth = gl_FragDepth;\n");
1270
}
1271
WRITE(p, " return outfragment;\n");
1272
}
1273
1274
WRITE(p, "}\n");
1275
1276
return true;
1277
}
1278
1279
1280