CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/GLES/ShaderManagerGLES.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#if defined(_WIN32) && defined(SHADERLOG)
19
#include "Common/CommonWindows.h"
20
#endif
21
22
#include <cmath>
23
#include <cstdio>
24
#include <map>
25
26
#include "Common/Data/Convert/SmallDataConvert.h"
27
#include "Common/Data/Text/I18n.h"
28
#include "Common/GPU/OpenGL/GLDebugLog.h"
29
#include "Common/GPU/OpenGL/GLFeatures.h"
30
#include "Common/LogReporting.h"
31
#include "Common/Math/math_util.h"
32
#include "Common/Math/lin/matrix4x4.h"
33
#include "Common/Profiler/Profiler.h"
34
#include "Common/GPU/Shader.h"
35
#include "Common/GPU/thin3d.h"
36
#include "Common/GPU/OpenGL/GLRenderManager.h"
37
#include "Common/System/Display.h"
38
#include "Common/System/OSD.h"
39
#include "Common/VR/PPSSPPVR.h"
40
41
#include "Common/Log.h"
42
#include "Common/File/FileUtil.h"
43
#include "Common/TimeUtil.h"
44
#include "Core/Config.h"
45
#include "Core/System.h"
46
#include "GPU/Math3D.h"
47
#include "GPU/GPUState.h"
48
#include "GPU/ge_constants.h"
49
#include "GPU/Common/ShaderUniforms.h"
50
#include "GPU/GLES/ShaderManagerGLES.h"
51
#include "GPU/GLES/DrawEngineGLES.h"
52
#include "GPU/GLES/FramebufferManagerGLES.h"
53
54
using namespace Lin;
55
56
// Wraps one generated vertex or fragment shader.
// Keeps a copy of the source text and the masks/flags from `params`, then asks the
// render manager to create the underlying shader object of type params.glShaderType.
Shader::Shader(GLRenderManager *render, const char *code, const std::string &desc, const ShaderDescGLES &params)
	: render_(render), useHWTransform_(params.useHWTransform), attrMask_(params.attrMask), uniformMask_(params.uniformMask) {
	PROFILE_THIS_SCOPE("shadercomp");

	source_ = code;
	isFragment_ = (params.glShaderType == GL_FRAGMENT_SHADER);

#if defined(SHADERLOG)
	// Optional dump of the generated source, for debugging shader generation.
#if defined(_WIN32)
	OutputDebugStringUTF8(code);
#else
	printf("%s\n", code);
#endif
#endif

	shader = render->CreateShader(params.glShaderType, source_, desc);
}
70
71
// Hands the underlying shader object back to the render manager for deletion.
Shader::~Shader() {
	render_->DeleteShader(shader);
}
74
75
// Links a vertex shader and a fragment shader into a program.
//
// Builds three tables for GLRenderManager::CreateProgram:
//  - attribute semantics (attribute index -> name in the shader source),
//  - uniform location queries (where to store each uniform's location),
//  - initializers (sampler uniforms bound to their fixed texture slots).
// All remaining uniforms are uploaded lazily by UpdateUniforms(), so everything
// starts out marked dirty.
//
// FSID and preloading are not used by this constructor.
LinkedShader::LinkedShader(GLRenderManager *render, VShaderID VSID, Shader *vs, FShaderID FSID, Shader *fs, bool useHWTransform, bool preloading)
	: render_(render), useHWTransform_(useHWTransform) {
	PROFILE_THIS_SCOPE("shaderlink");

	_assert_(render);
	_assert_(vs);
	_assert_(fs);

	vs_ = vs;

	std::vector<GLRShader *> shaders;
	shaders.reserve(2);
	shaders.push_back(vs->shader);
	shaders.push_back(fs->shader);

	std::vector<GLRProgram::Semantic> semantics;
	semantics.reserve(7);
	semantics.push_back({ ATTR_POSITION, "position" });
	semantics.push_back({ ATTR_TEXCOORD, "texcoord" });
	// The ATTR_NORMAL slot is named "fog" when not using hardware transform.
	if (useHWTransform_)
		semantics.push_back({ ATTR_NORMAL, "normal" });
	else
		semantics.push_back({ ATTR_NORMAL, "fog" });
	semantics.push_back({ ATTR_W1, "w1" });
	semantics.push_back({ ATTR_W2, "w2" });
	semantics.push_back({ ATTR_COLOR0, "color0" });
	semantics.push_back({ ATTR_COLOR1, "color1" });

	std::vector<GLRProgram::UniformLocQuery> queries;
	// 78 entries are pushed below (when USE_BONE_ARRAY is not defined); reserve up front
	// to avoid repeated reallocation, like the other tables in this function.
	queries.reserve(80);

	// Samplers.
	queries.push_back({ &u_tex, "tex" });
	queries.push_back({ &u_pal, "pal" });
	queries.push_back({ &u_testtex, "testtex" });
	queries.push_back({ &u_fbotex, "fbotex" });

	queries.push_back({ &u_proj, "u_proj" });
	queries.push_back({ &u_proj_lens, "u_proj_lens" });
	queries.push_back({ &u_proj_through, "u_proj_through" });
	queries.push_back({ &u_texenv, "u_texenv" });
	queries.push_back({ &u_fogcolor, "u_fogcolor" });
	queries.push_back({ &u_fogcoef, "u_fogcoef" });
	queries.push_back({ &u_alphacolorref, "u_alphacolorref" });
	queries.push_back({ &u_alphacolormask, "u_alphacolormask" });
	queries.push_back({ &u_colorWriteMask, "u_colorWriteMask" });
	queries.push_back({ &u_stencilReplaceValue, "u_stencilReplaceValue" });
	queries.push_back({ &u_blendFixA, "u_blendFixA" });
	queries.push_back({ &u_blendFixB, "u_blendFixB" });
	queries.push_back({ &u_fbotexSize, "u_fbotexSize" });

	// Transform
	queries.push_back({ &u_view, "u_view" });
	queries.push_back({ &u_world, "u_world" });
	queries.push_back({ &u_texmtx, "u_texmtx" });

	// Number of bone matrices UpdateUniforms() needs to upload for this vertex shader.
	if (VSID.Bit(VS_BIT_ENABLE_BONES))
		numBones = TranslateNumBones(VSID.Bits(VS_BIT_BONES, 3) + 1);
	else
		numBones = 0;
	queries.push_back({ &u_depthRange, "u_depthRange" });
	queries.push_back({ &u_cullRangeMin, "u_cullRangeMin" });
	queries.push_back({ &u_cullRangeMax, "u_cullRangeMax" });
	queries.push_back({ &u_rotation, "u_rotation" });

	// These two are only used for VR, but let's always query them for simplicity.
	queries.push_back({ &u_scaleX, "u_scaleX" });
	queries.push_back({ &u_scaleY, "u_scaleY" });

#ifdef USE_BONE_ARRAY
	queries.push_back({ &u_bone, "u_bone" });
#else
	static const char * const boneNames[8] = { "u_bone0", "u_bone1", "u_bone2", "u_bone3", "u_bone4", "u_bone5", "u_bone6", "u_bone7", };
	for (int i = 0; i < 8; i++) {
		queries.push_back({ &u_bone[i], boneNames[i] });
	}
#endif

	// Lighting, texturing
	queries.push_back({ &u_ambient, "u_ambient" });
	queries.push_back({ &u_matambientalpha, "u_matambientalpha" });
	queries.push_back({ &u_matdiffuse, "u_matdiffuse" });
	queries.push_back({ &u_matspecular, "u_matspecular" });
	queries.push_back({ &u_matemissive, "u_matemissive" });
	queries.push_back({ &u_uvscaleoffset, "u_uvscaleoffset" });
	queries.push_back({ &u_texclamp, "u_texclamp" });
	queries.push_back({ &u_texclampoff, "u_texclampoff" });
	queries.push_back({ &u_texNoAlphaMul, "u_texNoAlphaMul" });
	queries.push_back({ &u_lightControl, "u_lightControl" });

	// Per-light uniform names, indexed by light number.
	static const char * const lightPosNames[4] = { "u_lightpos0", "u_lightpos1", "u_lightpos2", "u_lightpos3", };
	static const char * const lightdir_names[4] = { "u_lightdir0", "u_lightdir1", "u_lightdir2", "u_lightdir3", };
	static const char * const lightatt_names[4] = { "u_lightatt0", "u_lightatt1", "u_lightatt2", "u_lightatt3", };
	static const char * const lightangle_spotCoef_names[4] = { "u_lightangle_spotCoef0", "u_lightangle_spotCoef1", "u_lightangle_spotCoef2", "u_lightangle_spotCoef3", };
	static const char * const lightambient_names[4] = { "u_lightambient0", "u_lightambient1", "u_lightambient2", "u_lightambient3", };
	static const char * const lightdiffuse_names[4] = { "u_lightdiffuse0", "u_lightdiffuse1", "u_lightdiffuse2", "u_lightdiffuse3", };
	static const char * const lightspecular_names[4] = { "u_lightspecular0", "u_lightspecular1", "u_lightspecular2", "u_lightspecular3", };
	for (int i = 0; i < 4; i++) {
		queries.push_back({ &u_lightpos[i], lightPosNames[i] });
		queries.push_back({ &u_lightdir[i], lightdir_names[i] });
		queries.push_back({ &u_lightatt[i], lightatt_names[i] });
		queries.push_back({ &u_lightangle_spotCoef[i], lightangle_spotCoef_names[i] });

		queries.push_back({ &u_lightambient[i], lightambient_names[i] });
		queries.push_back({ &u_lightdiffuse[i], lightdiffuse_names[i] });
		queries.push_back({ &u_lightspecular[i], lightspecular_names[i] });
	}

	// We need to fetch these unconditionally, gstate_c.spline or bezier will not be set if we
	// create this shader at load time from the shader cache.
	queries.push_back({ &u_tess_points, "u_tess_points" });
	queries.push_back({ &u_tess_weights_u, "u_tess_weights_u" });
	queries.push_back({ &u_tess_weights_v, "u_tess_weights_v" });
	queries.push_back({ &u_spline_counts, "u_spline_counts" });
	queries.push_back({ &u_depal_mask_shift_off_fmt, "u_depal_mask_shift_off_fmt" });
	queries.push_back({ &u_mipBias, "u_mipBias" });

	attrMask = vs->GetAttrMask();
	// Only uniforms that either stage declares ever need uploading.
	availableUniforms = vs->GetUniformMask() | fs->GetUniformMask();

	// Sampler uniforms get their fixed texture slot assigned once.
	std::vector<GLRProgram::Initializer> initialize;
	initialize.reserve(7);
	initialize.push_back({ &u_tex, 0, TEX_SLOT_PSP_TEXTURE });
	initialize.push_back({ &u_fbotex, 0, TEX_SLOT_SHADERBLEND_SRC });
	initialize.push_back({ &u_testtex, 0, TEX_SLOT_ALPHATEST });
	initialize.push_back({ &u_pal, 0, TEX_SLOT_CLUT }); // CLUT
	initialize.push_back({ &u_tess_points, 0, TEX_SLOT_SPLINE_POINTS }); // Control Points
	initialize.push_back({ &u_tess_weights_u, 0, TEX_SLOT_SPLINE_WEIGHTS_U });
	initialize.push_back({ &u_tess_weights_v, 0, TEX_SLOT_SPLINE_WEIGHTS_V });

	GLRProgramFlags flags{};
	flags.supportDualSource = gstate_c.Use(GPU_USE_DUALSOURCE_BLEND);
	if (!VSID.Bit(VS_BIT_IS_THROUGH) && gstate_c.Use(GPU_USE_DEPTH_CLAMP)) {
		// Clip distance 0 is requested for the depth clamp case; range culling then takes distance 1.
		flags.useClipDistance0 = true;
		if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Use(GPU_USE_CLIP_DISTANCE))
			flags.useClipDistance1 = true;
	} else if (VSID.Bit(VS_BIT_VERTEX_RANGE_CULLING) && gstate_c.Use(GPU_USE_CLIP_DISTANCE)) {
		// Only range culling needs a clip distance, so it gets slot 0.
		flags.useClipDistance0 = true;
	}

	program = render->CreateProgram(shaders, semantics, queries, initialize, nullptr, flags);

	// The rest, use the "dirty" mechanism.
	dirtyUniforms = DIRTY_ALL_UNIFORMS;
}
216
217
void LinkedShader::Delete() {
218
program->SetDeleteCallback([](void *thiz) {
219
LinkedShader *ls = (LinkedShader *)thiz;
220
delete ls;
221
}, this);
222
render_->DeleteProgram(program);
223
program = nullptr;
224
}
225
226
// A LinkedShader must be torn down through Delete(), which clears `program`;
// this assert checks that it was.
LinkedShader::~LinkedShader() {
	_assert_(program == nullptr);
}
229
230
// Utility
231
// Uploads a single float to the given uniform.
static inline void SetFloatUniform(GLRenderManager *render, GLint *uniform, float value) {
	const float *single = &value;
	render->SetUniformF(uniform, 1, single);
}
234
235
// Uploads two consecutive floats (a vec2) to the given uniform.
static inline void SetFloatUniform2(GLRenderManager *render, GLint *uniform, float value[2]) {
	constexpr int kComponents = 2;
	render->SetUniformF(uniform, kComponents, value);
}
238
239
// Expands a packed 8-bit-per-channel color to floats and uploads the first three
// components (the fourth is computed but not sent).
static inline void SetColorUniform3(GLRenderManager *render, GLint *uniform, u32 color) {
	float rgba[4];
	Uint8x4ToFloat4(rgba, color);
	render->SetUniformF(uniform, 3, rgba);
}
244
245
// Uploads a vec4 built from the color's three channels plus a separately supplied 8-bit alpha.
static void SetColorUniform3Alpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
	float rgba[4];
	Uint8x3ToFloat4_AlphaUint8(rgba, color, alpha);
	render->SetUniformF(uniform, 4, rgba);
}
250
251
// This passes colors unscaled (e.g. 0 - 255 not 0 - 1.)
252
static void SetColorUniform3Alpha255(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
253
if (gl_extensions.gpuVendor == GPU_VENDOR_IMGTEC) {
254
const float col[4] = {
255
(float)((color & 0xFF) >> 0) * (1.0f / 255.0f),
256
(float)((color & 0xFF00) >> 8) * (1.0f / 255.0f),
257
(float)((color & 0xFF0000) >> 16) * (1.0f / 255.0f),
258
(float)alpha * (1.0f / 255.0f)
259
};
260
render->SetUniformF(uniform, 4, col);
261
} else {
262
const float col[4] = {
263
(float)((color & 0xFF) >> 0),
264
(float)((color & 0xFF00) >> 8),
265
(float)((color & 0xFF0000) >> 16),
266
(float)alpha
267
};
268
render->SetUniformF(uniform, 4, col);
269
}
270
}
271
272
// Integer variant: uploads the three 8-bit color channels and the alpha as an ivec4, unscaled.
static void SetColorUniform3iAlpha(GLRenderManager *render, GLint *uniform, u32 color, u8 alpha) {
	int rgba[4];
	rgba[0] = (int)((color & 0xFF) >> 0);
	rgba[1] = (int)((color & 0xFF00) >> 8);
	rgba[2] = (int)((color & 0xFF0000) >> 16);
	rgba[3] = (int)alpha;
	render->SetUniformI(uniform, 4, rgba);
}
281
282
// Uploads a vec4 whose xyz are the color channels scaled to 0 - 1 and whose w is an
// arbitrary extra float supplied by the caller.
static void SetColorUniform3ExtraFloat(GLRenderManager *render, GLint *uniform, u32 color, float extra) {
	const u32 r = color & 0xFF;
	const u32 g = (color & 0xFF00) >> 8;
	const u32 b = (color & 0xFF0000) >> 16;

	float packed[4];
	packed[0] = r / 255.0f;
	packed[1] = g / 255.0f;
	packed[2] = b / 255.0f;
	packed[3] = extra;
	render->SetUniformF(uniform, 4, packed);
}
291
292
// Expands three 24-bit float values to regular floats and uploads them as a vec3.
static void SetFloat24Uniform3(GLRenderManager *render, GLint *uniform, const uint32_t data[3]) {
	float expanded[4];
	ExpandFloat24x3ToFloat4(expanded, data);
	render->SetUniformF(uniform, 3, expanded);
}
297
298
// Like SetFloat24Uniform3, but uses the expand-and-normalize helper before uploading the vec3.
static void SetFloat24Uniform3Normalized(GLRenderManager *render, GLint *uniform, const uint32_t data[3]) {
	float expanded[4];
	ExpandFloat24x3ToFloat4AndNormalize(expanded, data);
	render->SetUniformF(uniform, 3, expanded);
}
303
304
// Uploads four consecutive floats (a vec4) to the given uniform.
static void SetFloatUniform4(GLRenderManager *render, GLint *uniform, float data[4]) {
	constexpr int kComponents = 4;
	render->SetUniformF(uniform, kComponents, data);
}
307
308
// Converts a 4x3 matrix to the 4x4 form the shaders take and uploads it.
static void SetMatrix4x3(GLRenderManager *render, GLint *uniform, const float *m4x3) {
	float expanded[16];
	ConvertMatrix4x3To4x4Transposed(expanded, m4x3);
	render->SetUniformM4x4(uniform, expanded);
}
313
314
// Applies the viewport offset/scale stored in gstate_c to a projection matrix, in place.
// When not rendering to a buffer, the Y offset is negated.
static inline void ScaleProjMatrix(Matrix4x4 &in, bool useBufferedRendering) {
	// GL upside down is a pain as usual.
	const float yOffset = useBufferedRendering ? gstate_c.vpYOffset : -gstate_c.vpYOffset;

	const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale);
	const Vec3 trans(gstate_c.vpXOffset, yOffset, gstate_c.vpZOffset);
	in.translateAndScale(trans, scale);
}
324
325
// Negates the Y and/or X related elements of a projection matrix, in place, depending on
// the sign of the viewport height/width in gstate_c. The Y rule is inverted when not
// rendering to a buffer.
static inline void FlipProjMatrix(Matrix4x4 &in, bool useBufferedRendering) {
	bool invertedY;
	if (useBufferedRendering) {
		invertedY = gstate_c.vpHeight < 0;
	} else {
		invertedY = gstate_c.vpHeight > 0;
	}
	if (invertedY) {
		// Elements 1, 5, 9, 13.
		for (int idx = 1; idx < 16; idx += 4) {
			in[idx] = -in[idx];
		}
	}

	if (gstate_c.vpWidth < 0) {
		// Elements 0, 4, 8, 12.
		for (int idx = 0; idx < 16; idx += 4) {
			in[idx] = -in[idx];
		}
	}
}
342
343
static inline bool GuessVRDrawingHUD(bool is2D, bool flatScreen) {
344
345
bool hud = true;
346
//HUD can be disabled in settings
347
if (!g_Config.bRescaleHUD) hud = false;
348
//HUD cannot be rendered in flatscreen
349
else if (flatScreen) hud = false;
350
//HUD has to be 2D
351
else if (!is2D) hud = false;
352
//HUD has to be blended
353
else if (!gstate.isAlphaBlendEnabled()) hud = false;
354
//HUD cannot be rendered with clear color mask
355
else if (gstate.isClearModeColorMask()) hud = false;
356
//HUD cannot be rendered with depth color mask
357
else if (gstate.isClearModeDepthMask()) hud = false;
358
//HUD texture has to contain alpha channel
359
else if (!gstate.isTextureAlphaUsed()) hud = false;
360
//HUD texture cannot be in 5551 format
361
else if (gstate.getTextureFormat() == GETextureFormat::GE_TFMT_5551) hud = false;
362
//HUD texture cannot be in CLUT16 format
363
else if (gstate.getTextureFormat() == GETextureFormat::GE_TFMT_CLUT16) hud = false;
364
//HUD texture cannot be in CLUT32 format
365
else if (gstate.getTextureFormat() == GETextureFormat::GE_TFMT_CLUT32) hud = false;
366
//HUD cannot have full texture alpha
367
else if (gstate_c.textureFullAlpha && gstate.getTextureFormat() != GETextureFormat::GE_TFMT_CLUT4) hud = false;
368
//HUD must have full vertex alpha
369
else if (!gstate_c.vertexFullAlpha && gstate.getDepthTestFunction() == GE_COMP_NEVER) hud = false;
370
//HUD cannot render FB screenshot
371
else if (gstate_c.curTextureHeight % 68 <= 1) hud = false;
372
//HUD cannot be rendered with add function
373
else if (gstate.getTextureFunction() == GETexFunc::GE_TEXFUNC_ADD) hud = false;
374
//HUD cannot be rendered with replace function
375
else if (gstate.getTextureFunction() == GETexFunc::GE_TEXFUNC_REPLACE) hud = false;
376
//HUD cannot be rendered with full clear color mask
377
else if ((gstate.getClearModeColorMask() == 0xFFFFFF) && (gstate.getColorMask() == 0xFFFFFF)) hud = false;
378
379
return hud;
380
}
381
382
void LinkedShader::use(const ShaderID &VSID) const {
383
render_->BindProgram(program);
384
// Note that we no longer track attr masks here - we do it for the input layouts instead.
385
}
386
387
void LinkedShader::UpdateUniforms(const ShaderID &vsid, bool useBufferedRendering, const ShaderLanguageDesc &shaderLanguage) {
388
u64 dirty = dirtyUniforms & availableUniforms;
389
dirtyUniforms = 0;
390
391
// Analyze scene
392
bool is2D, flatScreen;
393
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY)) {
394
is2D = Is2DVRObject(gstate.projMatrix, gstate.isModeThrough());
395
flatScreen = IsFlatVRScene();
396
}
397
398
if (!dirty)
399
return;
400
401
if (dirty & DIRTY_DEPAL) {
402
int indexMask = gstate.getClutIndexMask();
403
int indexShift = gstate.getClutIndexShift();
404
int indexOffset = gstate.getClutIndexStartPos() >> 4;
405
int format = gstate_c.depalFramebufferFormat;
406
uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);
407
// Poke in a bilinear filter flag in the top bit.
408
val |= gstate.isMagnifyFilteringEnabled() << 31;
409
render_->SetUniformUI1(&u_depal_mask_shift_off_fmt, val);
410
}
411
412
// Set HUD mode
413
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY)) {
414
if (GuessVRDrawingHUD(is2D, flatScreen)) {
415
float aspect = 480.0f / 272.0f * (IsImmersiveVRMode() ? 0.5f : 1.0f);
416
render_->SetUniformF1(&u_scaleX, g_Config.fHeadUpDisplayScale * aspect);
417
render_->SetUniformF1(&u_scaleY, g_Config.fHeadUpDisplayScale);
418
} else {
419
render_->SetUniformF1(&u_scaleX, 1.0f);
420
render_->SetUniformF1(&u_scaleY, 1.0f);
421
}
422
}
423
424
// Update any dirty uniforms before we draw
425
if (dirty & DIRTY_PROJMATRIX) {
426
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY)) {
427
Matrix4x4 vrProjection;
428
if (flatScreen || is2D) {
429
memcpy(&vrProjection, gstate.projMatrix, 16 * sizeof(float));
430
} else {
431
UpdateVRProjection(gstate.projMatrix, vrProjection.m);
432
}
433
UpdateVRParams(gstate.projMatrix);
434
435
FlipProjMatrix(vrProjection, useBufferedRendering);
436
ScaleProjMatrix(vrProjection, useBufferedRendering);
437
438
render_->SetUniformM4x4(&u_proj_lens, vrProjection.m);
439
}
440
441
Matrix4x4 flippedMatrix;
442
memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));
443
444
FlipProjMatrix(flippedMatrix, useBufferedRendering);
445
ScaleProjMatrix(flippedMatrix, useBufferedRendering);
446
447
render_->SetUniformM4x4(&u_proj, flippedMatrix.m);
448
render_->SetUniformF1(&u_rotation, useBufferedRendering ? 0 : (float)g_display.rotation);
449
}
450
if (dirty & DIRTY_PROJTHROUGHMATRIX) {
451
Matrix4x4 proj_through;
452
if (useBufferedRendering) {
453
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, 0.0f, gstate_c.curRTHeight, 0.0f, 1.0f);
454
} else {
455
proj_through.setOrtho(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0.0f, 0.0f, 1.0f);
456
}
457
render_->SetUniformM4x4(&u_proj_through, proj_through.getReadPtr());
458
}
459
if (dirty & DIRTY_TEXENV) {
460
SetColorUniform3(render_, &u_texenv, gstate.texenvcolor);
461
}
462
if (dirty & DIRTY_TEX_ALPHA_MUL) {
463
bool doTextureAlpha = gstate.isTextureAlphaUsed();
464
if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) {
465
doTextureAlpha = false;
466
}
467
float noAlphaMul[2] = { doTextureAlpha ? 0.0f : 1.0f, gstate.isColorDoublingEnabled() ? 2.0f : 1.0f };
468
render_->SetUniformF(&u_texNoAlphaMul, 2, noAlphaMul);
469
}
470
if (dirty & DIRTY_ALPHACOLORREF) {
471
if (shaderLanguage.bitwiseOps) {
472
render_->SetUniformUI1(&u_alphacolorref, gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24));
473
} else {
474
SetColorUniform3Alpha255(render_, &u_alphacolorref, gstate.getColorTestRef(), gstate.getAlphaTestRef() & gstate.getAlphaTestMask());
475
}
476
}
477
if (dirty & DIRTY_ALPHACOLORMASK) {
478
render_->SetUniformUI1(&u_alphacolormask, gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24));
479
}
480
if (dirty & DIRTY_COLORWRITEMASK) {
481
render_->SetUniformUI1(&u_colorWriteMask, ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF)));
482
}
483
if (dirty & DIRTY_FOGCOLOR) {
484
SetColorUniform3(render_, &u_fogcolor, gstate.fogcolor);
485
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY)) {
486
SetVRCompat(VR_COMPAT_FOG_COLOR, gstate.fogcolor);
487
}
488
}
489
if (dirty & DIRTY_FOGCOEF) {
490
float fogcoef[2] = {
491
getFloat24(gstate.fog1),
492
getFloat24(gstate.fog2),
493
};
494
// The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.
495
// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988
496
if (my_isnanorinf(fogcoef[0])) {
497
// Not really sure what a sensible value might be, but let's try 64k.
498
fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f;
499
}
500
if (my_isnanorinf(fogcoef[1])) {
501
fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f;
502
}
503
render_->SetUniformF(&u_fogcoef, 2, fogcoef);
504
}
505
if (dirty & DIRTY_UVSCALEOFFSET) {
506
float widthFactor = 1.0f;
507
float heightFactor = 1.0f;
508
if (gstate_c.textureIsFramebuffer) {
509
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
510
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
511
const int w = gstate.getTextureWidth(0);
512
const int h = gstate.getTextureHeight(0);
513
widthFactor = (float)w * invW;
514
heightFactor = (float)h * invH;
515
}
516
float uvscaleoff[4];
517
if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) {
518
// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.
519
// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.
520
uvscaleoff[0] = gstate_c.uv.uScale * widthFactor;
521
uvscaleoff[1] = gstate_c.uv.vScale * heightFactor;
522
uvscaleoff[2] = gstate_c.uv.uOff * widthFactor;
523
uvscaleoff[3] = gstate_c.uv.vOff * heightFactor;
524
} else {
525
uvscaleoff[0] = widthFactor;
526
uvscaleoff[1] = heightFactor;
527
uvscaleoff[2] = 0.0f;
528
uvscaleoff[3] = 0.0f;
529
}
530
render_->SetUniformF(&u_uvscaleoffset, 4, uvscaleoff);
531
}
532
533
if ((dirty & DIRTY_TEXCLAMP) && u_texclamp != -1) {
534
const float invW = 1.0f / (float)gstate_c.curTextureWidth;
535
const float invH = 1.0f / (float)gstate_c.curTextureHeight;
536
const int w = gstate.getTextureWidth(0);
537
const int h = gstate.getTextureHeight(0);
538
const float widthFactor = (float)w * invW;
539
const float heightFactor = (float)h * invH;
540
541
// First wrap xy, then half texel xy (for clamp.)
542
const float texclamp[4] = {
543
widthFactor,
544
heightFactor,
545
invW * 0.5f,
546
invH * 0.5f,
547
};
548
const float texclampoff[2] = {
549
gstate_c.curTextureXOffset * invW,
550
gstate_c.curTextureYOffset * invH,
551
};
552
render_->SetUniformF(&u_texclamp, 4, texclamp);
553
if (u_texclampoff != -1) {
554
render_->SetUniformF(&u_texclampoff, 2, texclampoff);
555
}
556
}
557
558
if ((dirty & DIRTY_MIPBIAS) && u_mipBias != -1) {
559
float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f);
560
mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1);
561
562
render_->SetUniformF(&u_mipBias, 1, &mipBias);
563
}
564
565
// Transform
566
if (dirty & DIRTY_WORLDMATRIX) {
567
SetMatrix4x3(render_, &u_world, gstate.worldMatrix);
568
}
569
if (dirty & DIRTY_VIEWMATRIX) {
570
if (gstate_c.Use(GPU_USE_VIRTUAL_REALITY)) {
571
float leftEyeView[16];
572
float rightEyeView[16];
573
ConvertMatrix4x3To4x4Transposed(leftEyeView, gstate.viewMatrix);
574
ConvertMatrix4x3To4x4Transposed(rightEyeView, gstate.viewMatrix);
575
if (!is2D) {
576
UpdateVRView(leftEyeView, rightEyeView);
577
}
578
render_->SetUniformM4x4Stereo("u_view", &u_view, leftEyeView, rightEyeView);
579
} else {
580
SetMatrix4x3(render_, &u_view, gstate.viewMatrix);
581
}
582
}
583
if (dirty & DIRTY_TEXMATRIX) {
584
SetMatrix4x3(render_, &u_texmtx, gstate.tgenMatrix);
585
}
586
if (dirty & DIRTY_DEPTHRANGE) {
587
// Since depth is [-1, 1] mapping to [minz, maxz], this is easyish.
588
float vpZScale = gstate.getViewportZScale();
589
float vpZCenter = gstate.getViewportZCenter();
590
591
// These are just the reverse of the formulas in GPUStateUtils.
592
float halfActualZRange = InfToZero(gstate_c.vpDepthScale != 0.0f ? vpZScale / gstate_c.vpDepthScale : 0.0f);
593
float inverseDepthScale = InfToZero(gstate_c.vpDepthScale != 0.0f ? 1.0f / gstate_c.vpDepthScale : 0.0f);
594
float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;
595
float viewZScale = halfActualZRange;
596
float viewZCenter = minz + halfActualZRange;
597
598
if (!gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) {
599
viewZScale = vpZScale;
600
viewZCenter = vpZCenter;
601
}
602
603
float data[4] = { viewZScale, viewZCenter, gstate_c.vpZOffset, inverseDepthScale };
604
SetFloatUniform4(render_, &u_depthRange, data);
605
}
606
if (dirty & DIRTY_CULLRANGE) {
607
float minValues[4], maxValues[4];
608
CalcCullRange(minValues, maxValues, !useBufferedRendering, true);
609
SetFloatUniform4(render_, &u_cullRangeMin, minValues);
610
SetFloatUniform4(render_, &u_cullRangeMax, maxValues);
611
}
612
613
if (dirty & DIRTY_STENCILREPLACEVALUE) {
614
float f = (float)gstate.getStencilTestRef() * (1.0f / 255.0f);
615
render_->SetUniformF(&u_stencilReplaceValue, 1, &f);
616
}
617
float bonetemp[16];
618
for (int i = 0; i < numBones; i++) {
619
if (dirty & (DIRTY_BONEMATRIX0 << i)) {
620
ConvertMatrix4x3To4x4Transposed(bonetemp, gstate.boneMatrix + 12 * i);
621
render_->SetUniformM4x4(&u_bone[i], bonetemp);
622
}
623
}
624
625
if (dirty & DIRTY_SHADERBLEND) {
626
if (u_blendFixA != -1) {
627
SetColorUniform3(render_, &u_blendFixA, gstate.getFixA());
628
}
629
if (u_blendFixB != -1) {
630
SetColorUniform3(render_, &u_blendFixB, gstate.getFixB());
631
}
632
633
const float fbotexSize[2] = {
634
1.0f / (float)gstate_c.curRTRenderWidth,
635
1.0f / (float)gstate_c.curRTRenderHeight,
636
};
637
if (u_fbotexSize != -1) {
638
render_->SetUniformF(&u_fbotexSize, 2, fbotexSize);
639
}
640
}
641
642
// Lighting
643
if (dirty & DIRTY_LIGHT_CONTROL) {
644
render_->SetUniformUI1(&u_lightControl, PackLightControlBits());
645
}
646
if (dirty & DIRTY_AMBIENT) {
647
SetColorUniform3Alpha(render_, &u_ambient, gstate.ambientcolor, gstate.getAmbientA());
648
}
649
if (dirty & DIRTY_MATAMBIENTALPHA) {
650
SetColorUniform3Alpha(render_, &u_matambientalpha, gstate.materialambient, gstate.getMaterialAmbientA());
651
}
652
if (dirty & DIRTY_MATDIFFUSE) {
653
SetColorUniform3(render_, &u_matdiffuse, gstate.materialdiffuse);
654
}
655
if (dirty & DIRTY_MATEMISSIVE) {
656
SetColorUniform3(render_, &u_matemissive, gstate.materialemissive);
657
}
658
if (dirty & DIRTY_MATSPECULAR) {
659
SetColorUniform3ExtraFloat(render_, &u_matspecular, gstate.materialspecular, getFloat24(gstate.materialspecularcoef));
660
}
661
662
for (int i = 0; i < 4; i++) {
663
if (dirty & (DIRTY_LIGHT0 << i)) {
664
if (gstate.isDirectionalLight(i)) {
665
// Prenormalize for cheaper calculations in shader
666
SetFloat24Uniform3Normalized(render_, &u_lightpos[i], &gstate.lpos[i * 3]);
667
} else {
668
SetFloat24Uniform3(render_, &u_lightpos[i], &gstate.lpos[i * 3]);
669
}
670
if (u_lightdir[i] != -1) SetFloat24Uniform3Normalized(render_, &u_lightdir[i], &gstate.ldir[i * 3]);
671
if (u_lightatt[i] != -1) SetFloat24Uniform3(render_, &u_lightatt[i], &gstate.latt[i * 3]);
672
if (u_lightangle_spotCoef[i] != -1) {
673
float lightangle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };
674
SetFloatUniform2(render_, &u_lightangle_spotCoef[i], lightangle_spotCoef);
675
}
676
if (u_lightambient[i] != -1) SetColorUniform3(render_, &u_lightambient[i], gstate.lcolor[i * 3]);
677
if (u_lightdiffuse[i] != -1) SetColorUniform3(render_, &u_lightdiffuse[i], gstate.lcolor[i * 3 + 1]);
678
if (u_lightspecular[i] != -1) SetColorUniform3(render_, &u_lightspecular[i], gstate.lcolor[i * 3 + 2]);
679
}
680
}
681
682
if (dirty & DIRTY_BEZIERSPLINE) {
683
if (u_spline_counts != -1) {
684
render_->SetUniformI1(&u_spline_counts, gstate_c.spline_num_points_u);
685
}
686
}
687
}
688
689
// Size in bytes (32 KiB) of the scratch buffer that generated shader source is written into.
static constexpr size_t CODE_BUFFER_SIZE = 32 * 1024;
690
691
// Sets up empty shader caches, fetches the GL render manager from the draw context and
// allocates the scratch buffer used for shader source generation.
ShaderManagerGLES::ShaderManagerGLES(Draw::DrawContext *draw)
	: ShaderManagerCommon(draw), fsCache_(16), vsCache_(16) {
	codeBuffer_ = new char[CODE_BUFFER_SIZE];
	render_ = (GLRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);

	// No shader has been applied yet.
	lastVSID_.set_invalid();
	lastFSID_.set_invalid();
}
698
699
// Frees the source generation scratch buffer allocated in the constructor.
ShaderManagerGLES::~ShaderManagerGLES() {
	delete[] codeBuffer_;
}
702
703
void ShaderManagerGLES::Clear() {
704
DirtyLastShader();
705
for (auto iter = linkedShaderCache_.begin(); iter != linkedShaderCache_.end(); ++iter) {
706
iter->ls->Delete();
707
}
708
fsCache_.Iterate([&](const FShaderID &key, Shader *shader) {
709
delete shader;
710
});
711
vsCache_.Iterate([&](const VShaderID &key, Shader *shader) {
712
delete shader;
713
});
714
linkedShaderCache_.clear();
715
fsCache_.Clear();
716
vsCache_.Clear();
717
DirtyLastShader();
718
}
719
720
void ShaderManagerGLES::ClearShaders() {
721
// TODO: Recreate all from the diskcache when we come back.
722
Clear();
723
}
724
725
void ShaderManagerGLES::DeviceLost() {
726
Clear();
727
render_ = nullptr;
728
draw_ = nullptr;
729
}
730
731
// Re-attaches to a draw context and re-fetches its GL render manager.
void ShaderManagerGLES::DeviceRestore(Draw::DrawContext *draw) {
	draw_ = draw;
	render_ = (GLRenderManager *)draw->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
}
735
736
void ShaderManagerGLES::DirtyLastShader() {
737
// Forget the last shader ID
738
lastFSID_.set_invalid();
739
lastVSID_.set_invalid();
740
gstate_c.Dirty(DIRTY_ALL_UNIFORMS | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE);
741
shaderSwitchDirtyUniforms_ = 0;
742
lastShader_ = nullptr;
743
lastVShaderSame_ = false;
744
}
745
746
// Can only fail by failing to generate the code (bad FSID).
747
// Any actual failures driver-side happens later in the render manager.
748
Shader *ShaderManagerGLES::CompileFragmentShader(FShaderID FSID) {
749
uint64_t uniformMask;
750
std::string errorString;
751
FragmentShaderFlags flags;
752
if (!GenerateFragmentShader(FSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &uniformMask, &flags, &errorString)) {
753
ERROR_LOG_REPORT(Log::G3D, "FS shader gen error: %s (%s: %08x:%08x)", errorString.c_str(), "GLES", FSID.d[0], FSID.d[1]);
754
return nullptr;
755
}
756
_assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "FS length error: %d", (int)strlen(codeBuffer_));
757
std::string desc = FragmentShaderDesc(FSID);
758
ShaderDescGLES params{ GL_FRAGMENT_SHADER, 0, uniformMask };
759
return new Shader(render_, codeBuffer_, desc, params);
760
}
761
762
// Can only fail by failing to generate the code (bad VSID).
763
// Any actual failures driver-side happens later in the render manager.
764
Shader *ShaderManagerGLES::CompileVertexShader(VShaderID VSID) {
765
bool useHWTransform = VSID.Bit(VS_BIT_USE_HW_TRANSFORM);
766
uint32_t attrMask;
767
uint64_t uniformMask;
768
std::string errorString;
769
VertexShaderFlags flags;
770
if (!GenerateVertexShader(VSID, codeBuffer_, draw_->GetShaderLanguageDesc(), draw_->GetBugs(), &attrMask, &uniformMask, &flags, &errorString)) {
771
ERROR_LOG_REPORT(Log::G3D, "VS shader gen error: %s (%s: %08x:%08x)", errorString.c_str(), "GLES", VSID.d[0], VSID.d[1]);
772
return nullptr;
773
}
774
_assert_msg_(strlen(codeBuffer_) < CODE_BUFFER_SIZE, "VS length error: %d", (int)strlen(codeBuffer_));
775
std::string desc = VertexShaderDesc(VSID);
776
ShaderDescGLES params{ GL_VERTEX_SHADER, attrMask, uniformMask };
777
params.useHWTransform = useHWTransform;
778
return new Shader(render_, codeBuffer_, desc, params);
779
}
780
781
// Resolves the vertex shader for the current state.
//
// Writes the shader ID to *VSID (recomputed only if vertex shader state is dirty, otherwise
// the last ID is reused) and returns the matching Shader: the last shader's VS if the ID is
// unchanged, a cached one, or a freshly compiled one. If generation fails, a software
// transform shader is compiled instead and cached under the ORIGINAL ID. The returned
// pointer is null if that fallback fails too.
Shader *ShaderManagerGLES::ApplyVertexShader(bool useHWTransform, bool useHWTessellation, VertexDecoder *decoder, bool weightsAsFloat, bool useSkinInDecode, VShaderID *VSID) {
	if (!gstate_c.IsDirty(DIRTY_VERTEXSHADER_STATE)) {
		*VSID = lastVSID_;
	} else {
		gstate_c.Clean(DIRTY_VERTEXSHADER_STATE);
		ComputeVertexShaderID(VSID, decoder, useHWTransform, useHWTessellation, weightsAsFloat, useSkinInDecode);
	}

	lastVShaderSame_ = (lastShader_ != nullptr && *VSID == lastVSID_);
	if (lastVShaderSame_) {
		return lastShader_->vs_;  // Already all set.
	}
	lastVSID_ = *VSID;

	Shader *vertexShader;
	if (vsCache_.Get(*VSID, &vertexShader)) {
		return vertexShader;
	}

	// Vertex shader not in cache. Let's compile it.
	vertexShader = CompileVertexShader(*VSID);
	if (vertexShader == nullptr) {
		ERROR_LOG(Log::G3D, "Vertex shader generation failed, falling back to software transform");
		if (!g_Config.bHideSlowWarnings) {
			auto gr = GetI18NCategory(I18NCat::GRAPHICS);
			g_OSD.Show(OSDType::MESSAGE_ERROR, gr->T("hardware transform error - falling back to software"), 2.5f);
		}

		// TODO: Look for existing shader with the appropriate ID, use that instead of generating a new one - however, need to make sure
		// that that shader ID is not used when computing the linked shader ID below, because then IDs won't match
		// next time and we'll do this over and over...

		// Can still work with software transform.
		VShaderID fallbackID;
		ComputeVertexShaderID(&fallbackID, decoder, false, false, weightsAsFloat, true);
		vertexShader = CompileVertexShader(fallbackID);
	}

	vsCache_.Insert(*VSID, vertexShader);
	return vertexShader;
}
824
825
// Determines the fragment shader for the current state, pairs it with the given vertex
// shader, and returns the linked program to draw with (creating it if needed), with its
// uniforms updated. Returns nullptr if either shader is missing, in which case we can't draw.
LinkedShader *ShaderManagerGLES::ApplyFragmentShader(VShaderID VSID, Shader *vs, const ComputedPipelineState &pipelineState, bool useBufferedRendering) {
	// Move pending dirty-uniform bits out of gstate_c and onto the shader(s) that need them.
	const uint64_t pendingDirty = gstate_c.GetDirtyUniforms();
	if (pendingDirty) {
		if (lastShader_)
			lastShader_->dirtyUniforms |= pendingDirty;
		shaderSwitchDirtyUniforms_ |= pendingDirty;
		gstate_c.CleanUniforms();
	}

	// Only recompute the fragment shader ID when the relevant state has changed.
	FShaderID FSID;
	if (!gstate_c.IsDirty(DIRTY_FRAGMENTSHADER_STATE)) {
		FSID = lastFSID_;
	} else {
		gstate_c.Clean(DIRTY_FRAGMENTSHADER_STATE);
		ComputeFragmentShaderID(&FSID, pipelineState, draw_->GetBugs());
	}

	// Fast path: same vertex and fragment shader as last time, just refresh uniforms.
	if (lastVShaderSame_ && FSID == lastFSID_) {
		lastShader_->UpdateUniforms(VSID, useBufferedRendering, draw_->GetShaderLanguageDesc());
		return lastShader_;
	}

	lastFSID_ = FSID;

	Shader *fs;
	if (!fsCache_.Get(FSID, &fs)) {
		// Fragment shader not in cache. Let's compile it.
		// Can't really tell if we succeeded since the compile is on the GPU thread later.
		// Could fail to generate, in which case we're kinda screwed.
		fs = CompileFragmentShader(FSID);
		if (fs == nullptr) {
			ERROR_LOG(Log::G3D, "Failed to generate fragment shader with ID %08x:%08x", FSID.d[0], FSID.d[1]);
			// Still insert it so we don't end up spamming generation.
		}
		fsCache_.Insert(FSID, fs);
	}

	// Okay, we have both shaders. Let's see if there's a linked one.
	LinkedShader *linked = nullptr;

	const u64 switchDirty = shaderSwitchDirtyUniforms_;
	for (auto &cacheEntry : linkedShaderCache_) {
		// Deferred dirtying! Let's see if we can make this even more clever later.
		cacheEntry.ls->dirtyUniforms |= switchDirty;

		// No early exit: every entry must receive the dirty bits above.
		if (cacheEntry.vs == vs && cacheEntry.fs == fs) {
			linked = cacheEntry.ls;
		}
	}
	shaderSwitchDirtyUniforms_ = 0;

	if (linked != nullptr) {
		linked->use(VSID);
	} else {
		_dbg_assert_(FSID.Bit(FS_BIT_LMODE) == VSID.Bit(VS_BIT_LMODE));
		_dbg_assert_(FSID.Bit(FS_BIT_FLATSHADE) == VSID.Bit(VS_BIT_FLATSHADE));

		if (vs == nullptr || fs == nullptr) {
			// Can't draw. This shouldn't really happen (but can happen if fragment shader generation fails)
			return nullptr;
		}

		// Check if we can link these.
		linked = new LinkedShader(render_, VSID, vs, FSID, fs, vs->UseHWTransform());
		linked->use(VSID);
		const LinkedShaderCacheEntry newEntry(vs, fs, linked);
		linkedShaderCache_.push_back(newEntry);
	}
	linked->UpdateUniforms(VSID, useBufferedRendering, draw_->GetShaderLanguageDesc());

	lastShader_ = linked;
	return linked;
}
898
899
// Returns a debug string for this shader: its source code, or a short description
// derived from the given ID. Any other string type yields "N/A".
std::string Shader::GetShaderString(DebugShaderStringType type, ShaderID id) const {
	if (type == SHADER_STRING_SOURCE_CODE) {
		return source_;
	}
	if (type == SHADER_STRING_SHORT_DESC) {
		// The ID is interpreted according to which stage this shader belongs to.
		if (isFragment_) {
			return FragmentShaderDesc(FShaderID(id));
		}
		return VertexShaderDesc(VShaderID(id));
	}
	return "N/A";
}
909
910
// Returns the string form of the ID of every cached shader of the given type.
// Only vertex and fragment shader types are handled; any other type gives an empty list.
std::vector<std::string> ShaderManagerGLES::DebugGetShaderIDs(DebugShaderType type) {
	std::vector<std::string> ids;
	switch (type) {
	case SHADER_TYPE_VERTEX:
		vsCache_.Iterate([&](const VShaderID &id, Shader *) {
			std::string idstr;
			id.ToString(&idstr);
			ids.push_back(idstr);
		});
		break;
	case SHADER_TYPE_FRAGMENT:
		fsCache_.Iterate([&](const FShaderID &id, Shader *) {
			std::string idstr;
			id.ToString(&idstr);
			ids.push_back(idstr);
		});
		break;
	default:
		break;
	}
	return ids;
}
933
934
std::string ShaderManagerGLES::DebugGetShaderString(std::string id, DebugShaderType type, DebugShaderStringType stringType) {
935
ShaderID shaderId;
936
shaderId.FromString(id);
937
switch (type) {
938
case SHADER_TYPE_VERTEX:
939
{
940
Shader *vs;
941
if (vsCache_.Get(VShaderID(shaderId), &vs) && vs) {
942
return vs->GetShaderString(stringType, shaderId);
943
} else {
944
return "";
945
}
946
}
947
948
case SHADER_TYPE_FRAGMENT:
949
{
950
Shader *fs;
951
if (fsCache_.Get(FShaderID(shaderId), &fs) && fs) {
952
return fs->GetShaderString(stringType, shaderId);
953
} else {
954
return "";
955
}
956
}
957
default:
958
return "N/A";
959
}
960
}
961
962
// Shader pseudo-cache.
//
// We simply store the IDs of the shaders used during gameplay. On the next startup of
// the same game, we compile all of those shaders up front, so we don't have to
// compile them on the fly later. Ideally we would store the actual compiled shaders
// rather than just their IDs, but OpenGL does not support this, except for a few obscure
// vendor-specific extensions.
//
// If things like the GPU's supported features have changed since the last time, we discard
// the cache, as these features can sometimes have an effect on the ID bits.
972
973
// Bit flags stored in CacheHeader::detectFlags.
enum class CacheDetectFlags {
	// Set when the draw engine reports EverUsedExactEqualDepth() at save time.
	EQUAL_DEPTH = 1 << 0,
};
976
977
// Identifies a shader ID cache file; files with any other magic are rejected on load.
#define CACHE_HEADER_MAGIC 0x83277592
// Format version; files written with a different version are rejected on load.
#define CACHE_VERSION 36
979
980
// Header at the start of the shader ID cache file. It is written and read as raw bytes,
// so the field order and types define the on-disk layout.
struct CacheHeader {
	uint32_t magic;          // CACHE_HEADER_MAGIC
	uint32_t version;        // CACHE_VERSION
	uint32_t useFlags;       // gstate_c.GetUseFlags() at save time; must match on load.
	uint32_t detectFlags;    // Combination of CacheDetectFlags bits.
	int numVertexShaders;    // Number of vertex shader IDs following the header.
	int numFragmentShaders;  // Number of fragment shader IDs following those.
	int numLinkedPrograms;   // Number of (vertex ID, fragment ID) pairs at the end.
};
989
990
// Reads the cache header from f and validates its magic and version. On success, applies
// the stored detect flags to the draw engine and returns true; otherwise returns false.
bool ShaderManagerGLES::LoadCacheFlags(File::IOFile &f, DrawEngineGLES *drawEngine) {
	CacheHeader header;
	// Short-circuiting: the header fields are only examined if the read succeeded.
	const bool headerOk = f.ReadArray(&header, 1) &&
		header.magic == CACHE_HEADER_MAGIC &&
		header.version == CACHE_VERSION;
	if (!headerOk) {
		return false;
	}

	const uint32_t equalDepthBit = (uint32_t)CacheDetectFlags::EQUAL_DEPTH;
	if ((header.detectFlags & equalDepthBit) != 0) {
		drawEngine->SetEverUsedExactEqualDepth(true);
	}
	return true;
}
1005
1006
// Precompiles the shaders recorded in a shader ID cache file.
//
// Reads the header and the ID lists from f, then compiles every listed vertex and fragment
// shader that isn't already cached, and creates the recorded linked programs.
//
// Returns false if the file can't be used (use flags differ from the current ones, the
// header or file size fails the sanity checks, or a read fails) or if any shader fails to
// generate. Shaders compiled before such a failure are kept in the caches.
// Returns true otherwise, including when the file lists nothing to compile.
bool ShaderManagerGLES::LoadCache(File::IOFile &f) {
	const u64 sz = f.GetSize();
	f.Seek(0, SEEK_SET);
	CacheHeader header;
	if (!f.ReadArray(&header, 1)) {
		return false;
	}
	// We don't recheck the version, done in LoadCacheFlags().
	if (header.useFlags != gstate_c.GetUseFlags()) {
		return false;
	}
	const double start = time_now_d();

	// Sanity check the file contents. The counts are signed, so negative values must be
	// rejected too - they would otherwise turn into huge sizes below.
	const auto countIsSane = [](int count) {
		return count >= 0 && count <= 1000;
	};
	if (!countIsSane(header.numFragmentShaders) || !countIsSane(header.numVertexShaders) || !countIsSane(header.numLinkedPrograms)) {
		ERROR_LOG(Log::G3D, "Corrupt shader cache file header, aborting.");
		return false;
	}
	const size_t numVertex = (size_t)header.numVertexShaders;
	const size_t numFragment = (size_t)header.numFragmentShaders;
	const size_t numLinked = (size_t)header.numLinkedPrograms;

	// Also make sure the size makes sense, in case there's corruption.
	u64 expectedSize = sizeof(header);
	expectedSize += (u64)numVertex * sizeof(VShaderID);
	expectedSize += (u64)numFragment * sizeof(FShaderID);
	expectedSize += (u64)numLinked * (sizeof(VShaderID) + sizeof(FShaderID));
	if (sz != expectedSize) {
		// Cast so the arguments match the %lld conversions.
		ERROR_LOG(Log::G3D, "Shader cache file is wrong size: %lld instead of %lld", (long long)sz, (long long)expectedSize);
		return false;
	}

	// Read the ID lists. Empty lists are skipped so we never touch an empty vector's storage.
	std::vector<VShaderID> vertexIDs(numVertex);
	if (!vertexIDs.empty() && !f.ReadArray(vertexIDs.data(), vertexIDs.size())) {
		return false;
	}

	std::vector<FShaderID> fragmentIDs(numFragment);
	if (!fragmentIDs.empty() && !f.ReadArray(fragmentIDs.data(), fragmentIDs.size())) {
		return false;
	}

	std::vector<std::pair<VShaderID, FShaderID>> linkIDs;
	linkIDs.reserve(numLinked);
	for (size_t i = 0; i < numLinked; i++) {
		VShaderID vsid;
		FShaderID fsid;
		if (!f.ReadArray(&vsid, 1)) {
			return false;
		}
		if (!f.ReadArray(&fsid, 1)) {
			return false;
		}
		linkIDs.emplace_back(vsid, fsid);
	}

	// Nothing listed at all? Then there's nothing to compile.
	if (vertexIDs.empty() && fragmentIDs.empty() && linkIDs.empty()) {
		return true;
	}

	PSP_SetLoading("Compiling shaders...");

	for (const VShaderID &id : vertexIDs) {
		if (vsCache_.ContainsKey(id)) {
			WARN_LOG(Log::G3D, "Duplicate vertex shader found in GL shader cache, ignoring");
			continue;
		}
		if (id.Bit(VS_BIT_IS_THROUGH) && id.Bit(VS_BIT_USE_HW_TRANSFORM)) {
			// Clearly corrupt, bailing.
			ERROR_LOG_REPORT(Log::G3D, "Corrupt shader cache: Both IS_THROUGH and USE_HW_TRANSFORM set.");
			return false;
		}

		Shader *vs = CompileVertexShader(id);
		if (!vs) {
			// Give up on using the cache, just bail. We can't safely create the fallback shaders here
			// without trying to deduce the vertType from the VSID.
			ERROR_LOG(Log::G3D, "Failed to compile a vertex shader loading from cache. Skipping rest of shader cache.");
			return false;
		}
		vsCache_.Insert(id, vs);
	}

	for (const FShaderID &id : fragmentIDs) {
		if (fsCache_.ContainsKey(id)) {
			WARN_LOG(Log::G3D, "Duplicate fragment shader found in GL shader cache, ignoring");
			continue;
		}
		Shader *fs = CompileFragmentShader(id);
		if (!fs) {
			// Give up on using the cache - something went wrong.
			// We'll still keep the shaders we generated so far around.
			ERROR_LOG(Log::G3D, "Failed to compile a fragment shader loading from cache. Skipping rest of shader cache.");
			return false;
		}
		fsCache_.Insert(id, fs);
	}

	linkedShaderCache_.reserve(linkIDs.size());
	for (const auto &link : linkIDs) {
		const VShaderID &vsid = link.first;
		const FShaderID &fsid = link.second;
		Shader *vs = nullptr;
		Shader *fs = nullptr;
		vsCache_.Get(vsid, &vs);
		fsCache_.Get(fsid, &fs);
		// Pairs referring to a shader we don't have are silently skipped.
		if (vs && fs) {
			LinkedShader *ls = new LinkedShader(render_, vsid, vs, fsid, fs, vs->UseHWTransform(), true);
			LinkedShaderCacheEntry entry(vs, fs, ls);
			linkedShaderCache_.push_back(entry);
		}
	}

	// Okay, finally done. Time to report status.
	const double finish = time_now_d();

	NOTICE_LOG(Log::G3D, "Precompile: Compiled and linked %d programs (%d vertex, %d fragment) in %0.1f milliseconds", (int)linkIDs.size(), (int)vertexIDs.size(), (int)fragmentIDs.size(), 1000 * (finish - start));

	return true;
}
1159
1160
void ShaderManagerGLES::SaveCache(const Path &filename, DrawEngineGLES *drawEngine) {
1161
if (linkedShaderCache_.empty()) {
1162
return;
1163
}
1164
INFO_LOG(Log::G3D, "Saving the shader cache to '%s'", filename.c_str());
1165
FILE *f = File::OpenCFile(filename, "wb");
1166
if (!f) {
1167
// Can't save, give up for now.
1168
return;
1169
}
1170
CacheHeader header;
1171
header.magic = CACHE_HEADER_MAGIC;
1172
header.version = CACHE_VERSION;
1173
header.detectFlags = 0;
1174
if (drawEngine->EverUsedExactEqualDepth())
1175
header.detectFlags |= (uint32_t)CacheDetectFlags::EQUAL_DEPTH;
1176
header.useFlags = gstate_c.GetUseFlags();
1177
header.numVertexShaders = GetNumVertexShaders();
1178
header.numFragmentShaders = GetNumFragmentShaders();
1179
header.numLinkedPrograms = GetNumPrograms();
1180
fwrite(&header, 1, sizeof(header), f);
1181
vsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1182
fwrite(&id, 1, sizeof(id), f);
1183
});
1184
fsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1185
fwrite(&id, 1, sizeof(id), f);
1186
});
1187
for (const auto &iter : linkedShaderCache_) {
1188
ShaderID vsid, fsid;
1189
vsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1190
if (iter.vs == shader)
1191
vsid = id;
1192
});
1193
fsCache_.Iterate([&](const ShaderID &id, Shader *shader) {
1194
if (iter.fs == shader)
1195
fsid = id;
1196
});
1197
fwrite(&vsid, 1, sizeof(vsid), f);
1198
fwrite(&fsid, 1, sizeof(fsid), f);
1199
}
1200
fclose(f);
1201
}
1202
1203