CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/GPUStateUtils.cpp
Views: 1401
1
// Copyright (c) 2015- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include <algorithm>
19
#include <limits>
20
21
#include "Common/System/Display.h"
22
23
#include "Common/StringUtils.h"
24
#include "Core/Config.h"
25
#include "Core/ConfigValues.h"
26
#include "Core/System.h"
27
28
#include "GPU/ge_constants.h"
29
#include "GPU/GPUState.h"
30
#include "GPU/Math3D.h"
31
#include "GPU/Common/FramebufferManagerCommon.h"
32
#include "GPU/Common/PresentationCommon.h"
33
#include "GPU/Common/ShaderId.h"
34
#include "GPU/Common/VertexDecoderCommon.h"
35
36
#include "GPU/Common/GPUStateUtils.h"
37
38
bool IsStencilTestOutputDisabled() {
39
// The mask applies on all stencil ops.
40
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) {
41
if (gstate_c.framebufFormat == GE_FORMAT_565) {
42
return true;
43
}
44
return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP;
45
}
46
return true;
47
}
48
49
bool NeedsTestDiscard() {
50
// We assume this is called only when enabled and not trivially true (may also be for color testing.)
51
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF)
52
return true;
53
if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled())
54
return true;
55
if (!gstate.isAlphaBlendEnabled())
56
return true;
57
if (gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA && gstate.getBlendFuncA() != GE_SRCBLEND_DOUBLESRCALPHA)
58
return true;
59
// GE_DSTBLEND_DOUBLEINVSRCALPHA is actually inverse double src alpha, and doubling zero is still zero.
60
if (gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA && gstate.getBlendFuncB() != GE_DSTBLEND_DOUBLEINVSRCALPHA) {
61
if (gstate.getBlendFuncB() != GE_DSTBLEND_FIXB || gstate.getFixB() != 0xFFFFFF)
62
return true;
63
}
64
if (gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_ADD && gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE)
65
return true;
66
if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY)
67
return true;
68
69
return false;
70
}
71
72
bool IsAlphaTestTriviallyTrue() {
73
switch (gstate.getAlphaTestFunction()) {
74
case GE_COMP_NEVER:
75
return false;
76
77
case GE_COMP_ALWAYS:
78
return true;
79
80
case GE_COMP_GEQUAL:
81
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
82
return true; // If alpha is full, it doesn't matter what the ref value is.
83
return gstate.getAlphaTestRef() == 0;
84
85
// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
86
// Speeds up Lumines by a LOT on PowerVR.
87
case GE_COMP_NOTEQUAL:
88
if (gstate.getAlphaTestRef() == 255) {
89
// Likely to be rare. Let's just skip the vertexFullAlpha optimization here instead of adding
90
// complicated code to discard the draw or whatnot.
91
return false;
92
}
93
// Fallthrough on purpose
94
95
case GE_COMP_GREATER:
96
{
97
// If the texture and vertex only use 1.0 alpha, then the ref value doesn't matter.
98
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
99
return true;
100
return gstate.getAlphaTestRef() == 0 && !NeedsTestDiscard();
101
}
102
103
case GE_COMP_LEQUAL:
104
return gstate.getAlphaTestRef() == 255;
105
106
case GE_COMP_EQUAL:
107
case GE_COMP_LESS:
108
return false;
109
110
default:
111
return false;
112
}
113
}
114
115
bool IsAlphaTestAgainstZero() {
116
return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;
117
}
118
119
bool IsColorTestAgainstZero() {
120
return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF;
121
}
122
123
bool IsColorTestTriviallyTrue() {
124
switch (gstate.getColorTestFunction()) {
125
case GE_COMP_NEVER:
126
return false;
127
128
case GE_COMP_ALWAYS:
129
return true;
130
131
case GE_COMP_EQUAL:
132
case GE_COMP_NOTEQUAL:
133
return false;
134
default:
135
return false;
136
}
137
}
138
139
bool IsDepthTestEffectivelyDisabled() {
140
if (!gstate.isDepthTestEnabled())
141
return true;
142
// We can ignore stencil, because ALWAYS and disabled choose the same stencil path.
143
if (gstate.getDepthTestFunction() != GE_COMP_ALWAYS)
144
return false;
145
return !gstate.isDepthWriteEnabled();
146
}
147
148
// Indexed by the source blend factor (GE_SRCBLEND_*).
// An entry is false for the factors that are based on source alpha, true for all others.
const bool nonAlphaSrcFactors[16] = {
	/*  0: GE_SRCBLEND_DSTCOLOR          */ true,
	/*  1: GE_SRCBLEND_INVDSTCOLOR       */ true,
	/*  2: GE_SRCBLEND_SRCALPHA          */ false,
	/*  3: GE_SRCBLEND_INVSRCALPHA       */ false,
	/*  4: GE_SRCBLEND_DSTALPHA          */ true,
	/*  5: GE_SRCBLEND_INVDSTALPHA       */ true,
	/*  6: GE_SRCBLEND_DOUBLESRCALPHA    */ false,
	/*  7: GE_SRCBLEND_DOUBLEINVSRCALPHA */ false,
	/*  8: GE_SRCBLEND_DOUBLEDSTALPHA    */ true,
	/*  9: GE_SRCBLEND_DOUBLEINVDSTALPHA */ true,
	/* 10: GE_SRCBLEND_FIXA              */ true,
	/* 11-15: unnamed values             */ true, true, true, true, true,
};
166
167
// Indexed by the destination blend factor (GE_DSTBLEND_*).
// An entry is false for the factors that are based on source alpha, true for all others.
const bool nonAlphaDestFactors[16] = {
	/*  0: GE_DSTBLEND_SRCCOLOR          */ true,
	/*  1: GE_DSTBLEND_INVSRCCOLOR       */ true,
	/*  2: GE_DSTBLEND_SRCALPHA          */ false,
	/*  3: GE_DSTBLEND_INVSRCALPHA       */ false,
	/*  4: GE_DSTBLEND_DSTALPHA          */ true,
	/*  5: GE_DSTBLEND_INVDSTALPHA       */ true,
	/*  6: GE_DSTBLEND_DOUBLESRCALPHA    */ false,
	/*  7: GE_DSTBLEND_DOUBLEINVSRCALPHA */ false,
	/*  8: GE_DSTBLEND_DOUBLEDSTALPHA    */ true,
	/*  9: GE_DSTBLEND_DOUBLEINVDSTALPHA */ true,
	/* 10: GE_DSTBLEND_FIXB              */ true,
	/* 11-15: unnamed values             */ true, true, true, true, true,
};
185
186
// Chooses how the output alpha should be replaced by the stencil value,
// given the blend replacement mode that was already selected.
//   replaceBlend: result of the blend replacement decision for this draw.
// Returns REPLACE_ALPHA_NO, REPLACE_ALPHA_YES or REPLACE_ALPHA_DUALSOURCE.
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {
	// Nothing to replace if stencil output is off or we're in clear mode.
	if (IsStencilTestOutputDisabled() || gstate.isModeClear()) {
		return REPLACE_ALPHA_NO;
	}

	const bool blendStillActive = replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_READ_FRAMEBUFFER;
	if (blendStillActive) {
		// If neither factor is based on source alpha, the alpha output is free to carry stencil.
		const bool factorsAvoidSrcAlpha = nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()];
		if (factorsAvoidSrcAlpha) {
			return REPLACE_ALPHA_YES;
		}
		// Otherwise alpha is needed for blending; only dual source blending can do both.
		return gstate_c.Use(GPU_USE_DUALSOURCE_BLEND) ? REPLACE_ALPHA_DUALSOURCE : REPLACE_ALPHA_NO;
	}

	// NOTE(review): REPLACE_BLEND_BLUE_TO_ALPHA already satisfies the condition above and
	// returns there, so this check looks unreachable. Kept to preserve the existing logic.
	if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) {
		return REPLACE_ALPHA_NO;  // irrelevant
	}

	return REPLACE_ALPHA_YES;
}
209
210
// Maps the current framebuffer format and the stencil Z-pass op to the kind of
// value that should be written to alpha in place of stencil.
// Falls back to STENCIL_VALUE_KEEP for any format or op not listed below.
StencilValueType ReplaceAlphaWithStencilType() {
	const auto format = gstate_c.framebufFormat;

	// There's never a stencil value in 565. Maybe the right alpha is 1?
	if (format == GE_FORMAT_565) {
		return STENCIL_VALUE_ONE;
	}

	if (format == GE_FORMAT_5551) {
		// Only one bit is available, so this should only ever produce zero/one.
		switch (gstate.getStencilOpZPass()) {
		case GE_STENCILOP_REPLACE:
			if ((gstate.getStencilTestRef() & 0x80) != 0) {
				return STENCIL_VALUE_ONE;
			}
			return STENCIL_VALUE_ZERO;

		case GE_STENCILOP_ZERO:
		case GE_STENCILOP_DECR:
			// Decrementing always zeros, since there's only one bit.
			return STENCIL_VALUE_ZERO;

		case GE_STENCILOP_INCR:
			// Incrementing always fills, since there's only one bit.
			return STENCIL_VALUE_ONE;

		case GE_STENCILOP_INVERT:
			return STENCIL_VALUE_INVERT;

		case GE_STENCILOP_KEEP:
		default:
			return STENCIL_VALUE_KEEP;
		}
	}

	const bool handledFormat = format == GE_FORMAT_4444 || format == GE_FORMAT_8888 ||
		format == GE_FORMAT_INVALID || format == GE_FORMAT_DEPTH16 || format == GE_FORMAT_CLUT8;
	if (!handledFormat) {
		return STENCIL_VALUE_KEEP;
	}

	// 4444 gets the 4-bit increment/decrement variants, everything else the 8-bit ones.
	const bool fourBit = format == GE_FORMAT_4444;
	switch (gstate.getStencilOpZPass()) {
	case GE_STENCILOP_REPLACE:
		// TODO: Could detect zero here and force ZERO - less uniform updates?
		return STENCIL_VALUE_UNIFORM;

	case GE_STENCILOP_ZERO:
		return STENCIL_VALUE_ZERO;

	case GE_STENCILOP_DECR:
		return fourBit ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8;

	case GE_STENCILOP_INCR:
		return fourBit ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8;

	case GE_STENCILOP_INVERT:
		return STENCIL_VALUE_INVERT;

	case GE_STENCILOP_KEEP:
	default:
		return STENCIL_VALUE_KEEP;
	}
}
269
270
// Decides how the PSP blend state should be realized: plain fixed-function
// blending, fixed-function blending helped by shader-side tweaks (pre-multiplied
// source, doubled alpha), or blending done by reading the framebuffer.
//   bufferFormat: format of the target; GE_FORMAT_565 allows shortcuts wherever
//                 the original code treats dest alpha as known.
ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {
	if (gstate_c.blueToAlpha) {
		return REPLACE_BLEND_BLUE_TO_ALPHA;
	}

	if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear()) {
		return REPLACE_BLEND_NO;
	}

	// Let's get the non-factor modes out of the way first.
	const GEBlendMode blendEq = gstate.getBlendEq();
	if (blendEq == GE_BLENDMODE_ABSDIFF) {
		return REPLACE_BLEND_READ_FRAMEBUFFER;
	}
	if (blendEq == GE_BLENDMODE_MIN || blendEq == GE_BLENDMODE_MAX) {
		return gstate_c.Use(GPU_USE_BLEND_MINMAX) ? REPLACE_BLEND_STANDARD : REPLACE_BLEND_READ_FRAMEBUFFER;
	}
	const bool factorBasedEq = blendEq == GE_BLENDMODE_MUL_AND_ADD ||
		blendEq == GE_BLENDMODE_MUL_AND_SUBTRACT ||
		blendEq == GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE;
	if (!factorBasedEq) {
		// Other blend equations simply don't blend on hardware.
		return REPLACE_BLEND_NO;
	}

	// On a 565 target the given mode is enough; otherwise the framebuffer must be read.
	const auto unlessNot565 = [bufferFormat](ReplaceBlendType modeFor565) {
		return bufferFormat == GE_FORMAT_565 ? modeFor565 : REPLACE_BLEND_READ_FRAMEBUFFER;
	};
	// Read the framebuffer when framebuffer fetch is available, else use the fallback.
	// A copy may easily be worse than the approximation due to overlap.
	const auto fetchOr = [](ReplaceBlendType fallback) {
		return gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH) ? REPLACE_BLEND_READ_FRAMEBUFFER : fallback;
	};

	const GEBlendSrcFactor srcFactor = gstate.getBlendFuncA();
	const GEBlendDstFactor dstFactor = gstate.getBlendFuncB();

	const bool dstIsSrcColor = dstFactor == GE_DSTBLEND_SRCCOLOR || dstFactor == GE_DSTBLEND_INVSRCCOLOR;
	const bool dstIsDoubleSrcAlpha = dstFactor == GE_DSTBLEND_DOUBLESRCALPHA || dstFactor == GE_DSTBLEND_DOUBLEINVSRCALPHA;
	const bool dstIsDoubleDstAlpha = dstFactor == GE_DSTBLEND_DOUBLEDSTALPHA || dstFactor == GE_DSTBLEND_DOUBLEINVDSTALPHA;

	switch (srcFactor) {
	case GE_SRCBLEND_DOUBLESRCALPHA:
	case GE_SRCBLEND_DOUBLEINVSRCALPHA:
		// 2x alpha in the source function and not in the dest = source color doubling.
		// Even dest alpha is safe, since we're moving the * 2.0 into the src color.
		if (dstIsSrcColor) {
			// When inversing, alpha clamping isn't an issue.
			// Otherwise we can't double: we need the source color to be correct, and
			// doubling only alpha would clamp the src alpha incorrectly.
			return srcFactor == GE_SRCBLEND_DOUBLEINVSRCALPHA ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_READ_FRAMEBUFFER;
		}
		if (dstIsDoubleDstAlpha) {
			return unlessNot565(REPLACE_BLEND_2X_ALPHA);
		}
		if (dstFactor == GE_DSTBLEND_DOUBLESRCALPHA) {
			// We can't technically do this correctly (due to clamping) without reading the dst color.
			// Using a copy isn't accurate either, though, when there's overlap.
			return fetchOr(REPLACE_BLEND_PRE_SRC_2X_ALPHA);
		}
		if (dstFactor == GE_DSTBLEND_DOUBLEINVSRCALPHA) {
			// For the inverse, doubling alpha is safe, because it will clamp correctly.
			return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
		}
		// SRCALPHA, INVSRCALPHA, DSTALPHA, INVDSTALPHA, FIXB and anything else.
		// TODO: Could use vertexFullAlpha, but it's not calculated yet.
		// This outputs the original alpha for the dest factor.
		return REPLACE_BLEND_PRE_SRC;

	case GE_SRCBLEND_DOUBLEDSTALPHA:
	case GE_SRCBLEND_DOUBLEINVDSTALPHA:
		// Doubled dst alpha in the source factor can't be done accurately (clamping)
		// without reading the destination, unless this is 565, where the factors are known.
		// Inverse double dst alpha is tricky: doubling the src color is probably the wrong
		// direction, halving might be more correct.
		// With a doubled src alpha dest factor, 565 only needs alpha doubled for the dst factor.
		return unlessNot565(dstIsDoubleSrcAlpha ? REPLACE_BLEND_2X_ALPHA : REPLACE_BLEND_STANDARD);

	case GE_SRCBLEND_DSTCOLOR:
	case GE_SRCBLEND_INVDSTCOLOR:
	case GE_SRCBLEND_SRCALPHA:
	case GE_SRCBLEND_INVSRCALPHA:
	case GE_SRCBLEND_DSTALPHA:
	case GE_SRCBLEND_INVDSTALPHA:
	{
		const bool srcUsesSrcAlpha = srcFactor == GE_SRCBLEND_SRCALPHA || srcFactor == GE_SRCBLEND_INVSRCALPHA;
		// When src alpha is also the src factor, it must be pre-multiplied so that
		// doubling alpha only affects the dest factor.
		const ReplaceBlendType doubledAlphaMode = srcUsesSrcAlpha ? REPLACE_BLEND_PRE_SRC_2X_ALPHA : REPLACE_BLEND_2X_ALPHA;

		if (dstFactor == GE_DSTBLEND_DOUBLESRCALPHA) {
			// Can't safely double alpha, will clamp. However, a copy may easily be worse due to
			// overlap (copying here seems to cause overlap problems in Silent Hill games.)
			// Without framebuffer fetch, just hope doubling alpha will not clamp too badly.
			return fetchOr(doubledAlphaMode);
		}
		if (dstFactor == GE_DSTBLEND_DOUBLEINVSRCALPHA) {
			// For inverse, things are simpler. Clamping isn't an issue, as long as we avoid
			// messing with the other factor's components.
			return doubledAlphaMode;
		}
		if (dstIsDoubleDstAlpha) {
			return unlessNot565(REPLACE_BLEND_STANDARD);
		}
		return REPLACE_BLEND_STANDARD;
	}

	case GE_SRCBLEND_FIXA:
	default:
		switch (dstFactor) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
			// Can't safely double alpha, will clamp.
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// Doubling alpha is safe for the inverse, will clamp to zero either way.
			return REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			return unlessNot565(REPLACE_BLEND_STANDARD);

		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
			return REPLACE_BLEND_STANDARD;

		case GE_DSTBLEND_FIXB:
		default:
		{
			const auto fixA = gstate.getFixA();
			const auto fixB = gstate.getFixB();
			if (fixA == 0xFFFFFF && fixB == 0x000000) {
				// Some games specify this. Some cards may prefer blending off entirely.
				return REPLACE_BLEND_NO;
			}
			const bool anyTrivialFix = fixA == 0xFFFFFF || fixA == 0x000000 || fixB == 0xFFFFFF || fixB == 0x000000;
			if (anyTrivialFix) {
				return REPLACE_BLEND_STANDARD;
			}
			// Multiply the src color in the shader, that way it's always accurate.
			return REPLACE_BLEND_PRE_SRC;
		}
		}
	}

	// Should never get here.
	return REPLACE_BLEND_STANDARD;
}
508
509
// Depth slice factor for standard accurate depth (no clamp, no 16-bit scaling).
static constexpr float DEPTH_SLICE_FACTOR_HIGH = 4.0f;
// Depth slice factor when 24-bit depth is scaled down to 16-bit resolution.
static constexpr float DEPTH_SLICE_FACTOR_16BIT = 256.0f;
511
512
// The supported flag combinations. TODO: Maybe they should be distilled down into an enum.
//
// 0 - "Old"-style GL depth.
//     Or "non-accurate depth": effectively ignores minz / maxz. Maps Z values based on the viewport, which clamps.
//     This skews depth in many instances. Depth can be inverted in this mode if the viewport says so.
//     This is completely wrong, but works in some cases (probably because some game devs assumed it was how it worked)
//     and avoids some depth clamp issues.
//
// GPU_USE_ACCURATE_DEPTH:
//     Accurate depth: Z in the framebuffer matches the range of Z used on the PSP linearly in some way. We choose
//     a centered range, to simulate clamping by letting otherwise out-of-range pixels survive the 0 and 1 cutoffs.
//     Depth is clipped based on minz/maxz, and the viewport is just a means to scale and center the value,
//     not clipping or mapping to stored values.
//
// GPU_USE_ACCURATE_DEPTH | GPU_USE_DEPTH_CLAMP:
//     Variant of GPU_USE_ACCURATE_DEPTH, just the range is the nice and convenient 0-1 since we can use
//     hardware depth clamp. Only viable in accurate depth mode; clamps depth and therefore uses the full 0-1 range.
//     Using the full 0-1 range is not what accurate means, it's implied by depth clamp (which also means we're clamping.)
//
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT:
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT | GPU_USE_DEPTH_CLAMP:
//     Only viable in accurate depth mode. Means to use a range of the 24-bit depth values available
//     from the GPU to represent the 16-bit values the PSP had, to try to make everything round and
//     z-fight (close to) the same way as on hardware, cheaply (cheaper than rounding depth in the fragment shader).
//     We automatically switch to this if Z tests for equality are used.
//     Depth clamp has no effect on the depth scaling here if set, though it will still be enabled
//     and clamp wildly out of line values.
//
// Any other combinations of these particular flags are bogus (like for example a lonely GPU_USE_DEPTH_CLAMP).
539
540
// Returns the depth slice factor for the given GPU use flags:
//   - 1.0 without GPU_USE_ACCURATE_DEPTH (old style depth),
//   - DEPTH_SLICE_FACTOR_16BIT with accurate depth scaled from 24-bit to 16-bit,
//   - 1.0 with accurate depth plus depth clamp (full range is usable),
//   - DEPTH_SLICE_FACTOR_HIGH for plain accurate depth.
float DepthSliceFactor(u32 useFlags) {
	const bool accurateDepth = (useFlags & GPU_USE_ACCURATE_DEPTH) != 0;
	const bool scaleTo16Bit = (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) != 0;
	const bool depthClamp = (useFlags & GPU_USE_DEPTH_CLAMP) != 0;

	// The 16-bit scaling takes priority over depth clamp, but only matters in accurate mode.
	if (accurateDepth && scaleTo16Bit) {
		return DEPTH_SLICE_FACTOR_16BIT;
	}
	if (!accurateDepth || depthClamp) {
		return 1.0f;
	}
	return DEPTH_SLICE_FACTOR_HIGH;
}
557
558
// See class DepthScaleFactors for how to apply.
559
DepthScaleFactors GetDepthScaleFactors(u32 useFlags) {
560
if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
561
return DepthScaleFactors(0.0f, 65535.0f);
562
}
563
564
if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {
565
const double offset = 0.5 * (DEPTH_SLICE_FACTOR_16BIT - 1.0) / DEPTH_SLICE_FACTOR_16BIT;
566
// Use one bit for each value, rather than 1.0 / (65535.0 * 256.0).
567
const double scale = 16777215.0;
568
return DepthScaleFactors(offset, scale);
569
} else if (useFlags & GPU_USE_DEPTH_CLAMP) {
570
return DepthScaleFactors(0.0f, 65535.0f);
571
} else {
572
const double offset = 0.5f * (DEPTH_SLICE_FACTOR_HIGH - 1.0f) * (1.0f / DEPTH_SLICE_FACTOR_HIGH);
573
return DepthScaleFactors(offset, (float)(DEPTH_SLICE_FACTOR_HIGH * 65535.0));
574
}
575
}
576
577
void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
578
out.throughMode = gstate.isModeThrough();
579
580
float renderWidthFactor, renderHeightFactor;
581
float renderX = 0.0f, renderY = 0.0f;
582
float displayOffsetX, displayOffsetY;
583
if (useBufferedRendering) {
584
displayOffsetX = 0.0f;
585
displayOffsetY = 0.0f;
586
renderWidthFactor = (float)renderWidth / (float)bufferWidth;
587
renderHeightFactor = (float)renderHeight / (float)bufferHeight;
588
} else {
589
float pixelW = PSP_CoreParameter().pixelWidth;
590
float pixelH = PSP_CoreParameter().pixelHeight;
591
FRect frame = GetScreenFrame(pixelW, pixelH);
592
FRect rc;
593
CalculateDisplayOutputRect(&rc, 480, 272, frame, ROTATION_LOCKED_HORIZONTAL);
594
displayOffsetX = rc.x;
595
displayOffsetY = rc.y;
596
renderWidth = rc.w;
597
renderHeight = rc.h;
598
renderWidthFactor = renderWidth / 480.0f;
599
renderHeightFactor = renderHeight / 272.0f;
600
}
601
602
// We take care negative offsets of in the projection matrix.
603
// These come from split framebuffers (Killzone).
604
// TODO: Might be safe to do get rid of this here and do the same for positive offsets?
605
renderX = std::max(gstate_c.curRTOffsetX, 0);
606
renderY = std::max(gstate_c.curRTOffsetY, 0);
607
608
// Scissor
609
int scissorX1 = gstate.getScissorX1();
610
int scissorY1 = gstate.getScissorY1();
611
int scissorX2 = gstate.getScissorX2() + 1;
612
int scissorY2 = gstate.getScissorY2() + 1;
613
614
if (scissorX2 < scissorX1 || scissorY2 < scissorY1) {
615
out.scissorX = 0;
616
out.scissorY = 0;
617
out.scissorW = 0;
618
out.scissorH = 0;
619
} else {
620
out.scissorX = (renderX * renderWidthFactor) + displayOffsetX + scissorX1 * renderWidthFactor;
621
out.scissorY = (renderY * renderHeightFactor) + displayOffsetY + scissorY1 * renderHeightFactor;
622
out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor;
623
out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor;
624
}
625
626
int curRTWidth = gstate_c.curRTWidth;
627
int curRTHeight = gstate_c.curRTHeight;
628
629
float offsetX = gstate.getOffsetX();
630
float offsetY = gstate.getOffsetY();
631
632
DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());
633
634
if (out.throughMode) {
635
// If renderX/renderY are offset to compensate for a split framebuffer,
636
// applying the offset to the viewport isn't enough, since the viewport clips.
637
// We need to apply either directly to the vertices, or to the "through" projection matrix.
638
out.viewportX = renderX * renderWidthFactor + displayOffsetX;
639
out.viewportY = renderY * renderHeightFactor + displayOffsetY;
640
out.viewportW = curRTWidth * renderWidthFactor;
641
out.viewportH = curRTHeight * renderHeightFactor;
642
out.depthRangeMin = depthScale.EncodeFromU16(0.0f);
643
out.depthRangeMax = depthScale.EncodeFromU16(65536.0f);
644
} else {
645
// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
646
float vpXScale = gstate.getViewportXScale();
647
float vpXCenter = gstate.getViewportXCenter();
648
float vpYScale = gstate.getViewportYScale();
649
float vpYCenter = gstate.getViewportYCenter();
650
651
// The viewport transform appears to go like this:
652
// Xscreen = -offsetX + vpXCenter + vpXScale * Xview
653
// Yscreen = -offsetY + vpYCenter + vpYScale * Yview
654
// Zscreen = vpZCenter + vpZScale * Zview
655
656
// The viewport is normally centered at 2048,2048 but can also be centered at other locations.
657
// Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover
658
// the desired screen area ([0-480)x[0-272)), so 1808,1912.
659
660
// This means that to get the analogue glViewport we must:
661
float vpX0 = vpXCenter - offsetX - fabsf(vpXScale);
662
float vpY0 = vpYCenter - offsetY - fabsf(vpYScale);
663
gstate_c.vpWidth = vpXScale * 2.0f;
664
gstate_c.vpHeight = vpYScale * 2.0f;
665
666
float vpWidth = fabsf(gstate_c.vpWidth);
667
float vpHeight = fabsf(gstate_c.vpHeight);
668
669
float left = renderX + vpX0;
670
float top = renderY + vpY0;
671
float right = left + vpWidth;
672
float bottom = top + vpHeight;
673
674
out.widthScale = 1.0f;
675
out.xOffset = 0.0f;
676
out.heightScale = 1.0f;
677
out.yOffset = 0.0f;
678
679
// If we're within the bounds, we want clipping the viewport way. So leave it be.
680
{
681
float overageLeft = std::max(-left, 0.0f);
682
float overageRight = std::max(right - bufferWidth, 0.0f);
683
684
// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
685
if (right < scissorX2) {
686
overageRight -= scissorX2 - right;
687
}
688
if (left > scissorX1) {
689
overageLeft += scissorX1 - left;
690
}
691
692
// Our center drifted by the difference in overages.
693
float drift = overageRight - overageLeft;
694
695
if (overageLeft != 0.0f || overageRight != 0.0f) {
696
left += overageLeft;
697
right -= overageRight;
698
699
// Protect against the viewport being entirely outside the scissor.
700
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
701
if (right <= left) {
702
right = left + 1.0f;
703
}
704
705
out.widthScale = vpWidth / (right - left);
706
out.xOffset = drift / (right - left);
707
}
708
}
709
710
{
711
float overageTop = std::max(-top, 0.0f);
712
float overageBottom = std::max(bottom - bufferHeight, 0.0f);
713
714
// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
715
if (bottom < scissorY2) {
716
overageBottom -= scissorY2 - bottom;
717
}
718
if (top > scissorY1) {
719
overageTop += scissorY1 - top;
720
}
721
// Our center drifted by the difference in overages.
722
float drift = overageBottom - overageTop;
723
724
if (overageTop != 0.0f || overageBottom != 0.0f) {
725
top += overageTop;
726
bottom -= overageBottom;
727
728
// Protect against the viewport being entirely outside the scissor.
729
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
730
if (bottom <= top) {
731
bottom = top + 1.0f;
732
}
733
734
out.heightScale = vpHeight / (bottom - top);
735
out.yOffset = drift / (bottom - top);
736
}
737
}
738
739
out.viewportX = left * renderWidthFactor + displayOffsetX;
740
out.viewportY = top * renderHeightFactor + displayOffsetY;
741
out.viewportW = (right - left) * renderWidthFactor;
742
out.viewportH = (bottom - top) * renderHeightFactor;
743
744
// The depth viewport parameters are the same, but we handle it a bit differently.
745
// When clipping is enabled, depth is clamped to [0, 65535]. And minz/maxz discard.
746
// So, we apply the depth range as minz/maxz, and transform for the viewport.
747
float vpZScale = gstate.getViewportZScale();
748
float vpZCenter = gstate.getViewportZCenter();
749
// TODO: This clip the entire draw if minz > maxz.
750
float minz = gstate.getDepthRangeMin();
751
float maxz = gstate.getDepthRangeMax();
752
753
if (gstate.isDepthClampEnabled() && (minz == 0 || maxz == 65535)) {
754
// Here, we should "clamp." But clamping per fragment would be slow.
755
// So, instead, we just increase the available range and hope.
756
// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
757
float fullDepthRange = 65535.0f * (depthScale.Scale() - 1.0f) * (1.0f / 2.0f);
758
if (minz == 0) {
759
minz -= fullDepthRange;
760
}
761
if (maxz == 65535) {
762
maxz += fullDepthRange;
763
}
764
} else if (maxz == 65535) {
765
// This means clamp isn't enabled, but we still want to allow values up to 65535.99.
766
// If DepthSliceFactor() is 1.0, though, this would make out.depthRangeMax exceed 1.
767
// Since that would clamp, it would make Z=1234 not match between draws when maxz changes.
768
if (depthScale.Scale() > 1.0f)
769
maxz = 65535.99f;
770
}
771
772
// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
773
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
774
out.depthScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
775
// This adjusts the center from halfActualZRange to vpZCenter.
776
out.zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
777
778
if (!gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) {
779
out.depthScale = 1.0f;
780
out.zOffset = 0.0f;
781
out.depthRangeMin = depthScale.EncodeFromU16(vpZCenter - vpZScale);
782
out.depthRangeMax = depthScale.EncodeFromU16(vpZCenter + vpZScale);
783
} else {
784
out.depthRangeMin = depthScale.EncodeFromU16(minz);
785
out.depthRangeMax = depthScale.EncodeFromU16(maxz);
786
}
787
788
// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.
789
// Of course, if this happens we've skewed out.depthScale/out.zOffset and may get z-fighting.
790
out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);
791
out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);
792
}
793
}
794
795
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor) {
796
if (vpAndScissor.throughMode)
797
return;
798
799
bool scaleChanged = gstate_c.vpWidthScale != vpAndScissor.widthScale || gstate_c.vpHeightScale != vpAndScissor.heightScale;
800
bool offsetChanged = gstate_c.vpXOffset != vpAndScissor.xOffset || gstate_c.vpYOffset != vpAndScissor.yOffset;
801
bool depthChanged = gstate_c.vpDepthScale != vpAndScissor.depthScale || gstate_c.vpZOffset != vpAndScissor.zOffset;
802
if (scaleChanged || offsetChanged || depthChanged) {
803
gstate_c.vpWidthScale = vpAndScissor.widthScale;
804
gstate_c.vpHeightScale = vpAndScissor.heightScale;
805
gstate_c.vpDepthScale = vpAndScissor.depthScale;
806
gstate_c.vpXOffset = vpAndScissor.xOffset;
807
gstate_c.vpYOffset = vpAndScissor.yOffset;
808
gstate_c.vpZOffset = vpAndScissor.zOffset;
809
810
gstate_c.Dirty(DIRTY_PROJMATRIX);
811
if (depthChanged) {
812
gstate_c.Dirty(DIRTY_DEPTHRANGE);
813
}
814
}
815
}
816
817
// Host blend factor for each source blend factor (GE_SRCBLEND_*), by index.
// The "double" variants map to the same factor as their non-doubled counterpart.
static const BlendFactor genericALookup[11] = {
	/*  0: GE_SRCBLEND_DSTCOLOR          */ BlendFactor::DST_COLOR,
	/*  1: GE_SRCBLEND_INVDSTCOLOR       */ BlendFactor::ONE_MINUS_DST_COLOR,
	/*  2: GE_SRCBLEND_SRCALPHA          */ BlendFactor::SRC_ALPHA,
	/*  3: GE_SRCBLEND_INVSRCALPHA       */ BlendFactor::ONE_MINUS_SRC_ALPHA,
	/*  4: GE_SRCBLEND_DSTALPHA          */ BlendFactor::DST_ALPHA,
	/*  5: GE_SRCBLEND_INVDSTALPHA       */ BlendFactor::ONE_MINUS_DST_ALPHA,
	/*  6: GE_SRCBLEND_DOUBLESRCALPHA    */ BlendFactor::SRC_ALPHA,
	/*  7: GE_SRCBLEND_DOUBLEINVSRCALPHA */ BlendFactor::ONE_MINUS_SRC_ALPHA,
	/*  8: GE_SRCBLEND_DOUBLEDSTALPHA    */ BlendFactor::DST_ALPHA,
	/*  9: GE_SRCBLEND_DOUBLEINVDSTALPHA */ BlendFactor::ONE_MINUS_DST_ALPHA,
	/* 10: GE_SRCBLEND_FIXA              */ BlendFactor::CONSTANT_COLOR,
};
830
831
// Host blend factor for each destination blend factor (GE_DSTBLEND_*), by index.
// The "double" variants map to the same factor as their non-doubled counterpart.
static const BlendFactor genericBLookup[11] = {
	/*  0: GE_DSTBLEND_SRCCOLOR          */ BlendFactor::SRC_COLOR,
	/*  1: GE_DSTBLEND_INVSRCCOLOR       */ BlendFactor::ONE_MINUS_SRC_COLOR,
	/*  2: GE_DSTBLEND_SRCALPHA          */ BlendFactor::SRC_ALPHA,
	/*  3: GE_DSTBLEND_INVSRCALPHA       */ BlendFactor::ONE_MINUS_SRC_ALPHA,
	/*  4: GE_DSTBLEND_DSTALPHA          */ BlendFactor::DST_ALPHA,
	/*  5: GE_DSTBLEND_INVDSTALPHA       */ BlendFactor::ONE_MINUS_DST_ALPHA,
	/*  6: GE_DSTBLEND_DOUBLESRCALPHA    */ BlendFactor::SRC_ALPHA,
	/*  7: GE_DSTBLEND_DOUBLEINVSRCALPHA */ BlendFactor::ONE_MINUS_SRC_ALPHA,
	/*  8: GE_DSTBLEND_DOUBLEDSTALPHA    */ BlendFactor::DST_ALPHA,
	/*  9: GE_DSTBLEND_DOUBLEINVDSTALPHA */ BlendFactor::ONE_MINUS_DST_ALPHA,
	/* 10: GE_DSTBLEND_FIXB              */ BlendFactor::CONSTANT_COLOR,
};
844
845
// Host blend equation per PSP blend mode, for use without min/max blending:
// MIN, MAX and ABSDIFF all fall back to ADD here.
// NOTE(review): the first three entries presumably correspond to MUL_AND_ADD,
// MUL_AND_SUBTRACT and MUL_AND_SUBTRACT_REVERSE - confirm against the enum order.
static const BlendEq eqLookupNoMinMax[] = {
	/* 0 */ BlendEq::ADD,
	/* 1 */ BlendEq::SUBTRACT,
	/* 2 */ BlendEq::REVERSE_SUBTRACT,
	/* 3: GE_BLENDMODE_MIN     */ BlendEq::ADD,
	/* 4: GE_BLENDMODE_MAX     */ BlendEq::ADD,
	/* 5: GE_BLENDMODE_ABSDIFF */ BlendEq::ADD,
};
853
854
// Blend equation per PSP blend mode, used by ConvertBlendState when
// GPU_USE_BLEND_MINMAX is set. ABSDIFF has no entry of its own and maps to MAX.
static const BlendEq eqLookup[] = {
	BlendEq::ADD,
	BlendEq::SUBTRACT,
	BlendEq::REVERSE_SUBTRACT,
	BlendEq::MIN,  // GE_BLENDMODE_MIN
	BlendEq::MAX,  // GE_BLENDMODE_MAX
	BlendEq::MAX,  // GE_BLENDMODE_ABSDIFF
};
862
863
// Swaps the two source-alpha factors for their SRC1 (dual-source) counterparts.
// Every other factor is returned unchanged.
static BlendFactor toDualSource(BlendFactor blendfunc) {
	if (blendfunc == BlendFactor::SRC_ALPHA)
		return BlendFactor::SRC1_ALPHA;
	if (blendfunc == BlendFactor::ONE_MINUS_SRC_ALPHA)
		return BlendFactor::ONE_MINUS_SRC1_ALPHA;
	return blendfunc;
}
873
874
// Tries to express a fixed blend color as plain ONE or ZERO.
//
// fix:    24-bit fixed color.
// approx: set to true whenever fix is neither exactly 0xFFFFFF nor exactly 0
//         (including when INVALID is returned). It is never reset to false here.
//
// Returns ONE / ZERO for exact or near matches, otherwise BlendFactor::INVALID.
static BlendFactor blendColor2Func(u32 fix, bool &approx) {
	// Exact matches: approx is left untouched.
	switch (fix) {
	case 0xFFFFFF:
		return BlendFactor::ONE;
	case 0:
		return BlendFactor::ZERO;
	default:
		break;
	}

	// Anything from here on is only an approximation if we answer ONE/ZERO.
	approx = true;

	const Vec3f rgb = Vec3f::FromRGB(fix);
	const bool nearlyOne = rgb.x >= 0.99 && rgb.y >= 0.99 && rgb.z >= 0.99;
	const bool nearlyZero = rgb.x <= 0.01 && rgb.y <= 0.01 && rgb.z <= 0.01;
	if (nearlyOne)
		return BlendFactor::ONE;
	return nearlyZero ? BlendFactor::ZERO : BlendFactor::INVALID;
}
890
891
// Local integer absolute value; avoids the compiler incompatibilities around abs().
inline int iabs(int x) {
	return x < 0 ? -x : x;
}
895
896
// Returns true when each of the three low bytes of a and b differs by at most margin.
// Bits 24-31 are ignored. The default margin of 25 is roughly 0.1 * 255.
static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {
	for (int shift = 0; shift <= 16; shift += 8) {
		const int delta = static_cast<int>((a >> shift) & 0xff) - static_cast<int>((b >> shift) & 0xff);
		// Equivalent to |delta| > margin.
		if (delta > margin || -delta > margin)
			return false;
	}
	return true;
}
904
905
// Try to simulate some common logic ops by using blend, if needed.
906
// The shader might also need modification, the below function SimulateLogicOpShaderTypeIfNeeded
907
// takes care of that.
908
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
909
if (!gstate.isLogicOpEnabled())
910
return false;
911
912
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
913
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
914
if (!gstate_c.Use(GPU_USE_LOGIC_OP)) {
915
switch (gstate.getLogicOp()) {
916
case GE_LOGIC_CLEAR:
917
srcBlend = BlendFactor::ZERO;
918
dstBlend = BlendFactor::ZERO;
919
blendEq = BlendEq::ADD;
920
return true;
921
case GE_LOGIC_AND:
922
case GE_LOGIC_AND_REVERSE:
923
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, Log::G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
924
break;
925
case GE_LOGIC_COPY:
926
// This is the same as off.
927
break;
928
case GE_LOGIC_COPY_INVERTED:
929
// Handled in the shader.
930
break;
931
case GE_LOGIC_AND_INVERTED:
932
case GE_LOGIC_NOR:
933
case GE_LOGIC_NAND:
934
case GE_LOGIC_EQUIV:
935
// Handled in the shader.
936
WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, Log::G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
937
break;
938
case GE_LOGIC_INVERTED:
939
srcBlend = BlendFactor::ONE;
940
dstBlend = BlendFactor::ONE;
941
blendEq = BlendEq::SUBTRACT;
942
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, Log::G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
943
return true;
944
case GE_LOGIC_NOOP:
945
srcBlend = BlendFactor::ZERO;
946
dstBlend = BlendFactor::ONE;
947
blendEq = BlendEq::ADD;
948
return true;
949
case GE_LOGIC_XOR:
950
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, Log::G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
951
break;
952
case GE_LOGIC_OR:
953
case GE_LOGIC_OR_INVERTED:
954
// Inverted in shader.
955
srcBlend = BlendFactor::ONE;
956
dstBlend = BlendFactor::ONE;
957
blendEq = BlendEq::ADD;
958
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, Log::G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
959
return true;
960
case GE_LOGIC_OR_REVERSE:
961
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, Log::G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
962
break;
963
case GE_LOGIC_SET:
964
srcBlend = BlendFactor::ONE;
965
dstBlend = BlendFactor::ONE;
966
blendEq = BlendEq::ADD;
967
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, Log::G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
968
return true;
969
}
970
} else {
971
// Even if we support hardware logic ops, alpha is handled wrong.
972
// It's better to override blending for the simple cases.
973
switch (gstate.getLogicOp()) {
974
case GE_LOGIC_CLEAR:
975
srcBlend = BlendFactor::ZERO;
976
dstBlend = BlendFactor::ZERO;
977
blendEq = BlendEq::ADD;
978
return true;
979
case GE_LOGIC_NOOP:
980
srcBlend = BlendFactor::ZERO;
981
dstBlend = BlendFactor::ONE;
982
blendEq = BlendEq::ADD;
983
return true;
984
985
default:
986
// Let's hope hardware gets it right.
987
return false;
988
}
989
}
990
return false;
991
}
992
993
// Choose the shader part of the above logic op fallback simulation.
994
SimulateLogicOpType SimulateLogicOpShaderTypeIfNeeded() {
995
if (!gstate_c.Use(GPU_USE_LOGIC_OP) && gstate.isLogicOpEnabled()) {
996
switch (gstate.getLogicOp()) {
997
case GE_LOGIC_COPY_INVERTED:
998
case GE_LOGIC_AND_INVERTED:
999
case GE_LOGIC_OR_INVERTED:
1000
case GE_LOGIC_NOR:
1001
case GE_LOGIC_NAND:
1002
case GE_LOGIC_EQUIV:
1003
return LOGICOPTYPE_INVERT;
1004
case GE_LOGIC_INVERTED:
1005
return LOGICOPTYPE_ONE;
1006
case GE_LOGIC_SET:
1007
return LOGICOPTYPE_ONE;
1008
default:
1009
return LOGICOPTYPE_NORMAL;
1010
}
1011
}
1012
return LOGICOPTYPE_NORMAL;
1013
}
1014
1015
// Sets up blendState for the case where color blending itself is not wanted.
// When replaceAlphaWithStencil is REPLACE_ALPHA_YES and the stencil op is one of
// INCR/DECR/INVERT, blending is still enabled so the alpha channel can carry the
// stencil update; otherwise blending ends up disabled.
void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState) {
	const StencilValueType stencilType =
		replaceAlphaWithStencil == REPLACE_ALPHA_YES ? ReplaceAlphaWithStencilType() : STENCIL_VALUE_KEEP;

	// Color side: plain "src + 0". Nothing in this function changes these values.
	const BlendFactor colorSrc = BlendFactor::ONE;
	const BlendFactor colorDst = BlendFactor::ZERO;
	const BlendEq colorEq = BlendEq::ADD;

	// We're not blending, but we may still want to "blend" for stencil.
	// This is only useful for INCR/DECR/INVERT. Others can write directly.
	bool stencilViaBlend = true;
	BlendEq alphaEq = BlendEq::ADD;
	switch (stencilType) {
	case STENCIL_VALUE_INCR_4:
	case STENCIL_VALUE_INCR_8:
		// Add the value output by the shader to the existing alpha.
		break;

	case STENCIL_VALUE_DECR_4:
	case STENCIL_VALUE_DECR_8:
		// Same idea, but subtracting.
		alphaEq = BlendEq::SUBTRACT;
		break;

	case STENCIL_VALUE_INVERT:
		// The shader will output one, and reverse subtracting will essentially invert.
		alphaEq = BlendEq::REVERSE_SUBTRACT;
		break;

	default:
		stencilViaBlend = false;
		break;
	}

	if (stencilViaBlend) {
		blendState.blendEnabled = true;
		blendState.setFactors(colorSrc, colorDst, BlendFactor::ONE, BlendFactor::ONE);
		blendState.setEquation(colorEq, alphaEq);
		return;
	}

	// With the plain passthrough setup, blending can simply be switched off.
	const bool passthrough = colorSrc == BlendFactor::ONE && colorDst == BlendFactor::ZERO && colorEq == BlendEq::ADD;
	if (passthrough) {
		blendState.blendEnabled = false;
		return;
	}
	blendState.blendEnabled = true;
	blendState.setFactors(colorSrc, colorDst, BlendFactor::ONE, BlendFactor::ZERO);
	blendState.setEquation(colorEq, BlendEq::ADD);
}
1063
1064
// If we can we emulate the colorMask by simply toggling the full R G B A masks offered
1065
// by modern hardware, we do that. This is 99.9% of the time.
1066
// When that's not enough, we fall back on a technique similar to shader blending,
1067
// we read from the framebuffer (or a copy of it).
1068
// We also prepare uniformMask so that if doing this in the shader gets forced-on,
1069
// we have the right mask already.
1070
static void ConvertMaskState(GenericMaskState &maskState, bool shaderBitOpsSupported) {
1071
if (gstate_c.blueToAlpha) {
1072
maskState.applyFramebufferRead = false;
1073
maskState.uniformMask = 0xFF000000;
1074
maskState.channelMask = 0x8;
1075
return;
1076
}
1077
1078
// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
1079
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));
1080
1081
maskState.uniformMask = colorMask;
1082
maskState.applyFramebufferRead = false;
1083
maskState.channelMask = 0;
1084
for (int i = 0; i < 4; i++) {
1085
uint32_t channelMask = (colorMask >> (i * 8)) & 0xFF;
1086
switch (channelMask) {
1087
case 0x0:
1088
break;
1089
case 0xFF:
1090
maskState.channelMask |= 1 << i;
1091
break;
1092
default:
1093
if (shaderBitOpsSupported && PSP_CoreParameter().compat.flags().ShaderColorBitmask) {
1094
// Shaders can emulate masking accurately. Let's make use of that.
1095
maskState.applyFramebufferRead = true;
1096
maskState.channelMask |= 1 << i;
1097
} else {
1098
// Use the old inaccurate heuristic.
1099
if (channelMask >= 128) {
1100
maskState.channelMask |= 1 << i;
1101
}
1102
}
1103
}
1104
}
1105
1106
// Let's not write to alpha if stencil isn't enabled.
1107
// Also if the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
1108
if (IsStencilTestOutputDisabled() || ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
1109
maskState.channelMask &= ~8;
1110
maskState.uniformMask &= ~0xFF000000;
1111
}
1112
1113
// For 5551, only the top alpha bit matters. We might even want to swizzle 4444.
1114
// Alpha should correctly read as 255 from a 5551 texture.
1115
if (gstate.FrameBufFormat() == GE_FORMAT_5551) {
1116
if ((maskState.uniformMask & 0x80000000) != 0)
1117
maskState.uniformMask |= 0xFF000000;
1118
else
1119
maskState.uniformMask &= ~0xFF000000;
1120
}
1121
}
1122
1123
// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.
1124
static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBlend) {
1125
// Blending is a bit complex to emulate. This is due to several reasons:
1126
//
1127
// * Doubled blend modes (src, dst, inversed) aren't supported in OpenGL.
1128
// If possible, we double the src color or src alpha in the shader to account for these.
1129
// These may clip incorrectly, so we avoid unfortunately.
1130
// * OpenGL only has one arbitrary fixed color. We premultiply the other in the shader.
1131
// * The written output alpha should actually be the stencil value. Alpha is not written.
1132
//
1133
// If we can't apply blending, we make a copy of the framebuffer and do it manually.
1134
1135
blendState.applyFramebufferRead = false;
1136
blendState.dirtyShaderBlendFixValues = false;
1137
blendState.useBlendColor = false;
1138
1139
ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.framebufFormat);
1140
if (forceReplaceBlend) {
1141
// Enforce blend replacement if enabled. If not, shouldn't do anything of course.
1142
replaceBlend = gstate.isAlphaBlendEnabled() ? REPLACE_BLEND_READ_FRAMEBUFFER : REPLACE_BLEND_NO;
1143
}
1144
1145
blendState.replaceBlend = replaceBlend;
1146
1147
blendState.simulateLogicOpType = SimulateLogicOpShaderTypeIfNeeded();
1148
1149
ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend);
1150
blendState.replaceAlphaWithStencil = replaceAlphaWithStencil;
1151
1152
bool usePreSrc = false;
1153
1154
bool blueToAlpha = false;
1155
1156
switch (replaceBlend) {
1157
case REPLACE_BLEND_NO:
1158
// We may still want to do something about stencil -> alpha.
1159
ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);
1160
1161
if (forceReplaceBlend) {
1162
// If this is true, the logic and mask replacements will be applied, at least. In that case,
1163
// we should not apply any logic op simulation.
1164
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
1165
}
1166
return;
1167
1168
case REPLACE_BLEND_BLUE_TO_ALPHA:
1169
blueToAlpha = true;
1170
blendState.blendEnabled = gstate.isAlphaBlendEnabled();
1171
// We'll later convert the color blend to blend in the alpha channel.
1172
break;
1173
1174
case REPLACE_BLEND_READ_FRAMEBUFFER:
1175
blendState.blendEnabled = true;
1176
blendState.applyFramebufferRead = true;
1177
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
1178
break;
1179
1180
case REPLACE_BLEND_PRE_SRC:
1181
case REPLACE_BLEND_PRE_SRC_2X_ALPHA:
1182
blendState.blendEnabled = true;
1183
usePreSrc = true;
1184
break;
1185
1186
case REPLACE_BLEND_STANDARD:
1187
case REPLACE_BLEND_2X_ALPHA:
1188
case REPLACE_BLEND_2X_SRC:
1189
blendState.blendEnabled = true;
1190
break;
1191
}
1192
1193
const GEBlendMode blendFuncEq = gstate.getBlendEq();
1194
GEBlendSrcFactor blendFuncA = gstate.getBlendFuncA();
1195
GEBlendDstFactor blendFuncB = gstate.getBlendFuncB();
1196
const u32 fixA = gstate.getFixA();
1197
const u32 fixB = gstate.getFixB();
1198
1199
if (blendFuncA > GE_SRCBLEND_FIXA)
1200
blendFuncA = GE_SRCBLEND_FIXA;
1201
if (blendFuncB > GE_DSTBLEND_FIXB)
1202
blendFuncB = GE_DSTBLEND_FIXB;
1203
1204
int constantAlpha = 255;
1205
BlendFactor constantAlphaGL = BlendFactor::ONE;
1206
if (!IsStencilTestOutputDisabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {
1207
switch (ReplaceAlphaWithStencilType()) {
1208
case STENCIL_VALUE_UNIFORM:
1209
constantAlpha = gstate.getStencilTestRef();
1210
break;
1211
1212
case STENCIL_VALUE_INCR_4:
1213
case STENCIL_VALUE_DECR_4:
1214
constantAlpha = 16;
1215
break;
1216
1217
case STENCIL_VALUE_INCR_8:
1218
case STENCIL_VALUE_DECR_8:
1219
constantAlpha = 1;
1220
break;
1221
1222
default:
1223
break;
1224
}
1225
1226
// Otherwise it will stay GL_ONE.
1227
if (constantAlpha <= 0) {
1228
constantAlphaGL = BlendFactor::ZERO;
1229
} else if (constantAlpha < 255) {
1230
constantAlphaGL = BlendFactor::CONSTANT_ALPHA;
1231
}
1232
}
1233
1234
// Shortcut by using GL_ONE where possible, no need to set blendcolor
1235
bool approxFuncA = false;
1236
BlendFactor glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(fixA, approxFuncA) : genericALookup[blendFuncA];
1237
bool approxFuncB = false;
1238
BlendFactor glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(fixB, approxFuncB) : genericBLookup[blendFuncB];
1239
1240
if (gstate_c.framebufFormat == GE_FORMAT_565) {
1241
if (blendFuncA == GE_SRCBLEND_DSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEDSTALPHA) {
1242
glBlendFuncA = BlendFactor::ZERO;
1243
}
1244
if (blendFuncA == GE_SRCBLEND_INVDSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEINVDSTALPHA) {
1245
glBlendFuncA = BlendFactor::ONE;
1246
}
1247
if (blendFuncB == GE_DSTBLEND_DSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEDSTALPHA) {
1248
glBlendFuncB = BlendFactor::ZERO;
1249
}
1250
if (blendFuncB == GE_DSTBLEND_INVDSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEINVDSTALPHA) {
1251
glBlendFuncB = BlendFactor::ONE;
1252
}
1253
}
1254
1255
if (usePreSrc) {
1256
glBlendFuncA = BlendFactor::ONE;
1257
// Need to pull in the fixed color. TODO: If it hasn't changed, no need to dirty.
1258
if (blendFuncA == GE_SRCBLEND_FIXA) {
1259
blendState.dirtyShaderBlendFixValues = true;
1260
}
1261
}
1262
1263
if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) {
1264
glBlendFuncA = toDualSource(glBlendFuncA);
1265
glBlendFuncB = toDualSource(glBlendFuncB);
1266
}
1267
1268
if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {
1269
if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB != BlendFactor::INVALID) {
1270
// Can use blendcolor trivially.
1271
blendState.setBlendColor(fixA, constantAlpha);
1272
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1273
} else if (glBlendFuncA != BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
1274
// Can use blendcolor trivially.
1275
blendState.setBlendColor(fixB, constantAlpha);
1276
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1277
} else if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
1278
if (blendColorSimilar(fixA, 0xFFFFFF ^ fixB)) {
1279
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1280
glBlendFuncB = BlendFactor::ONE_MINUS_CONSTANT_COLOR;
1281
blendState.setBlendColor(fixA, constantAlpha);
1282
} else if (blendColorSimilar(fixA, fixB)) {
1283
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1284
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1285
blendState.setBlendColor(fixA, constantAlpha);
1286
} else {
1287
DEBUG_LOG(Log::G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", fixA, fixB, blendFuncA, blendFuncB);
1288
// Let's approximate, at least. Close is better than totally off.
1289
const bool nearZeroA = blendColorSimilar(fixA, 0, 64);
1290
const bool nearZeroB = blendColorSimilar(fixB, 0, 64);
1291
if (nearZeroA || blendColorSimilar(fixA, 0xFFFFFF, 64)) {
1292
glBlendFuncA = nearZeroA ? BlendFactor::ZERO : BlendFactor::ONE;
1293
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1294
blendState.setBlendColor(fixB, constantAlpha);
1295
} else {
1296
// We need to pick something. Let's go with A as the fixed color.
1297
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1298
glBlendFuncB = nearZeroB ? BlendFactor::ZERO : BlendFactor::ONE;
1299
blendState.setBlendColor(fixA, constantAlpha);
1300
}
1301
}
1302
} else {
1303
// We optimized both, but that's probably not necessary, so let's pick one to be constant.
1304
if (blendFuncA == GE_SRCBLEND_FIXA && !usePreSrc && approxFuncA) {
1305
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1306
blendState.setBlendColor(fixA, constantAlpha);
1307
} else if (approxFuncB) {
1308
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1309
blendState.setBlendColor(fixB, constantAlpha);
1310
} else {
1311
if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
1312
blendState.defaultBlendColor(constantAlpha);
1313
}
1314
}
1315
}
1316
} else {
1317
if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
1318
blendState.defaultBlendColor(constantAlpha);
1319
}
1320
}
1321
1322
// Some Android devices (especially old Mali, it seems) composite badly if there's alpha in the backbuffer.
1323
// So in non-buffered rendering, we will simply consider the dest alpha to be zero in blending equations.
1324
#ifdef __ANDROID__
1325
if (g_Config.bSkipBufferEffects) {
1326
if (glBlendFuncA == BlendFactor::DST_ALPHA) glBlendFuncA = BlendFactor::ZERO;
1327
if (glBlendFuncB == BlendFactor::DST_ALPHA) glBlendFuncB = BlendFactor::ZERO;
1328
if (glBlendFuncA == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncA = BlendFactor::ONE;
1329
if (glBlendFuncB == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncB = BlendFactor::ONE;
1330
}
1331
#endif
1332
1333
// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.
1334
BlendEq colorEq;
1335
if (gstate_c.Use(GPU_USE_BLEND_MINMAX)) {
1336
colorEq = eqLookup[blendFuncEq];
1337
} else {
1338
colorEq = eqLookupNoMinMax[blendFuncEq];
1339
}
1340
1341
// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't
1342
// do any blending in the alpha channel as that doesn't seem to happen on PSP. So, we attempt to
1343
// apply the stencil to the alpha, since that's what should be stored.
1344
BlendEq alphaEq = BlendEq::ADD;
1345
if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) {
1346
// Let the fragment shader take care of it.
1347
switch (ReplaceAlphaWithStencilType()) {
1348
case STENCIL_VALUE_INCR_4:
1349
case STENCIL_VALUE_INCR_8:
1350
// We'll add the increment value.
1351
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1352
break;
1353
1354
case STENCIL_VALUE_DECR_4:
1355
case STENCIL_VALUE_DECR_8:
1356
// Like add with a small value, but subtracting.
1357
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1358
alphaEq = BlendEq::SUBTRACT;
1359
break;
1360
1361
case STENCIL_VALUE_INVERT:
1362
// This will subtract by one, effectively inverting the bits.
1363
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1364
alphaEq = BlendEq::REVERSE_SUBTRACT;
1365
break;
1366
1367
default:
1368
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ZERO);
1369
break;
1370
}
1371
} else if (!IsStencilTestOutputDisabled()) {
1372
StencilValueType stencilValue = ReplaceAlphaWithStencilType();
1373
if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0x00) {
1374
stencilValue = STENCIL_VALUE_ZERO;
1375
} else if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0xFF) {
1376
stencilValue = STENCIL_VALUE_ONE;
1377
}
1378
switch (stencilValue) {
1379
case STENCIL_VALUE_KEEP:
1380
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
1381
break;
1382
case STENCIL_VALUE_ONE:
1383
// This won't give one but it's our best shot...
1384
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1385
break;
1386
case STENCIL_VALUE_ZERO:
1387
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ZERO);
1388
break;
1389
case STENCIL_VALUE_UNIFORM:
1390
// This won't give a correct value (it multiplies) but it may be better than random values.
1391
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ZERO);
1392
break;
1393
case STENCIL_VALUE_INCR_4:
1394
case STENCIL_VALUE_INCR_8:
1395
// This won't give a correct value always, but it will try to increase at least.
1396
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
1397
break;
1398
case STENCIL_VALUE_DECR_4:
1399
case STENCIL_VALUE_DECR_8:
1400
// This won't give a correct value always, but it will try to decrease at least.
1401
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
1402
alphaEq = BlendEq::SUBTRACT;
1403
break;
1404
case STENCIL_VALUE_INVERT:
1405
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1406
// If the output alpha is near 1, this will basically invert. It's our best shot.
1407
alphaEq = BlendEq::REVERSE_SUBTRACT;
1408
break;
1409
}
1410
} else if (blueToAlpha) {
1411
blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, BlendFactor::ONE, glBlendFuncB);
1412
blendState.setEquation(BlendEq::ADD, colorEq);
1413
return;
1414
} else {
1415
// Retain the existing value when stencil testing is off.
1416
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
1417
}
1418
1419
blendState.setEquation(colorEq, alphaEq);
1420
}
1421
1422
// Decides how the current GE logic op is carried out and records it in logicOpState:
// by the hardware logic op unit, by the shader (via a framebuffer read), or not at
// all here (leaving it to the blend-based simulation fallback).
//
// logicSupported:        hardware logic ops are available.
// shaderBitOpsSupported: the shader path can do bitwise operations.
// forceApplyFramebuffer: an earlier stage already requires a framebuffer read.
static void ConvertLogicOpState(GenericLogicState &logicOpState, bool logicSupported, bool shaderBitOpsSupported, bool forceApplyFramebuffer) {
	// TODO: We can get more detailed with checks here. Some logic ops don't involve the destination at all.
	// Several can be trivially supported even without any bitwise logic.
	if (!gstate.isLogicOpEnabled() || gstate.getLogicOp() == GE_LOGIC_COPY) {
		// No matter what, don't need to do anything.
		logicOpState.logicOpEnabled = false;
		logicOpState.logicOp = GE_LOGIC_COPY;
		logicOpState.applyFramebufferRead = forceApplyFramebuffer;
		return;
	}

	// From here on the logic op is known to be enabled and to be something other than COPY.
	if (forceApplyFramebuffer && shaderBitOpsSupported) {
		// We have to emulate logic ops in the shader.
		logicOpState.logicOpEnabled = false;  // Don't use any hardware logic op, supported or not.
		logicOpState.applyFramebufferRead = true;
		logicOpState.logicOp = gstate.getLogicOp();
	} else if (logicSupported) {
		// We can use hardware logic ops.
		logicOpState.applyFramebufferRead = false;
		logicOpState.logicOpEnabled = true;
		logicOpState.logicOp = gstate.getLogicOp();
	} else if (shaderBitOpsSupported) {
		// D3D11 and some OpenGL versions will end up here.
		// Logic ops not supported, bitops supported. Let's punt to the shader.
		// We should possibly always do this and never use the hardware ops, since they'll mishandle the alpha channel..
		logicOpState.logicOpEnabled = false;  // Don't use any hardware logic op, supported or not.
		logicOpState.applyFramebufferRead = true;
		logicOpState.logicOp = gstate.getLogicOp();
	} else {
		// In this case, the SIMULATE fallback should kick in.
		// Need to make sure this is checking for the same things though...
		logicOpState.logicOpEnabled = false;
		logicOpState.logicOp = GE_LOGIC_COPY;
		logicOpState.applyFramebufferRead = false;
	}
}
1463
1464
// Rewrites the stencil test and ops in `state` for a 5551 framebuffer.
// For 5551, we treat any non-zero value in the buffer as 255. Only zero is treated as zero.
// See: https://github.com/hrydgard/ppsspp/pull/4150#issuecomment-26211193
static void ConvertStencilFunc5551(GenericStencilFuncState &state) {
	// Flaws:
	// - INVERT should convert 1, 5, 0xFF to 0. Currently it won't always.
	// - INCR twice shouldn't change the value.
	// - REPLACE should write 0 for 0x00 - 0x7F, and non-zero for 0x80 - 0xFF.
	// - Write mask may need double checking, but likely only the top bit matters.

	// Whether any of the three ops is REPLACE, i.e. the ref also matters as a written value.
	const bool refIsWritten = state.sFail == GE_STENCILOP_REPLACE || state.zFail == GE_STENCILOP_REPLACE || state.zPass == GE_STENCILOP_REPLACE;
	const u8 maskedRef = state.testRef & state.testMask;
	// The ref collapsed to 0x00 / 0xFF according to its top bit.
	const u8 collapsedRef = (state.testRef & 0x80) != 0 ? 0xFF : 0x00;
	// True when the mask is non-empty and the ref has every masked bit set
	// (captured before the test is rewritten below).
	const bool refFillsMask = maskedRef == (0xFF & state.testMask) && state.testMask != 0;

	// Replace the comparison with `func` against `ref` (unmasked).
	// We can only safely do so if the ref isn't written, or if the written ref is the same.
	auto compareWith = [&](GEComparison func, u8 ref) {
		if (refIsWritten && collapsedRef != ref)
			return;
		state.testFunc = func;
		state.testRef = ref;
		state.testMask = 0xFF;
	};

	// Replace the comparison with a constant result. The test no longer needs the ref,
	// so it is set to the collapsed value when REPLACE uses it, or to 0xFF (a useful
	// value later) when it is unused. The mask is cleared to 0xFF for consistency.
	auto forceOutcome = [&](bool always) {
		state.testFunc = always ? GE_COMP_ALWAYS : GE_COMP_NEVER;
		state.testRef = refIsWritten ? collapsedRef : 0xFF;
		state.testMask = 0xFF;
	};

	switch (state.testFunc) {
	case GE_COMP_NEVER:
	case GE_COMP_ALWAYS:
		// The function is fine as is; only ref/mask get normalized.
		forceOutcome(state.testFunc == GE_COMP_ALWAYS);
		break;

	case GE_COMP_EQUAL:  // maskedRef == maskedBuffer
		if (maskedRef == 0) {
			// Remove any mask, we might have bits less than 255 but that should not match.
			compareWith(GE_COMP_EQUAL, 0);
		} else if (refFillsMask) {
			// Equal to 255, for our buffer, means not equal to zero.
			compareWith(GE_COMP_NOTEQUAL, 0);
		} else {
			// This should never pass, regardless of buffer value. Only 0 and 255 are directly equal.
			forceOutcome(false);
		}
		break;

	case GE_COMP_NOTEQUAL:  // maskedRef != maskedBuffer
		if (maskedRef == 0) {
			// Remove the mask, since our buffer might not be exactly 255.
			compareWith(GE_COMP_NOTEQUAL, 0);
		} else if (refFillsMask) {
			// The only value != 255 is 0, in our buffer.
			compareWith(GE_COMP_EQUAL, 0);
		} else {
			// Every other value evaluates as not equal, always.
			forceOutcome(true);
		}
		break;

	case GE_COMP_LESS:  // maskedRef < maskedBuffer
		if (refFillsMask) {
			// No possible value is less than 255.
			forceOutcome(false);
		} else {
			// "0 < (0 or 255)" and "254 < (0 or 255)" can only work for non zero.
			compareWith(GE_COMP_NOTEQUAL, 0);
		}
		break;

	case GE_COMP_LEQUAL:  // maskedRef <= maskedBuffer
		if (maskedRef == 0) {
			// 0 is <= every possible value.
			forceOutcome(true);
		} else {
			// "1 <= (0 or 255)" and "255 <= (0 or 255)" simply mean, anything but zero.
			compareWith(GE_COMP_NOTEQUAL, 0);
		}
		break;

	case GE_COMP_GREATER:  // maskedRef > maskedBuffer
		if (maskedRef > 0) {
			// "1 > (0 or 255)" and "255 > (0 or 255)" can only match 0.
			compareWith(GE_COMP_EQUAL, 0);
		} else {
			// 0 is never greater than any possible value.
			forceOutcome(false);
		}
		break;

	case GE_COMP_GEQUAL:  // maskedRef >= maskedBuffer
		if (refFillsMask) {
			// 255 is >= every possible value.
			forceOutcome(true);
		} else {
			// "0 >= (0 or 255)" and "254 >= (0 or 255)" are the same, equal to zero.
			compareWith(GE_COMP_EQUAL, 0);
		}
		break;
	}

	// Swap one stencil op for another in all three slots.
	auto replaceOp = [&](GEStencilOp from, GEStencilOp to) {
		if (state.sFail == from)
			state.sFail = to;
		if (state.zFail == from)
			state.zFail = to;
		if (state.zPass == from)
			state.zPass = to;
	};

	// Decrement always zeros, so let's rewrite those to be safe (even if it's not 1.)
	replaceOp(GE_STENCILOP_DECR, GE_STENCILOP_ZERO);

	// The checks below look at the test as rewritten above.
	const bool comparesToZero = state.testRef == 0 && state.testMask != 0;
	if (state.testFunc == GE_COMP_NOTEQUAL && comparesToZero) {
		// If it's != 0 (as optimized above), then we can rewrite INVERT to ZERO.
		// With 1 bit of stencil, INVERT != 0 can only make it 0.
		replaceOp(GE_STENCILOP_INVERT, GE_STENCILOP_ZERO);
	}
	if (state.testFunc == GE_COMP_EQUAL && comparesToZero) {
		// If it's == 0 (as optimized above), then we can rewrite INCR to INVERT.
		// Otherwise we get 1, which we mostly handle, but won't INVERT correctly.
		replaceOp(GE_STENCILOP_INCR, GE_STENCILOP_INVERT);
	}
	if (!refIsWritten && state.testRef == 0xFF) {
		// Safe to use REPLACE instead of INCR.
		replaceOp(GE_STENCILOP_INCR, GE_STENCILOP_REPLACE);
	}
}
1595
1596
// Collapses the stencil write mask for 5551: 0x80 and above becomes 0xff,
// anything lower becomes 0x00.
static void ConvertStencilMask5551(GenericStencilFuncState &state) {
	const bool highHalf = state.writeMask >= 0x80;
	state.writeMask = highHalf ? 0xff : 0x00;
}
1599
1600
// Fills `state` from the GE stencil registers, then adapts it to the current
// framebuffer format (565: no stencil writes; 5551: collapsed mask and test).
// When the stencil test is disabled only writeMask and enabled are written.
void ConvertStencilFuncState(GenericStencilFuncState &state) {
	const bool is5551 = gstate_c.framebufFormat == GE_FORMAT_5551;

	// The PSP's mask is reversed (bits not to write.) Ignore enabled, used for clears too.
	state.writeMask = (~gstate.getStencilWriteMask()) & 0xFF;
	state.enabled = gstate.isStencilTestEnabled();
	if (!state.enabled) {
		if (is5551)
			ConvertStencilMask5551(state);
		return;
	}

	state.sFail = gstate.getStencilOpSFail();
	state.zFail = gstate.getStencilOpZFail();
	state.zPass = gstate.getStencilOpZPass();

	state.testFunc = gstate.getStencilTestFunction();
	state.testRef = gstate.getStencilTestRef();
	state.testMask = gstate.getStencilTestMask();

	const bool depthTest = gstate.isDepthTestEnabled();
	// All reachable outcomes apply the same op (zFail only counts when depth testing is on).
	const bool sameOpForAllOutcomes = (state.sFail == state.zFail || !depthTest) && state.sFail == state.zPass;
	if (sameOpForAllOutcomes) {
		// Common case: we're writing only to stencil (usually REPLACE/REPLACE/REPLACE.)
		// We want to write stencil to alpha in this case, so switch to ALWAYS if already masked.
		const bool depthWrite = gstate.isDepthWriteEnabled();
		const bool rgbMaskAllSet = (gstate.getColorMask() & 0x00FFFFFF) == 0x00FFFFFF;
		if (rgbMaskAllSet && (!depthTest || !depthWrite)) {
			state.testFunc = GE_COMP_ALWAYS;
		}
	}

	if (is5551) {
		ConvertStencilMask5551(state);
		ConvertStencilFunc5551(state);
	} else if (gstate_c.framebufFormat == GE_FORMAT_565) {
		state.writeMask = 0;
	}
	// Hard to do anything useful for 4444, and 8888 is fine.
}
1643
1644
void GenericMaskState::Log() {
1645
WARN_LOG(Log::G3D, "Mask: %08x %01X readfb=%d", uniformMask, channelMask, applyFramebufferRead);
1646
}
1647
1648
void GenericBlendState::Log() {
1649
WARN_LOG(Log::G3D, "Blend: hwenable=%d readfb=%d replblend=%d replalpha=%d",
1650
blendEnabled, applyFramebufferRead, replaceBlend, (int)replaceAlphaWithStencil);
1651
}
1652
1653
void ComputedPipelineState::Convert(bool shaderBitOpsSuppported) {
1654
// Passing on the previous applyFramebufferRead as forceFrameBuffer read in the next one,
1655
// thus propagating forward.
1656
ConvertMaskState(maskState, shaderBitOpsSuppported);
1657
ConvertLogicOpState(logicState, gstate_c.Use(GPU_USE_LOGIC_OP), shaderBitOpsSuppported, maskState.applyFramebufferRead);
1658
ConvertBlendState(blendState, logicState.applyFramebufferRead);
1659
1660
// Note: If the blend state decided it had to use framebuffer reads,
1661
// we need to make sure that both mask and logic also use it, otherwise things will go wrong.
1662
if (blendState.applyFramebufferRead || logicState.applyFramebufferRead) {
1663
maskState.ConvertToShaderBlend();
1664
logicState.ConvertToShaderBlend();
1665
} else {
1666
// If it isn't a read, we may need to change blending to apply the logic op.
1667
logicState.ApplyToBlendState(blendState);
1668
}
1669
}
1670
1671
// Lets the blend-based logic op simulation rewrite the color blend of blendState.
// If it did, the logic op itself is switched off (set to COPY) since blending now
// stands in for it. Otherwise neither state is touched.
void GenericLogicState::ApplyToBlendState(GenericBlendState &blendState) {
	const bool simulated = SimulateLogicOpIfNeeded(blendState.srcColor, blendState.dstColor, blendState.eqColor);
	if (!simulated)
		return;

	if (!blendState.blendEnabled) {
		// If it wasn't turned on, make sure it is now, with a passthrough alpha setup.
		blendState.blendEnabled = true;
		blendState.srcAlpha = BlendFactor::ONE;
		blendState.dstAlpha = BlendFactor::ZERO;
		blendState.eqAlpha = BlendEq::ADD;
	}

	logicOpEnabled = false;
	logicOp = GE_LOGIC_COPY;
}
1684
1685