Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
hrydgard
GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/GPUStateUtils.cpp
5654 views
1
// Copyright (c) 2015- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#include <algorithm>
20
#include <limits>
21
22
#include "Core/ConfigValues.h"
23
#include "Core/System.h"
24
#include "Core/Config.h"
25
#include "Core/Reporting.h"
26
27
#include "GPU/ge_constants.h"
28
#include "GPU/GPUState.h"
29
#include "GPU/Math3D.h"
30
#include "GPU/Common/PresentationCommon.h"
31
32
#include "GPU/Common/GPUStateUtils.h"
33
34
bool IsStencilTestOutputDisabled() {
35
// The mask applies on all stencil ops.
36
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) {
37
if (gstate_c.framebufFormat == GE_FORMAT_565) {
38
return true;
39
}
40
return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP;
41
}
42
return true;
43
}
44
45
bool NeedsTestDiscard() {
46
// We assume this is called only when enabled and not trivially true (may also be for color testing.)
47
if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF)
48
return true;
49
if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled())
50
return true;
51
if (!gstate.isAlphaBlendEnabled())
52
return true;
53
if (gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA && gstate.getBlendFuncA() != GE_SRCBLEND_DOUBLESRCALPHA)
54
return true;
55
// GE_DSTBLEND_DOUBLEINVSRCALPHA is actually inverse double src alpha, and doubling zero is still zero.
56
if (gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA && gstate.getBlendFuncB() != GE_DSTBLEND_DOUBLEINVSRCALPHA) {
57
if (gstate.getBlendFuncB() != GE_DSTBLEND_FIXB || gstate.getFixB() != 0xFFFFFF)
58
return true;
59
}
60
if (gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_ADD && gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE)
61
return true;
62
if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY)
63
return true;
64
65
return false;
66
}
67
68
bool IsAlphaTestTriviallyTrue() {
69
switch (gstate.getAlphaTestFunction()) {
70
case GE_COMP_NEVER:
71
return false;
72
73
case GE_COMP_ALWAYS:
74
return true;
75
76
case GE_COMP_GEQUAL:
77
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
78
return true; // If alpha is full, it doesn't matter what the ref value is.
79
return gstate.getAlphaTestRef() == 0;
80
81
// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.
82
// Speeds up Lumines by a LOT on PowerVR.
83
case GE_COMP_NOTEQUAL:
84
if (gstate.getAlphaTestRef() == 255) {
85
// Likely to be rare. Let's just skip the vertexFullAlpha optimization here instead of adding
86
// complicated code to discard the draw or whatnot.
87
return false;
88
}
89
// Fallthrough on purpose
90
[[fallthrough]];
91
case GE_COMP_GREATER:
92
{
93
// If the texture and vertex only use 1.0 alpha, then the ref value doesn't matter.
94
if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))
95
return true;
96
return gstate.getAlphaTestRef() == 0 && !NeedsTestDiscard();
97
}
98
99
case GE_COMP_LEQUAL:
100
return gstate.getAlphaTestRef() == 255;
101
102
case GE_COMP_EQUAL:
103
case GE_COMP_LESS:
104
return false;
105
106
default:
107
return false;
108
}
109
}
110
111
bool IsAlphaTestAgainstZero() {
112
return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;
113
}
114
115
bool IsColorTestAgainstZero() {
116
return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF;
117
}
118
119
bool IsColorTestTriviallyTrue() {
120
switch (gstate.getColorTestFunction()) {
121
case GE_COMP_NEVER:
122
return false;
123
124
case GE_COMP_ALWAYS:
125
return true;
126
127
case GE_COMP_EQUAL:
128
case GE_COMP_NOTEQUAL:
129
return false;
130
default:
131
return false;
132
}
133
}
134
135
bool IsDepthTestEffectivelyDisabled() {
136
if (!gstate.isDepthTestEnabled())
137
return true;
138
// We can ignore stencil, because ALWAYS and disabled choose the same stencil path.
139
if (gstate.getDepthTestFunction() != GE_COMP_ALWAYS)
140
return false;
141
return !gstate.isDepthWriteEnabled();
142
}
143
144
// Indexed by the source blend factor value. An entry is false for the factors
// that read the source alpha, and true for everything else.
// The table has 16 entries so that any 4-bit factor value is a valid index.
const bool nonAlphaSrcFactors[16] = {
	true,  // GE_SRCBLEND_DSTCOLOR,
	true,  // GE_SRCBLEND_INVDSTCOLOR,
	false, // GE_SRCBLEND_SRCALPHA,
	false, // GE_SRCBLEND_INVSRCALPHA,
	true,  // GE_SRCBLEND_DSTALPHA,
	true,  // GE_SRCBLEND_INVDSTALPHA,
	false, // GE_SRCBLEND_DOUBLESRCALPHA,
	false, // GE_SRCBLEND_DOUBLEINVSRCALPHA,
	true,  // GE_SRCBLEND_DOUBLEDSTALPHA,
	true,  // GE_SRCBLEND_DOUBLEINVDSTALPHA,
	true,  // GE_SRCBLEND_FIXA,
	true,  // 11 (no named factor)
	true,  // 12 (no named factor)
	true,  // 13 (no named factor)
	true,  // 14 (no named factor)
	true,  // 15 (no named factor)
};
162
163
// Indexed by the destination blend factor value. An entry is false for the factors
// that read the source alpha, and true for everything else.
// The table has 16 entries so that any 4-bit factor value is a valid index.
const bool nonAlphaDestFactors[16] = {
	true,  // GE_DSTBLEND_SRCCOLOR,
	true,  // GE_DSTBLEND_INVSRCCOLOR,
	false, // GE_DSTBLEND_SRCALPHA,
	false, // GE_DSTBLEND_INVSRCALPHA,
	true,  // GE_DSTBLEND_DSTALPHA,
	true,  // GE_DSTBLEND_INVDSTALPHA,
	false, // GE_DSTBLEND_DOUBLESRCALPHA,
	false, // GE_DSTBLEND_DOUBLEINVSRCALPHA,
	true,  // GE_DSTBLEND_DOUBLEDSTALPHA,
	true,  // GE_DSTBLEND_DOUBLEINVDSTALPHA,
	true,  // GE_DSTBLEND_FIXB,
	true,  // 11 (no named factor)
	true,  // 12 (no named factor)
	true,  // 13 (no named factor)
	true,  // 14 (no named factor)
	true,  // 15 (no named factor)
};
181
182
// Chooses how the fragment's alpha output should be replaced by the stencil value.
//
// replaceBlend: the blend replacement mode already chosen for this draw.
// Returns:
//  * REPLACE_ALPHA_NO when stencil output is disabled or clear mode is active,
//  * for shader-assisted blending (anything but NO / READ_FRAMEBUFFER): YES when neither
//    blend factor reads source alpha, otherwise DUALSOURCE if GPU_USE_DUALSOURCE_BLEND is
//    available, else NO,
//  * REPLACE_ALPHA_YES otherwise.
ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {
	if (IsStencilTestOutputDisabled() || gstate.isModeClear()) {
		return REPLACE_ALPHA_NO;
	}

	if (replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_READ_FRAMEBUFFER) {
		if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {
			// Source alpha isn't needed by the blend unit, so it's free to carry the stencil value.
			return REPLACE_ALPHA_YES;
		} else {
			if (gstate_c.Use(GPU_USE_DUALSOURCE_BLEND)) {
				return REPLACE_ALPHA_DUALSOURCE;
			} else {
				return REPLACE_ALPHA_NO;
			}
		}
	}

	// NOTE(review): this branch looks unreachable - REPLACE_BLEND_BLUE_TO_ALPHA is neither
	// REPLACE_BLEND_NO nor REPLACE_BLEND_READ_FRAMEBUFFER, so the block above has already
	// returned for it. Kept as-is to preserve behavior; confirm the intended ordering.
	if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) {
		return REPLACE_ALPHA_NO;  // irrelevant
	}

	return REPLACE_ALPHA_YES;
}
205
206
// Maps the current framebuffer format and the stencil "zpass" op to the kind of value
// that should be written in place of alpha. Falls back to STENCIL_VALUE_KEEP for any
// format/op combination not handled explicitly.
StencilValueType ReplaceAlphaWithStencilType() {
	switch (gstate_c.framebufFormat) {
	case GE_FORMAT_565:
		// There's never a stencil value.  Maybe the right alpha is 1?
		return STENCIL_VALUE_ONE;

	case GE_FORMAT_5551:
		switch (gstate.getStencilOpZPass()) {
		// Technically, this should only ever use zero/one.
		case GE_STENCILOP_REPLACE:
			// Only the top bit of the reference value is considered here.
			return (gstate.getStencilTestRef() & 0x80) != 0 ? STENCIL_VALUE_ONE : STENCIL_VALUE_ZERO;

		// Decrementing always zeros, since there's only one bit.
		case GE_STENCILOP_DECR:
		case GE_STENCILOP_ZERO:
			return STENCIL_VALUE_ZERO;

		// Incrementing always fills, since there's only one bit.
		case GE_STENCILOP_INCR:
			return STENCIL_VALUE_ONE;

		case GE_STENCILOP_INVERT:
			return STENCIL_VALUE_INVERT;

		case GE_STENCILOP_KEEP:
			return STENCIL_VALUE_KEEP;
		}
		break;

	case GE_FORMAT_4444:
	case GE_FORMAT_8888:
	case GE_FORMAT_INVALID:
	case GE_FORMAT_DEPTH16:
	case GE_FORMAT_CLUT8:
		switch (gstate.getStencilOpZPass()) {
		case GE_STENCILOP_REPLACE:
			// TODO: Could detect zero here and force ZERO - less uniform updates?
			return STENCIL_VALUE_UNIFORM;

		case GE_STENCILOP_ZERO:
			return STENCIL_VALUE_ZERO;

		case GE_STENCILOP_DECR:
			// The step size depends on the format: 4444 uses the _4 variant, the rest _8.
			return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8;

		case GE_STENCILOP_INCR:
			return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8;

		case GE_STENCILOP_INVERT:
			return STENCIL_VALUE_INVERT;

		case GE_STENCILOP_KEEP:
			return STENCIL_VALUE_KEEP;
		}
		break;
	}

	return STENCIL_VALUE_KEEP;
}
265
266
// Picks the strategy used to emulate the current blend state.
//
// bufferFormat: format of the render target; GE_FORMAT_565 gets special handling in the
//               branches that involve destination alpha.
// Returns one of the REPLACE_BLEND_* values:
//  * BLUE_TO_ALPHA when gstate_c.blueToAlpha is set,
//  * NO when blending is disabled, clear mode is on, the equation is unknown, or the
//    FIXA/FIXB pair is (0xFFFFFF, 0x000000),
//  * otherwise a value chosen from the (equation, src factor, dst factor) combination below.
ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {
	if (gstate_c.blueToAlpha) {
		return REPLACE_BLEND_BLUE_TO_ALPHA;
	}

	if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear()) {
		return REPLACE_BLEND_NO;
	}

	GEBlendMode eq = gstate.getBlendEq();
	// Let's get the non-factor modes out of the way first.
	switch (eq) {
	case GE_BLENDMODE_ABSDIFF:
		return REPLACE_BLEND_READ_FRAMEBUFFER;

	case GE_BLENDMODE_MIN:
	case GE_BLENDMODE_MAX:
		if (gstate_c.Use(GPU_USE_BLEND_MINMAX)) {
			return REPLACE_BLEND_STANDARD;
		} else {
			return REPLACE_BLEND_READ_FRAMEBUFFER;
		}

	case GE_BLENDMODE_MUL_AND_ADD:
	case GE_BLENDMODE_MUL_AND_SUBTRACT:
	case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:
		// Factor-based equations: continue to the factor analysis below.
		break;

	default:
		// Other blend equations simply don't blend on hardware.
		return REPLACE_BLEND_NO;
	}

	GEBlendSrcFactor funcA = gstate.getBlendFuncA();
	GEBlendDstFactor funcB = gstate.getBlendFuncB();

	// Every inner switch below returns on all paths, so no case falls into the next one.
	switch (funcA) {
	case GE_SRCBLEND_DOUBLESRCALPHA:
	case GE_SRCBLEND_DOUBLEINVSRCALPHA:
		// 2x alpha in the source function and not in the dest = source color doubling.
		// Even dest alpha is safe, since we're moving the * 2.0 into the src color.
		switch (funcB) {
		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
			// When inversing, alpha clamping isn't an issue.
			if (funcA == GE_SRCBLEND_DOUBLEINVSRCALPHA)
				return REPLACE_BLEND_2X_ALPHA;
			// Can't double, we need the source color to be correct.
			// Doubling only alpha would clamp the src alpha incorrectly.
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565)
				return REPLACE_BLEND_2X_ALPHA;
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLESRCALPHA:
			// We can't technically do this correctly (due to clamping) without reading the dst color.
			// Using a copy isn't accurate either, though, when there's overlap.
			if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))
				return REPLACE_BLEND_READ_FRAMEBUFFER;
			return REPLACE_BLEND_PRE_SRC_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// For the inverse, doubling alpha is safe, because it will clamp correctly.
			return REPLACE_BLEND_PRE_SRC_2X_ALPHA;

		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
		case GE_DSTBLEND_FIXB:
		default:
			// TODO: Could use vertexFullAlpha, but it's not calculated yet.
			// This outputs the original alpha for the dest factor.
			return REPLACE_BLEND_PRE_SRC;
		}

	case GE_SRCBLEND_DOUBLEDSTALPHA:
		switch (funcB) {
		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
			if (bufferFormat == GE_FORMAT_565) {
				// Dest alpha should be zero.
				return REPLACE_BLEND_STANDARD;
			}
			// Can't double, we need the source color to be correct.
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				// Both blend factors are 0 or 1, no need to read it, since it's known.
				// Doubling will have no effect here.
				return REPLACE_BLEND_STANDARD;
			}
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLESRCALPHA:
		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_2X_ALPHA;
			}
			// Double both src (for dst alpha) and alpha (for dst factor.)
			// But to be accurate (clamping), we need to read the dst color.
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
		case GE_DSTBLEND_FIXB:
		default:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			// We can't technically do this correctly (due to clamping) without reading the dst alpha.
			return REPLACE_BLEND_READ_FRAMEBUFFER;
		}

	case GE_SRCBLEND_DOUBLEINVDSTALPHA:
		// Inverse double dst alpha is tricky.  Doubling the src color is probably the wrong direction,
		// halving might be more correct.  We really need to read the dst color.
		switch (funcB) {
		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_DOUBLESRCALPHA:
		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_2X_ALPHA;
			}
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
		case GE_DSTBLEND_FIXB:
		default:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			return REPLACE_BLEND_READ_FRAMEBUFFER;
		}

	case GE_SRCBLEND_FIXA:
	default:
		switch (funcB) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
			{
				// L.A. Rush ends up here (detail textures at the end of the frame). It uses FIXA = 0 (no src color contribution)
				// but I still can't find a way to replicate the formula.
				// If our framebuffer was floating point we could make it work (since that turns off clamping before blending)
				// by just doubling src_alpha in the shader.
				//
				// It might be possible to replicate it if we implement a 2-pass decomposition:
				// * First pass just does:
				//   src=ZERO dst=SRC_ALPHA.
				// * Second pass renders with white input color. To double the resulting destination color:
				//   src=DST_COLOR dst=ONE
				return REPLACE_BLEND_READ_FRAMEBUFFER;
			}

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// Doubling alpha is safe for the inverse, will clamp to zero either way.
			return REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				// Alpha is irrelevant with this format.
				return REPLACE_BLEND_STANDARD;
			}
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		case GE_DSTBLEND_FIXB:
		default:
			if (gstate.getFixA() == 0xFFFFFF && gstate.getFixB() == 0x000000) {
				// Some games specify this.  Some GPUs may prefer blending off entirely.
				return REPLACE_BLEND_NO;
			} else if (gstate.getFixA() == 0xFFFFFF || gstate.getFixA() == 0x000000 || gstate.getFixB() == 0xFFFFFF || gstate.getFixB() == 0x000000) {
				// We can represent this with standard factors.
				return REPLACE_BLEND_STANDARD;
			} else {
				// Multiply the src color in the shader, that way it's always accurate.
				return REPLACE_BLEND_PRE_SRC;
			}

		case GE_DSTBLEND_SRCCOLOR:
		case GE_DSTBLEND_INVSRCCOLOR:
		case GE_DSTBLEND_SRCALPHA:
		case GE_DSTBLEND_INVSRCALPHA:
		case GE_DSTBLEND_DSTALPHA:
		case GE_DSTBLEND_INVDSTALPHA:
			return REPLACE_BLEND_STANDARD;
		}

	case GE_SRCBLEND_DSTCOLOR:
	case GE_SRCBLEND_INVDSTCOLOR:
	case GE_SRCBLEND_SRCALPHA:
	case GE_SRCBLEND_INVSRCALPHA:
	case GE_SRCBLEND_DSTALPHA:
	case GE_SRCBLEND_INVDSTALPHA:
		switch (funcB) {
		case GE_DSTBLEND_DOUBLESRCALPHA:
			if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {
				// Can't safely double alpha, will clamp.  However, a copy may easily be worse due to overlap.
				if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))
					return REPLACE_BLEND_READ_FRAMEBUFFER;
				// Hm, this is similar to the L.A. Rush case above. This will not be accurate.
				// Wonder in which games we encounter this?
				return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
			} else {
				// This means dst alpha/color is used in the src factor.
				// Unfortunately, copying here causes overlap problems in Silent Hill games (it seems?)
				// We will just hope that doubling alpha for the dst factor will not clamp too badly.
				if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))
					return REPLACE_BLEND_READ_FRAMEBUFFER;
				// Hm, this is similar to the L.A. Rush case above. This will not be accurate.
				// Wonder in which games we encounter this? One example is MotorStorm.
				return REPLACE_BLEND_2X_ALPHA;
			}

		case GE_DSTBLEND_DOUBLEINVSRCALPHA:
			// For inverse, things are simpler.  Clamping isn't an issue, as long as we avoid
			// messing with the other factor's components.
			if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {
				return REPLACE_BLEND_PRE_SRC_2X_ALPHA;
			}
			return REPLACE_BLEND_2X_ALPHA;

		case GE_DSTBLEND_DOUBLEDSTALPHA:
		case GE_DSTBLEND_DOUBLEINVDSTALPHA:
			if (bufferFormat == GE_FORMAT_565) {
				return REPLACE_BLEND_STANDARD;
			}
			return REPLACE_BLEND_READ_FRAMEBUFFER;

		default:
			return REPLACE_BLEND_STANDARD;
		}
	}

	// Should never get here.
	return REPLACE_BLEND_STANDARD;
}
520
521
// Depth slice factor returned by DepthSliceFactor() for plain accurate depth (no clamp, no 16-bit scaling).
static constexpr float DEPTH_SLICE_FACTOR_HIGH = 4.0f;
// Depth slice factor returned by DepthSliceFactor() when GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT is set.
static constexpr float DEPTH_SLICE_FACTOR_16BIT = 256.0f;
523
524
// The supported flag combinations. TODO: Maybe they should be distilled down into an enum.
//
// 0 - "Old"-style GL depth.
//     Or "Non-accurate depth" : effectively ignore minz / maxz. Map Z values based on viewport, which clamps.
//     This skews depth in many instances. Depth can be inverted in this mode if viewport says.
//     This is completely wrong, but works in some cases (probably because some game devs assumed it was how it worked)
//     and avoids some depth clamp issues.
//
// GPU_USE_ACCURATE_DEPTH:
//     Accurate depth: Z in the framebuffer matches the range of Z used on the PSP linearly in some way. We choose
//     a centered range, to simulate clamping by letting otherwise out-of-range pixels survive the 0 and 1 cutoffs.
//     Clip depth based on minz/maxz, and viewport is just a means to scale and center the value, not clipping or mapping to stored values.
//
// GPU_USE_ACCURATE_DEPTH | GPU_USE_DEPTH_CLAMP:
//     Variant of GPU_USE_ACCURATE_DEPTH, just the range is the nice and convenient 0-1 since we can use
//     hardware depth clamp. Only viable in accurate depth mode; clamps depth and therefore uses the full 0-1 range.
//     Using the full 0-1 range is not what accurate means, it's implied by depth clamp (which also means we're clamping.)
//
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT:
// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT | GPU_USE_DEPTH_CLAMP:
//     Only viable in accurate depth mode, means to use a range of the 24-bit depth values available
//     from the GPU to represent the 16-bit values the PSP had, to try to make everything round and
//     z-fight (close to) the same way as on hardware, cheaply (cheaper than rounding depth in fragment shader).
//     We automatically switch to this if Z tests for equality are used.
//     Depth clamp has no effect on the depth scaling here if set, though it will still be enabled
//     and clamp wildly out-of-line values.
//
// Any other combinations of these particular flags are bogus (like for example a lonely GPU_USE_DEPTH_CLAMP).
551
552
// Returns the depth slice factor for the given GPU_USE_* flag combination.
//
// useFlags: bitmask of GPU feature flags; only GPU_USE_ACCURATE_DEPTH,
//           GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT and GPU_USE_DEPTH_CLAMP are inspected.
// Returns 1.0 for old-style depth and for accurate depth with clamp, DEPTH_SLICE_FACTOR_16BIT
// when 16-bit scaling is requested (this takes priority over clamp), and
// DEPTH_SLICE_FACTOR_HIGH for plain accurate depth.
float DepthSliceFactor(u32 useFlags) {
	if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
		// Old style depth.
		return 1.0f;
	}
	if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {
		// Accurate depth but 16-bit resolution, so squish.
		return DEPTH_SLICE_FACTOR_16BIT;
	}
	if (useFlags & GPU_USE_DEPTH_CLAMP) {
		// Accurate depth, but we can use the full range since clamping is available.
		return 1.0f;
	}

	// Standard accurate depth.
	return DEPTH_SLICE_FACTOR_HIGH;
}
569
570
// See class DepthScaleFactors for how to apply.
571
// Builds the (offset, scale) pair for the given GPU_USE_* flag combination.
//
// useFlags: bitmask of GPU feature flags; the same three flags as DepthSliceFactor() are inspected,
//           with the same priority order.
// Returns:
//  * (0, 65535) for old-style depth and for accurate depth with hardware clamp,
//  * (0.5 * (F - 1) / F, 16777215) with F = DEPTH_SLICE_FACTOR_16BIT for 16-bit scaling,
//  * (0.5 * (F - 1) / F, F * 65535) with F = DEPTH_SLICE_FACTOR_HIGH otherwise.
DepthScaleFactors GetDepthScaleFactors(u32 useFlags) {
	if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {
		return DepthScaleFactors(0.0f, 65535.0f);
	}

	if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {
		const double offset = 0.5 * (DEPTH_SLICE_FACTOR_16BIT - 1.0) / DEPTH_SLICE_FACTOR_16BIT;
		// Use one bit for each value, rather than 1.0 / (65535.0 * 256.0).
		const double scale = 16777215.0;
		return DepthScaleFactors(offset, scale);
	} else if (useFlags & GPU_USE_DEPTH_CLAMP) {
		return DepthScaleFactors(0.0f, 65535.0f);
	} else {
		// Note: this offset is computed in single precision and then widened to double.
		const double offset = 0.5f * (DEPTH_SLICE_FACTOR_HIGH - 1.0f) * (1.0f / DEPTH_SLICE_FACTOR_HIGH);
		return DepthScaleFactors(offset, (float)(DEPTH_SLICE_FACTOR_HIGH * 65535.0));
	}
}
588
589
void ConvertViewportAndScissor(const DisplayLayoutConfig &config, bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {
590
out.throughMode = gstate.isModeThrough();
591
592
float renderWidthFactor, renderHeightFactor;
593
float renderX = 0.0f, renderY = 0.0f;
594
float displayOffsetX, displayOffsetY;
595
if (useBufferedRendering) {
596
displayOffsetX = 0.0f;
597
displayOffsetY = 0.0f;
598
renderWidthFactor = (float)renderWidth / (float)bufferWidth;
599
renderHeightFactor = (float)renderHeight / (float)bufferHeight;
600
} else {
601
float pixelW = PSP_CoreParameter().pixelWidth;
602
float pixelH = PSP_CoreParameter().pixelHeight;
603
FRect frame = GetScreenFrame(config.bIgnoreScreenInsets, pixelW, pixelH);
604
FRect rc;
605
CalculateDisplayOutputRect(config, &rc, 480, 272, frame, ROTATION_LOCKED_HORIZONTAL);
606
displayOffsetX = rc.x;
607
displayOffsetY = rc.y;
608
renderWidth = rc.w;
609
renderHeight = rc.h;
610
renderWidthFactor = renderWidth / 480.0f;
611
renderHeightFactor = renderHeight / 272.0f;
612
}
613
614
// We take care negative offsets of in the projection matrix.
615
// These come from split framebuffers (Killzone).
616
// TODO: Might be safe to do get rid of this here and do the same for positive offsets?
617
renderX = std::max(gstate_c.curRTOffsetX, 0);
618
renderY = std::max(gstate_c.curRTOffsetY, 0);
619
620
// Scissor
621
int scissorX1 = gstate.getScissorX1();
622
int scissorY1 = gstate.getScissorY1();
623
int scissorX2 = gstate.getScissorX2() + 1;
624
int scissorY2 = gstate.getScissorY2() + 1;
625
626
if (scissorX2 < scissorX1 || scissorY2 < scissorY1) {
627
out.scissorX = 0;
628
out.scissorY = 0;
629
out.scissorW = 0;
630
out.scissorH = 0;
631
} else {
632
out.scissorX = (renderX * renderWidthFactor) + displayOffsetX + scissorX1 * renderWidthFactor;
633
out.scissorY = (renderY * renderHeightFactor) + displayOffsetY + scissorY1 * renderHeightFactor;
634
out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor;
635
out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor;
636
}
637
638
int curRTWidth = gstate_c.curRTWidth;
639
int curRTHeight = gstate_c.curRTHeight;
640
641
float offsetX = gstate.getOffsetX();
642
float offsetY = gstate.getOffsetY();
643
644
DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());
645
646
if (out.throughMode) {
647
// If renderX/renderY are offset to compensate for a split framebuffer,
648
// applying the offset to the viewport isn't enough, since the viewport clips.
649
// We need to apply either directly to the vertices, or to the "through" projection matrix.
650
out.viewportX = renderX * renderWidthFactor + displayOffsetX;
651
out.viewportY = renderY * renderHeightFactor + displayOffsetY;
652
out.viewportW = curRTWidth * renderWidthFactor;
653
out.viewportH = curRTHeight * renderHeightFactor;
654
out.depthRangeMin = depthScale.EncodeFromU16(0.0f);
655
out.depthRangeMax = depthScale.EncodeFromU16(65536.0f);
656
} else {
657
// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.
658
float vpXScale = gstate.getViewportXScale();
659
float vpXCenter = gstate.getViewportXCenter();
660
float vpYScale = gstate.getViewportYScale();
661
float vpYCenter = gstate.getViewportYCenter();
662
663
// The viewport transform appears to go like this:
664
// Xscreen = -offsetX + vpXCenter + vpXScale * Xview
665
// Yscreen = -offsetY + vpYCenter + vpYScale * Yview
666
// Zscreen = vpZCenter + vpZScale * Zview
667
668
// The viewport is normally centered at 2048,2048 but can also be centered at other locations.
669
// Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover
670
// the desired screen area ([0-480)x[0-272)), so 1808,1912.
671
672
// This means that to get the analogue glViewport we must:
673
float vpX0 = vpXCenter - offsetX - fabsf(vpXScale);
674
float vpY0 = vpYCenter - offsetY - fabsf(vpYScale);
675
gstate_c.vpWidth = vpXScale * 2.0f;
676
gstate_c.vpHeight = vpYScale * 2.0f;
677
678
float vpWidth = fabsf(gstate_c.vpWidth);
679
float vpHeight = fabsf(gstate_c.vpHeight);
680
681
float left = renderX + vpX0;
682
float top = renderY + vpY0;
683
float right = left + vpWidth;
684
float bottom = top + vpHeight;
685
686
out.widthScale = 1.0f;
687
out.xOffset = 0.0f;
688
out.heightScale = 1.0f;
689
out.yOffset = 0.0f;
690
691
// If we're within the bounds, we want clipping the viewport way. So leave it be.
692
{
693
float overageLeft = std::max(-left, 0.0f);
694
float overageRight = std::max(right - bufferWidth, 0.0f);
695
696
// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
697
if (right < scissorX2) {
698
overageRight -= scissorX2 - right;
699
}
700
if (left > scissorX1) {
701
overageLeft += scissorX1 - left;
702
}
703
704
// Our center drifted by the difference in overages.
705
float drift = overageRight - overageLeft;
706
707
if (overageLeft != 0.0f || overageRight != 0.0f) {
708
left += overageLeft;
709
right -= overageRight;
710
711
// Protect against the viewport being entirely outside the scissor.
712
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
713
if (right <= left) {
714
right = left + 1.0f;
715
}
716
717
out.widthScale = vpWidth / (right - left);
718
out.xOffset = drift / (right - left);
719
}
720
}
721
722
{
723
float overageTop = std::max(-top, 0.0f);
724
float overageBottom = std::max(bottom - bufferHeight, 0.0f);
725
726
// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.
727
if (bottom < scissorY2) {
728
overageBottom -= scissorY2 - bottom;
729
}
730
if (top > scissorY1) {
731
overageTop += scissorY1 - top;
732
}
733
// Our center drifted by the difference in overages.
734
float drift = overageBottom - overageTop;
735
736
if (overageTop != 0.0f || overageBottom != 0.0f) {
737
top += overageTop;
738
bottom -= overageBottom;
739
740
// Protect against the viewport being entirely outside the scissor.
741
// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.
742
if (bottom <= top) {
743
bottom = top + 1.0f;
744
}
745
746
out.heightScale = vpHeight / (bottom - top);
747
out.yOffset = drift / (bottom - top);
748
}
749
}
750
751
out.viewportX = left * renderWidthFactor + displayOffsetX;
752
out.viewportY = top * renderHeightFactor + displayOffsetY;
753
out.viewportW = (right - left) * renderWidthFactor;
754
out.viewportH = (bottom - top) * renderHeightFactor;
755
756
// The depth viewport parameters are the same, but we handle it a bit differently.
757
// When clipping is enabled, depth is clamped to [0, 65535]. And minz/maxz discard.
758
// So, we apply the depth range as minz/maxz, and transform for the viewport.
759
float vpZScale = gstate.getViewportZScale();
760
float vpZCenter = gstate.getViewportZCenter();
761
// TODO: This clip the entire draw if minz > maxz.
762
float minz = gstate.getDepthRangeMin();
763
float maxz = gstate.getDepthRangeMax();
764
765
if (gstate.isDepthClampEnabled() && (minz == 0 || maxz == 65535)) {
766
// Here, we should "clamp." But clamping per fragment would be slow.
767
// So, instead, we just increase the available range and hope.
768
// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.
769
float fullDepthRange = 65535.0f * (depthScale.Scale() - 1.0f) * (1.0f / 2.0f);
770
if (minz == 0) {
771
minz -= fullDepthRange;
772
}
773
if (maxz == 65535) {
774
maxz += fullDepthRange;
775
}
776
} else if (maxz == 65535) {
777
// This means clamp isn't enabled, but we still want to allow values up to 65535.99.
778
// If DepthSliceFactor() is 1.0, though, this would make out.depthRangeMax exceed 1.
779
// Since that would clamp, it would make Z=1234 not match between draws when maxz changes.
780
if (depthScale.Scale() > 1.0f)
781
maxz = 65535.99f;
782
}
783
784
// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.
785
float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);
786
out.depthScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;
787
// This adjusts the center from halfActualZRange to vpZCenter.
788
out.zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;
789
790
if (!gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) {
791
out.depthScale = 1.0f;
792
out.zOffset = 0.0f;
793
out.depthRangeMin = depthScale.EncodeFromU16(vpZCenter - vpZScale);
794
out.depthRangeMax = depthScale.EncodeFromU16(vpZCenter + vpZScale);
795
} else {
796
out.depthRangeMin = depthScale.EncodeFromU16(minz);
797
out.depthRangeMax = depthScale.EncodeFromU16(maxz);
798
}
799
800
// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.
801
// Of course, if this happens we've skewed out.depthScale/out.zOffset and may get z-fighting.
802
out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);
803
out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);
804
}
805
}
806
807
void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor) {
808
if (vpAndScissor.throughMode)
809
return;
810
811
bool scaleChanged = gstate_c.vpWidthScale != vpAndScissor.widthScale || gstate_c.vpHeightScale != vpAndScissor.heightScale;
812
bool offsetChanged = gstate_c.vpXOffset != vpAndScissor.xOffset || gstate_c.vpYOffset != vpAndScissor.yOffset;
813
bool depthChanged = gstate_c.vpDepthScale != vpAndScissor.depthScale || gstate_c.vpZOffset != vpAndScissor.zOffset;
814
if (scaleChanged || offsetChanged || depthChanged) {
815
gstate_c.vpWidthScale = vpAndScissor.widthScale;
816
gstate_c.vpHeightScale = vpAndScissor.heightScale;
817
gstate_c.vpDepthScale = vpAndScissor.depthScale;
818
gstate_c.vpXOffset = vpAndScissor.xOffset;
819
gstate_c.vpYOffset = vpAndScissor.yOffset;
820
gstate_c.vpZOffset = vpAndScissor.zOffset;
821
822
gstate_c.Dirty(DIRTY_PROJMATRIX);
823
if (depthChanged) {
824
gstate_c.Dirty(DIRTY_DEPTHRANGE);
825
}
826
}
827
}
828
829
// Source blend factor lookup, in the order of the GE_SRCBLEND_* labels below.
// The "double" variants map to the same factor as their non-doubled counterpart.
static const BlendFactor genericALookup[11] = {
	BlendFactor::DST_COLOR,            // GE_SRCBLEND_DSTCOLOR
	BlendFactor::ONE_MINUS_DST_COLOR,  // GE_SRCBLEND_INVDSTCOLOR
	BlendFactor::SRC_ALPHA,            // GE_SRCBLEND_SRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,  // GE_SRCBLEND_INVSRCALPHA
	BlendFactor::DST_ALPHA,            // GE_SRCBLEND_DSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,  // GE_SRCBLEND_INVDSTALPHA
	BlendFactor::SRC_ALPHA,            // GE_SRCBLEND_DOUBLESRCALPHA
	BlendFactor::ONE_MINUS_SRC_ALPHA,  // GE_SRCBLEND_DOUBLEINVSRCALPHA
	BlendFactor::DST_ALPHA,            // GE_SRCBLEND_DOUBLEDSTALPHA
	BlendFactor::ONE_MINUS_DST_ALPHA,  // GE_SRCBLEND_DOUBLEINVDSTALPHA
	BlendFactor::CONSTANT_COLOR,       // GE_SRCBLEND_FIXA
};
842
843
// Blend factor used for each PSP destination blend function. ConvertBlendState() indexes
// this table with the destination function after clamping it to GE_DSTBLEND_FIXB.
// The "double" functions share the factor of their undoubled counterpart.
static const BlendFactor genericBLookup[11] = {
	/*  0 */ BlendFactor::SRC_COLOR,
	/*  1 */ BlendFactor::ONE_MINUS_SRC_COLOR,
	/*  2 */ BlendFactor::SRC_ALPHA,
	/*  3 */ BlendFactor::ONE_MINUS_SRC_ALPHA,
	/*  4 */ BlendFactor::DST_ALPHA,
	/*  5 */ BlendFactor::ONE_MINUS_DST_ALPHA,
	/*  6 */ BlendFactor::SRC_ALPHA,            // GE_SRCBLEND_DOUBLESRCALPHA
	/*  7 */ BlendFactor::ONE_MINUS_SRC_ALPHA,  // GE_SRCBLEND_DOUBLEINVSRCALPHA
	/*  8 */ BlendFactor::DST_ALPHA,            // GE_SRCBLEND_DOUBLEDSTALPHA
	/*  9 */ BlendFactor::ONE_MINUS_DST_ALPHA,  // GE_SRCBLEND_DOUBLEINVDSTALPHA
	/* 10 */ BlendFactor::CONSTANT_COLOR,       // FIXB
};
856
857
// Blend equation per PSP blend mode, used by ConvertBlendState() when
// GPU_USE_BLEND_MINMAX is not in use: MIN, MAX and ABSDIFF all fall back to ADD.
// Indexed by the value returned from gstate.getBlendEq().
static const BlendEq eqLookupNoMinMax[] = {
	/* 0 */ BlendEq::ADD,
	/* 1 */ BlendEq::SUBTRACT,
	/* 2 */ BlendEq::REVERSE_SUBTRACT,
	/* 3 */ BlendEq::ADD,  // GE_BLENDMODE_MIN
	/* 4 */ BlendEq::ADD,  // GE_BLENDMODE_MAX
	/* 5 */ BlendEq::ADD,  // GE_BLENDMODE_ABSDIFF
	/* 6 */ BlendEq::ADD,
	/* 7 */ BlendEq::ADD,
};
867
868
// Blend equation per PSP blend mode, used by ConvertBlendState() when
// GPU_USE_BLEND_MINMAX is in use. ABSDIFF has no direct entry and maps to MAX.
// Indexed by the value returned from gstate.getBlendEq().
static const BlendEq eqLookup[] = {
	/* 0 */ BlendEq::ADD,
	/* 1 */ BlendEq::SUBTRACT,
	/* 2 */ BlendEq::REVERSE_SUBTRACT,
	/* 3 */ BlendEq::MIN,  // GE_BLENDMODE_MIN
	/* 4 */ BlendEq::MAX,  // GE_BLENDMODE_MAX
	/* 5 */ BlendEq::MAX,  // GE_BLENDMODE_ABSDIFF
	/* 6 */ BlendEq::ADD,
	/* 7 */ BlendEq::ADD,
};
878
879
// Swaps the source-alpha factors for their SRC1 (dual source) equivalents.
// Every other factor is returned unchanged.
static BlendFactor toDualSource(BlendFactor blendfunc) {
	if (blendfunc == BlendFactor::SRC_ALPHA) {
		return BlendFactor::SRC1_ALPHA;
	}
	if (blendfunc == BlendFactor::ONE_MINUS_SRC_ALPHA) {
		return BlendFactor::ONE_MINUS_SRC1_ALPHA;
	}
	return blendfunc;
}
889
890
// Tries to express a fixed blend color as a plain ONE or ZERO factor.
//  - Exactly 0xFFFFFF / 0 yield ONE / ZERO and leave approx untouched.
//  - Anything else sets approx to true; the result is then ONE or ZERO if all three
//    channels are within 1% of that extreme, and INVALID otherwise.
static BlendFactor blendColor2Func(u32 fix, bool &approx) {
	switch (fix) {
	case 0xFFFFFF:
		return BlendFactor::ONE;
	case 0:
		return BlendFactor::ZERO;
	default:
		break;
	}

	// From here on, choosing ONE/ZERO is only an approximation.
	approx = true;

	const Vec3f color = Vec3f::FromRGB(fix);

	const bool nearlyWhite = color.x >= 0.99 && color.y >= 0.99 && color.z >= 0.99;
	if (nearlyWhite) {
		return BlendFactor::ONE;
	}

	const bool nearlyBlack = color.x <= 0.01 && color.y <= 0.01 && color.z <= 0.01;
	if (nearlyBlack) {
		return BlendFactor::ZERO;
	}

	return BlendFactor::INVALID;
}
906
907
// abs is a quagmire of compiler incompatibilities, so we use our own.
// Returns the absolute value of x (x itself when it is non-negative).
inline int iabs(int x) {
	return x >= 0 ? x : -x;
}

// Returns true when the three low bytes (bits 0-7, 8-15 and 16-23) of a and b each
// differ by at most margin. The top byte of both values is ignored.
// The default margin of 25 is roughly 0.1 * 255.
static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) {
	// Convert each channel to int BEFORE subtracting. Subtracting the unsigned values
	// would wrap around whenever a's channel is smaller than b's, and then depend on an
	// out-of-range unsigned -> int conversion to recover the negative difference.
	const int diffx = iabs(static_cast<int>(a & 0xff) - static_cast<int>(b & 0xff));
	const int diffy = iabs(static_cast<int>((a >> 8) & 0xff) - static_cast<int>((b >> 8) & 0xff));
	const int diffz = iabs(static_cast<int>((a >> 16) & 0xff) - static_cast<int>((b >> 16) & 0xff));
	return diffx <= margin && diffy <= margin && diffz <= margin;
}
920
921
// Try to simulate some common logic ops by using blend, if needed.
922
// The shader might also need modification, the below function SimulateLogicOpShaderTypeIfNeeded
923
// takes care of that.
924
static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {
925
if (!gstate.isLogicOpEnabled())
926
return false;
927
928
// Note: our shader solution applies logic ops BEFORE blending, not correctly after.
929
// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.
930
if (!gstate_c.Use(GPU_USE_LOGIC_OP)) {
931
switch (gstate.getLogicOp()) {
932
case GE_LOGIC_CLEAR:
933
srcBlend = BlendFactor::ZERO;
934
dstBlend = BlendFactor::ZERO;
935
blendEq = BlendEq::ADD;
936
return true;
937
case GE_LOGIC_AND:
938
case GE_LOGIC_AND_REVERSE:
939
WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, Log::G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());
940
break;
941
case GE_LOGIC_COPY:
942
// This is the same as off.
943
break;
944
case GE_LOGIC_COPY_INVERTED:
945
// Handled in the shader.
946
break;
947
case GE_LOGIC_AND_INVERTED:
948
case GE_LOGIC_NOR:
949
case GE_LOGIC_NAND:
950
case GE_LOGIC_EQUIV:
951
// Handled in the shader.
952
WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, Log::G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());
953
break;
954
case GE_LOGIC_INVERTED:
955
srcBlend = BlendFactor::ONE;
956
dstBlend = BlendFactor::ONE;
957
blendEq = BlendEq::SUBTRACT;
958
WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, Log::G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());
959
return true;
960
case GE_LOGIC_NOOP:
961
srcBlend = BlendFactor::ZERO;
962
dstBlend = BlendFactor::ONE;
963
blendEq = BlendEq::ADD;
964
return true;
965
case GE_LOGIC_XOR:
966
WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, Log::G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());
967
break;
968
case GE_LOGIC_OR:
969
case GE_LOGIC_OR_INVERTED:
970
// Inverted in shader.
971
srcBlend = BlendFactor::ONE;
972
dstBlend = BlendFactor::ONE;
973
blendEq = BlendEq::ADD;
974
WARN_LOG_REPORT_ONCE(d3dLogicOpOr, Log::G3D, "Attempted or for logic op: %x", gstate.getLogicOp());
975
return true;
976
case GE_LOGIC_OR_REVERSE:
977
WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, Log::G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());
978
break;
979
case GE_LOGIC_SET:
980
srcBlend = BlendFactor::ONE;
981
dstBlend = BlendFactor::ONE;
982
blendEq = BlendEq::ADD;
983
WARN_LOG_REPORT_ONCE(d3dLogicOpSet, Log::G3D, "Attempted set for logic op: %x", gstate.getLogicOp());
984
return true;
985
}
986
} else {
987
// Even if we support hardware logic ops, alpha is handled wrong.
988
// It's better to override blending for the simple cases.
989
switch (gstate.getLogicOp()) {
990
case GE_LOGIC_CLEAR:
991
srcBlend = BlendFactor::ZERO;
992
dstBlend = BlendFactor::ZERO;
993
blendEq = BlendEq::ADD;
994
return true;
995
case GE_LOGIC_NOOP:
996
srcBlend = BlendFactor::ZERO;
997
dstBlend = BlendFactor::ONE;
998
blendEq = BlendEq::ADD;
999
return true;
1000
1001
default:
1002
// Let's hope hardware gets it right.
1003
return false;
1004
}
1005
}
1006
return false;
1007
}
1008
1009
// Choose the shader part of the above logic op fallback simulation.
1010
SimulateLogicOpType SimulateLogicOpShaderTypeIfNeeded() {
1011
if (!gstate_c.Use(GPU_USE_LOGIC_OP) && gstate.isLogicOpEnabled()) {
1012
switch (gstate.getLogicOp()) {
1013
case GE_LOGIC_COPY_INVERTED:
1014
case GE_LOGIC_AND_INVERTED:
1015
case GE_LOGIC_OR_INVERTED:
1016
case GE_LOGIC_NOR:
1017
case GE_LOGIC_NAND:
1018
case GE_LOGIC_EQUIV:
1019
return LOGICOPTYPE_INVERT;
1020
case GE_LOGIC_INVERTED:
1021
return LOGICOPTYPE_ONE;
1022
case GE_LOGIC_SET:
1023
return LOGICOPTYPE_ONE;
1024
default:
1025
return LOGICOPTYPE_NORMAL;
1026
}
1027
}
1028
return LOGICOPTYPE_NORMAL;
1029
}
1030
1031
// Sets up blendState for the case where PSP alpha blending itself is not being applied.
// Blending may still be switched on purely so that the alpha channel can carry a
// stencil INCR/DECR/INVERT; the other stencil value types can be written directly.
//   replaceAlphaWithStencil - only REPLACE_ALPHA_YES causes the stencil type to be consulted.
//   blendState              - receives blendEnabled plus factors/equations when enabled.
void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState) {
	const StencilValueType stencilType =
		replaceAlphaWithStencil == REPLACE_ALPHA_YES ? ReplaceAlphaWithStencilType() : STENCIL_VALUE_KEEP;

	// Normally, we would add src + 0 with blending off, but the logic op may have us do differently.
	const BlendFactor colorSrc = BlendFactor::ONE;
	const BlendFactor colorDst = BlendFactor::ZERO;
	const BlendEq colorEq = BlendEq::ADD;

	// Color passes through unchanged; alpha combines source and destination one-to-one
	// using the given equation.
	auto blendAlphaOneToOne = [&](BlendEq alphaEq) {
		blendState.blendEnabled = true;
		blendState.setFactors(colorSrc, colorDst, BlendFactor::ONE, BlendFactor::ONE);
		blendState.setEquation(colorEq, alphaEq);
	};

	switch (stencilType) {
	case STENCIL_VALUE_INCR_4:
	case STENCIL_VALUE_INCR_8:
		// Add the increment value output by the shader.
		blendAlphaOneToOne(BlendEq::ADD);
		break;

	case STENCIL_VALUE_DECR_4:
	case STENCIL_VALUE_DECR_8:
		// Subtract the decrement value output by the shader.
		blendAlphaOneToOne(BlendEq::SUBTRACT);
		break;

	case STENCIL_VALUE_INVERT:
		// The shader will output one, and reverse subtracting will essentially invert.
		blendAlphaOneToOne(BlendEq::REVERSE_SUBTRACT);
		break;

	default: {
		const bool plainCopy =
			colorSrc == BlendFactor::ONE && colorDst == BlendFactor::ZERO && colorEq == BlendEq::ADD;
		if (plainCopy) {
			// src * 1 + dst * 0 is what disabled blending does anyway.
			blendState.blendEnabled = false;
		} else {
			blendState.blendEnabled = true;
			blendState.setFactors(colorSrc, colorDst, BlendFactor::ONE, BlendFactor::ZERO);
			blendState.setEquation(colorEq, BlendEq::ADD);
		}
		break;
	}
	}
}
1079
1080
// Tells the Convert*State helpers in this file whether they may rely on reading the
// framebuffer in the shader.
enum class FBReadSetting {
	// The framebuffer read path is to be used.
	Forced,
	// The helper may pick the framebuffer read path when it needs it.
	Allowed,
	// The helper must not pick the framebuffer read path.
	Disallowed,
};
1085
1086
// If we can we emulate the colorMask by simply toggling the full R G B A masks offered
1087
// by modern hardware, we do that. This is 99.9% of the time.
1088
// When that's not enough, we fall back on a technique similar to shader blending,
1089
// we read from the framebuffer (or a copy of it).
1090
// We also prepare uniformMask so that if doing this in the shader gets forced-on,
1091
// we have the right mask already.
1092
static void ConvertMaskState(GenericMaskState &maskState, FBReadSetting useShader) {
1093
if (gstate_c.blueToAlpha) {
1094
maskState.applyFramebufferRead = false;
1095
maskState.uniformMask = 0xFF000000;
1096
maskState.channelMask = 0x8;
1097
return;
1098
}
1099
1100
// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.
1101
uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));
1102
1103
maskState.uniformMask = colorMask;
1104
maskState.applyFramebufferRead = false;
1105
maskState.channelMask = 0;
1106
for (int i = 0; i < 4; i++) {
1107
uint32_t channelMask = (colorMask >> (i * 8)) & 0xFF;
1108
switch (channelMask) {
1109
case 0x0:
1110
break;
1111
case 0xFF:
1112
maskState.channelMask |= 1 << i;
1113
break;
1114
default:
1115
if (useShader != FBReadSetting::Disallowed && PSP_CoreParameter().compat.flags().ShaderColorBitmask) {
1116
// Shaders can emulate masking accurately. Let's make use of that.
1117
maskState.applyFramebufferRead = true;
1118
maskState.channelMask |= 1 << i;
1119
} else {
1120
// Use the old inaccurate heuristic.
1121
if (channelMask >= 128) {
1122
maskState.channelMask |= 1 << i;
1123
}
1124
}
1125
}
1126
}
1127
1128
// Let's not write to alpha if stencil isn't enabled.
1129
// Also if the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.
1130
if (IsStencilTestOutputDisabled() || ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {
1131
maskState.channelMask &= ~8;
1132
maskState.uniformMask &= ~0xFF000000;
1133
}
1134
1135
// For 5551, only the top alpha bit matters. We might even want to swizzle 4444.
1136
// Alpha should correctly read as 255 from a 5551 texture.
1137
if (gstate.FrameBufFormat() == GE_FORMAT_5551) {
1138
if ((maskState.uniformMask & 0x80000000) != 0)
1139
maskState.uniformMask |= 0xFF000000;
1140
else
1141
maskState.uniformMask &= ~0xFF000000;
1142
}
1143
}
1144
1145
// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.
1146
static void ConvertBlendState(GenericBlendState &blendState, FBReadSetting useFBRead) {
1147
// Blending is a bit complex to emulate. This is due to several reasons:
1148
//
1149
// * Doubled blend modes (src, dst, inversed) aren't supported in OpenGL.
1150
// If possible, we double the src color or src alpha in the shader to account for these.
1151
// These may clip incorrectly, so we avoid unfortunately.
1152
// * OpenGL only has one arbitrary fixed color. We premultiply the other in the shader.
1153
// * The written output alpha should actually be the stencil value. Alpha is not written.
1154
//
1155
// If we can't apply blending, we make a copy of the framebuffer and do it manually.
1156
1157
if (gstate_c.dstSquared && useFBRead != FBReadSetting::Forced) {
1158
blendState.blendEnabled = true;
1159
blendState.applyFramebufferRead = false;
1160
blendState.dirtyShaderBlendFixValues = false;
1161
blendState.useBlendColor = false;
1162
blendState.replaceBlend = REPLACE_BLEND_NO;
1163
blendState.simulateLogicOpType = SimulateLogicOpShaderTypeIfNeeded();
1164
blendState.replaceAlphaWithStencil = REPLACE_ALPHA_NO;
1165
blendState.setEquation(BlendEq::ADD, BlendEq::ADD);
1166
blendState.setFactors(BlendFactor::ZERO, BlendFactor::DST_COLOR, BlendFactor::ZERO, BlendFactor::ONE);
1167
return;
1168
}
1169
1170
blendState.applyFramebufferRead = false;
1171
blendState.dirtyShaderBlendFixValues = false;
1172
blendState.useBlendColor = false;
1173
1174
ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.framebufFormat);
1175
if (useFBRead == FBReadSetting::Forced) {
1176
// Enforce blend replacement if enabled. If not, shouldn't do anything of course.
1177
replaceBlend = gstate.isAlphaBlendEnabled() ? REPLACE_BLEND_READ_FRAMEBUFFER : REPLACE_BLEND_NO;
1178
}
1179
1180
blendState.replaceBlend = replaceBlend;
1181
1182
blendState.simulateLogicOpType = SimulateLogicOpShaderTypeIfNeeded();
1183
1184
ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend);
1185
blendState.replaceAlphaWithStencil = replaceAlphaWithStencil;
1186
1187
bool usePreSrc = false;
1188
1189
bool blueToAlpha = false;
1190
1191
switch (replaceBlend) {
1192
case REPLACE_BLEND_NO:
1193
// We may still want to do something about stencil -> alpha.
1194
ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);
1195
1196
if (useFBRead == FBReadSetting::Forced) {
1197
// If this is true, the logic and mask replacements will be applied, at least. In that case,
1198
// we should not apply any logic op simulation.
1199
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
1200
}
1201
return;
1202
1203
case REPLACE_BLEND_BLUE_TO_ALPHA:
1204
blueToAlpha = true;
1205
blendState.blendEnabled = gstate.isAlphaBlendEnabled();
1206
// We'll later convert the color blend to blend in the alpha channel.
1207
break;
1208
1209
case REPLACE_BLEND_READ_FRAMEBUFFER:
1210
blendState.blendEnabled = true;
1211
blendState.applyFramebufferRead = true;
1212
blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;
1213
break;
1214
1215
case REPLACE_BLEND_PRE_SRC:
1216
case REPLACE_BLEND_PRE_SRC_2X_ALPHA:
1217
blendState.blendEnabled = true;
1218
usePreSrc = true;
1219
break;
1220
1221
case REPLACE_BLEND_STANDARD:
1222
case REPLACE_BLEND_2X_ALPHA:
1223
case REPLACE_BLEND_2X_SRC:
1224
blendState.blendEnabled = true;
1225
break;
1226
}
1227
1228
const GEBlendMode blendFuncEq = gstate.getBlendEq();
1229
GEBlendSrcFactor blendFuncA = gstate.getBlendFuncA();
1230
GEBlendDstFactor blendFuncB = gstate.getBlendFuncB();
1231
const u32 fixA = gstate.getFixA();
1232
const u32 fixB = gstate.getFixB();
1233
1234
if (blendFuncA > GE_SRCBLEND_FIXA)
1235
blendFuncA = GE_SRCBLEND_FIXA;
1236
if (blendFuncB > GE_DSTBLEND_FIXB)
1237
blendFuncB = GE_DSTBLEND_FIXB;
1238
1239
int constantAlpha = 255;
1240
BlendFactor constantAlphaGL = BlendFactor::ONE;
1241
if (!IsStencilTestOutputDisabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {
1242
switch (ReplaceAlphaWithStencilType()) {
1243
case STENCIL_VALUE_UNIFORM:
1244
constantAlpha = gstate.getStencilTestRef();
1245
break;
1246
1247
case STENCIL_VALUE_INCR_4:
1248
case STENCIL_VALUE_DECR_4:
1249
constantAlpha = 16;
1250
break;
1251
1252
case STENCIL_VALUE_INCR_8:
1253
case STENCIL_VALUE_DECR_8:
1254
constantAlpha = 1;
1255
break;
1256
1257
default:
1258
break;
1259
}
1260
1261
// Otherwise it will stay GL_ONE.
1262
if (constantAlpha <= 0) {
1263
constantAlphaGL = BlendFactor::ZERO;
1264
} else if (constantAlpha < 255) {
1265
constantAlphaGL = BlendFactor::CONSTANT_ALPHA;
1266
}
1267
}
1268
1269
// Shortcut by using GL_ONE where possible, no need to set blendcolor
1270
bool approxFuncA = false;
1271
BlendFactor glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(fixA, approxFuncA) : genericALookup[blendFuncA];
1272
bool approxFuncB = false;
1273
BlendFactor glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(fixB, approxFuncB) : genericBLookup[blendFuncB];
1274
1275
if (gstate_c.framebufFormat == GE_FORMAT_565) {
1276
if (blendFuncA == GE_SRCBLEND_DSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEDSTALPHA) {
1277
glBlendFuncA = BlendFactor::ZERO;
1278
}
1279
if (blendFuncA == GE_SRCBLEND_INVDSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEINVDSTALPHA) {
1280
glBlendFuncA = BlendFactor::ONE;
1281
}
1282
if (blendFuncB == GE_DSTBLEND_DSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEDSTALPHA) {
1283
glBlendFuncB = BlendFactor::ZERO;
1284
}
1285
if (blendFuncB == GE_DSTBLEND_INVDSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEINVDSTALPHA) {
1286
glBlendFuncB = BlendFactor::ONE;
1287
}
1288
}
1289
1290
if (usePreSrc) {
1291
glBlendFuncA = BlendFactor::ONE;
1292
// Need to pull in the fixed color. TODO: If it hasn't changed, no need to dirty.
1293
if (blendFuncA == GE_SRCBLEND_FIXA) {
1294
blendState.dirtyShaderBlendFixValues = true;
1295
}
1296
}
1297
1298
if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) {
1299
glBlendFuncA = toDualSource(glBlendFuncA);
1300
glBlendFuncB = toDualSource(glBlendFuncB);
1301
}
1302
1303
if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {
1304
if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB != BlendFactor::INVALID) {
1305
// Can use blendcolor trivially.
1306
blendState.setBlendColor(fixA, constantAlpha);
1307
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1308
} else if (glBlendFuncA != BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
1309
// Can use blendcolor trivially.
1310
blendState.setBlendColor(fixB, constantAlpha);
1311
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1312
} else if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {
1313
if (blendColorSimilar(fixA, 0xFFFFFF ^ fixB)) {
1314
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1315
glBlendFuncB = BlendFactor::ONE_MINUS_CONSTANT_COLOR;
1316
blendState.setBlendColor(fixA, constantAlpha);
1317
} else if (blendColorSimilar(fixA, fixB)) {
1318
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1319
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1320
blendState.setBlendColor(fixA, constantAlpha);
1321
} else {
1322
DEBUG_LOG(Log::G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", fixA, fixB, blendFuncA, blendFuncB);
1323
// Let's approximate, at least. Close is better than totally off.
1324
const bool nearZeroA = blendColorSimilar(fixA, 0, 64);
1325
const bool nearZeroB = blendColorSimilar(fixB, 0, 64);
1326
if (nearZeroA || blendColorSimilar(fixA, 0xFFFFFF, 64)) {
1327
glBlendFuncA = nearZeroA ? BlendFactor::ZERO : BlendFactor::ONE;
1328
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1329
blendState.setBlendColor(fixB, constantAlpha);
1330
} else {
1331
// We need to pick something. Let's go with A as the fixed color.
1332
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1333
glBlendFuncB = nearZeroB ? BlendFactor::ZERO : BlendFactor::ONE;
1334
blendState.setBlendColor(fixA, constantAlpha);
1335
}
1336
}
1337
} else {
1338
// We optimized both, but that's probably not necessary, so let's pick one to be constant.
1339
if (blendFuncA == GE_SRCBLEND_FIXA && !usePreSrc && approxFuncA) {
1340
glBlendFuncA = BlendFactor::CONSTANT_COLOR;
1341
blendState.setBlendColor(fixA, constantAlpha);
1342
} else if (approxFuncB) {
1343
glBlendFuncB = BlendFactor::CONSTANT_COLOR;
1344
blendState.setBlendColor(fixB, constantAlpha);
1345
} else {
1346
if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
1347
blendState.defaultBlendColor(constantAlpha);
1348
}
1349
}
1350
}
1351
} else {
1352
if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {
1353
blendState.defaultBlendColor(constantAlpha);
1354
}
1355
}
1356
1357
// Some Android devices (especially old Mali, it seems) composite badly if there's alpha in the backbuffer.
1358
// So in non-buffered rendering, we will simply consider the dest alpha to be zero in blending equations.
1359
#if PPSSPP_PLATFORM(ANDROID)
1360
if (g_Config.bSkipBufferEffects) {
1361
if (glBlendFuncA == BlendFactor::DST_ALPHA) glBlendFuncA = BlendFactor::ZERO;
1362
if (glBlendFuncB == BlendFactor::DST_ALPHA) glBlendFuncB = BlendFactor::ZERO;
1363
if (glBlendFuncA == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncA = BlendFactor::ONE;
1364
if (glBlendFuncB == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncB = BlendFactor::ONE;
1365
}
1366
#endif
1367
1368
// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.
1369
BlendEq colorEq;
1370
if (gstate_c.Use(GPU_USE_BLEND_MINMAX)) {
1371
colorEq = eqLookup[blendFuncEq];
1372
} else {
1373
colorEq = eqLookupNoMinMax[blendFuncEq];
1374
}
1375
1376
// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't
1377
// do any blending in the alpha channel as that doesn't seem to happen on PSP. So, we attempt to
1378
// apply the stencil to the alpha, since that's what should be stored.
1379
BlendEq alphaEq = BlendEq::ADD;
1380
if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) {
1381
// Let the fragment shader take care of it.
1382
switch (ReplaceAlphaWithStencilType()) {
1383
case STENCIL_VALUE_INCR_4:
1384
case STENCIL_VALUE_INCR_8:
1385
// We'll add the increment value.
1386
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1387
break;
1388
1389
case STENCIL_VALUE_DECR_4:
1390
case STENCIL_VALUE_DECR_8:
1391
// Like add with a small value, but subtracting.
1392
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1393
alphaEq = BlendEq::SUBTRACT;
1394
break;
1395
1396
case STENCIL_VALUE_INVERT:
1397
// This will subtract by one, effectively inverting the bits.
1398
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1399
alphaEq = BlendEq::REVERSE_SUBTRACT;
1400
break;
1401
1402
default:
1403
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ZERO);
1404
break;
1405
}
1406
} else if (!IsStencilTestOutputDisabled()) {
1407
StencilValueType stencilValue = ReplaceAlphaWithStencilType();
1408
if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0x00) {
1409
stencilValue = STENCIL_VALUE_ZERO;
1410
} else if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0xFF) {
1411
stencilValue = STENCIL_VALUE_ONE;
1412
}
1413
switch (stencilValue) {
1414
case STENCIL_VALUE_KEEP:
1415
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
1416
break;
1417
case STENCIL_VALUE_ONE:
1418
// This won't give one but it's our best shot...
1419
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1420
break;
1421
case STENCIL_VALUE_ZERO:
1422
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ZERO);
1423
break;
1424
case STENCIL_VALUE_UNIFORM:
1425
// This won't give a correct value (it multiplies) but it may be better than random values.
1426
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ZERO);
1427
break;
1428
case STENCIL_VALUE_INCR_4:
1429
case STENCIL_VALUE_INCR_8:
1430
// This won't give a correct value always, but it will try to increase at least.
1431
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
1432
break;
1433
case STENCIL_VALUE_DECR_4:
1434
case STENCIL_VALUE_DECR_8:
1435
// This won't give a correct value always, but it will try to decrease at least.
1436
blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);
1437
alphaEq = BlendEq::SUBTRACT;
1438
break;
1439
case STENCIL_VALUE_INVERT:
1440
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);
1441
// If the output alpha is near 1, this will basically invert. It's our best shot.
1442
alphaEq = BlendEq::REVERSE_SUBTRACT;
1443
break;
1444
}
1445
} else if (blueToAlpha) {
1446
blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, BlendFactor::ONE, glBlendFuncB);
1447
blendState.setEquation(BlendEq::ADD, colorEq);
1448
return;
1449
} else {
1450
// Retain the existing value when stencil testing is off.
1451
blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);
1452
}
1453
1454
blendState.setEquation(colorEq, alphaEq);
1455
}
1456
1457
// Decides how the PSP logic op is to be realized and records the decision in logicOpState:
// through a hardware logic op, through the shader (with a framebuffer read), or not at all.
//   logicSupported        - hardware logic ops may be used.
//   shaderBitOpsSupported - the shader is able to emulate logic ops.
//   useFBRead             - whether a framebuffer read is forced, allowed or disallowed.
static void ConvertLogicOpState(GenericLogicState &logicOpState, bool logicSupported, bool shaderBitOpsSupported, FBReadSetting useFBRead) {
	// No hardware logic op; the op is left as COPY.
	auto turnOff = [&](bool readFramebuffer) {
		logicOpState.logicOpEnabled = false;
		logicOpState.logicOp = GE_LOGIC_COPY;
		logicOpState.applyFramebufferRead = readFramebuffer;
	};
	// Emulate the current op in the shader. Don't use any hardware logic op, supported or not.
	auto emulateInShader = [&]() {
		logicOpState.logicOpEnabled = false;
		logicOpState.applyFramebufferRead = true;
		logicOpState.logicOp = gstate.getLogicOp();
	};

	// TODO: We can get more detailed with checks here. Some logic ops don't involve the destination at all.
	// Several can be trivially supported even without any bitwise logic.
	if (!gstate.isLogicOpEnabled() || gstate.getLogicOp() == GE_LOGIC_COPY) {
		// No matter what, don't need to do anything.
		turnOff(useFBRead == FBReadSetting::Forced);
		return;
	}

	// TODO: Brave story uses GE_INVERTED, this is easy to convert to a blend function - unless blend is also enabled simultaneously.

	if (useFBRead == FBReadSetting::Forced && shaderBitOpsSupported) {
		// We have to emulate logic ops in the shader.
		emulateInShader();
		return;
	}

	if (logicSupported) {
		// We can use hardware logic ops directly, if needed.
		logicOpState.applyFramebufferRead = false;
		if (gstate.isLogicOpEnabled()) {
			logicOpState.logicOpEnabled = true;
			logicOpState.logicOp = gstate.getLogicOp();
		} else {
			logicOpState.logicOpEnabled = false;
			logicOpState.logicOp = GE_LOGIC_COPY;
		}
		return;
	}

	if (shaderBitOpsSupported && useFBRead != FBReadSetting::Disallowed) {
		// D3D11 and some OpenGL versions will end up here.
		// Logic ops are not supported, but bit ops are. Let's punt to the shader.
		// We should possibly always do this and never use the hardware ops, since they'll mishandle the alpha channel..
		emulateInShader();
		return;
	}

	// In this case, the SIMULATE fallback should kick in.
	// Need to make sure this is checking for the same things though...
	turnOff(false);
}
1500
1501
// Rewrites the stencil test and ops in state for a 5551 framebuffer, where any non-zero
// value in the buffer is treated as 255 and only zero is treated as zero.
// See: https://github.com/hrydgard/ppsspp/pull/4150#issuecomment-26211193
//
// Known flaws:
// - INVERT should convert 1, 5, 0xFF to 0. Currently it won't always.
// - INCR twice shouldn't change the value.
// - REPLACE should write 0 for 0x00 - 0x7F, and non-zero for 0x80 - 0xFF.
// - Write mask may need double checking, but likely only the top bit matters.
static void ConvertStencilFunc5551(GenericStencilFuncState &state) {
	// The ref matters beyond the test itself only if some op is REPLACE.
	const bool refNeededByOps =
		state.sFail == GE_STENCILOP_REPLACE || state.zFail == GE_STENCILOP_REPLACE || state.zPass == GE_STENCILOP_REPLACE;
	const u8 refUnderMask = state.testRef & state.testMask;
	// The ref reduced to its top bit: 0xFF when set, 0x00 when clear.
	const u8 refAsOneBit = (state.testRef & 0x80) != 0 ? 0xFF : 0x00;
	// True when the masked ref has every bit of a non-empty mask set, i.e. it plays the
	// role of "255" in the comparisons below. Captured before anything is rewritten.
	const bool refIsMaskMax = refUnderMask == (0xFF & state.testMask) && state.testMask != 0;

	// Replace the test by "func against ref" with a full mask.
	auto compareAgainst = [&](GEComparison func, u8 ref) {
		// We can only safely rewrite if it doesn't use the ref, or if the ref is the same.
		if (refNeededByOps && refAsOneBit != ref) {
			return;
		}
		state.testFunc = func;
		state.testRef = ref;
		state.testMask = 0xFF;
	};
	// Replace the test by ALWAYS or NEVER. Only called when the test doesn't need the ref.
	auto forceOutcome = [&](bool always) {
		state.testFunc = always ? GE_COMP_ALWAYS : GE_COMP_NEVER;
		// If REPLACE needs the ref, rewrite it to 0x00 or 0xFF (the "best" values).
		// Otherwise it is unused, so make it 0xFF which is a useful value later.
		state.testRef = refNeededByOps ? refAsOneBit : 0xFF;
		// Nuke the mask as well, since this is always/never, just for consistency.
		state.testMask = 0xFF;
	};

	switch (state.testFunc) {
	case GE_COMP_NEVER:
	case GE_COMP_ALWAYS:
		// Fine as is, just normalize the ref and mask.
		forceOutcome(state.testFunc == GE_COMP_ALWAYS);
		break;

	case GE_COMP_EQUAL:  // maskedRef == maskedBuffer
		if (refUnderMask == 0) {
			// Remove any mask, we might have bits less than 255 but that should not match.
			compareAgainst(GE_COMP_EQUAL, 0);
		} else if (refIsMaskMax) {
			// Equal to 255, for our buffer, means not equal to zero.
			compareAgainst(GE_COMP_NOTEQUAL, 0);
		} else {
			// This should never pass, regardless of buffer value. Only 0 and 255 are directly equal.
			forceOutcome(false);
		}
		break;

	case GE_COMP_NOTEQUAL:  // maskedRef != maskedBuffer
		if (refUnderMask == 0) {
			// Remove the mask, since our buffer might not be exactly 255.
			compareAgainst(GE_COMP_NOTEQUAL, 0);
		} else if (refIsMaskMax) {
			// The only value != 255 is 0, in our buffer.
			compareAgainst(GE_COMP_EQUAL, 0);
		} else {
			// Every other value evaluates as not equal, always.
			forceOutcome(true);
		}
		break;

	case GE_COMP_LESS:  // maskedRef < maskedBuffer
		if (refIsMaskMax) {
			// No possible value is less than 255.
			forceOutcome(false);
		} else {
			// "0 < (0 or 255)" and "254 < (0 or 255)" can only work for non zero.
			compareAgainst(GE_COMP_NOTEQUAL, 0);
		}
		break;

	case GE_COMP_LEQUAL:  // maskedRef <= maskedBuffer
		if (refUnderMask == 0) {
			// 0 is <= every possible value.
			forceOutcome(true);
		} else {
			// "1 <= (0 or 255)" and "255 <= (0 or 255)" simply mean, anything but zero.
			compareAgainst(GE_COMP_NOTEQUAL, 0);
		}
		break;

	case GE_COMP_GREATER:  // maskedRef > maskedBuffer
		if (refUnderMask > 0) {
			// "1 > (0 or 255)" and "255 > (0 or 255)" can only match 0.
			compareAgainst(GE_COMP_EQUAL, 0);
		} else {
			// 0 is never greater than any possible value.
			forceOutcome(false);
		}
		break;

	case GE_COMP_GEQUAL:  // maskedRef >= maskedBuffer
		if (refIsMaskMax) {
			// 255 is >= every possible value.
			forceOutcome(true);
		} else {
			// "0 >= (0 or 255)" and "254 >= (0 or 255)" are the same, equal to zero.
			compareAgainst(GE_COMP_EQUAL, 0);
		}
		break;
	}

	// Replaces every stencil op equal to `from` with `to`.
	auto substituteOp = [&](GEStencilOp from, GEStencilOp to) {
		if (state.sFail == from) {
			state.sFail = to;
		}
		if (state.zFail == from) {
			state.zFail = to;
		}
		if (state.zPass == from) {
			state.zPass = to;
		}
	};

	// Decrement always zeros, so let's rewrite those to be safe (even if it's not 1.)
	substituteOp(GE_STENCILOP_DECR, GE_STENCILOP_ZERO);

	const bool refIsZeroWithMask = state.testRef == 0 && state.testMask != 0;
	if (state.testFunc == GE_COMP_NOTEQUAL && refIsZeroWithMask) {
		// If it's != 0 (as optimized above), then we can rewrite INVERT to ZERO.
		// With 1 bit of stencil, INVERT != 0 can only make it 0.
		substituteOp(GE_STENCILOP_INVERT, GE_STENCILOP_ZERO);
	}
	if (state.testFunc == GE_COMP_EQUAL && refIsZeroWithMask) {
		// If it's == 0 (as optimized above), then we can rewrite INCR to INVERT.
		// Otherwise we get 1, which we mostly handle, but won't INVERT correctly.
		substituteOp(GE_STENCILOP_INCR, GE_STENCILOP_INVERT);
	}
	if (!refNeededByOps && state.testRef == 0xFF) {
		// Safe to use REPLACE instead of INCR.
		substituteOp(GE_STENCILOP_INCR, GE_STENCILOP_REPLACE);
	}
}
1632
1633
// Collapses the stencil write mask to all-or-nothing for 5551: a mask of 0x80 or above
// becomes 0xff, anything below becomes 0x00.
static void ConvertStencilMask5551(GenericStencilFuncState &state) {
	if (state.writeMask >= 0x80) {
		state.writeMask = 0xff;
	} else {
		state.writeMask = 0x00;
	}
}
1636
1637
// Fills state from the PSP stencil registers, adjusting for the framebuffer format.
// The write mask and enabled flag are always set; when the stencil test is disabled
// the ops and test parameters are left untouched.
void ConvertStencilFuncState(GenericStencilFuncState &state) {
	// The PSP's mask is reversed (bits not to write.) Ignore enabled, used for clears too.
	state.writeMask = (~gstate.getStencilWriteMask()) & 0xFF;
	state.enabled = gstate.isStencilTestEnabled();

	const bool is5551 = gstate_c.framebufFormat == GE_FORMAT_5551;
	if (!state.enabled) {
		if (is5551) {
			ConvertStencilMask5551(state);
		}
		return;
	}

	state.sFail = gstate.getStencilOpSFail();
	state.zFail = gstate.getStencilOpZFail();
	state.zPass = gstate.getStencilOpZPass();

	state.testFunc = gstate.getStencilTestFunction();
	state.testRef = gstate.getStencilTestRef();
	state.testMask = gstate.getStencilTestMask();

	const bool depthTest = gstate.isDepthTestEnabled();
	// All ops that can actually trigger are the same (zFail is irrelevant without a depth test).
	const bool sameOpEverywhere = (state.sFail == state.zFail || !depthTest) && state.sFail == state.zPass;
	if (sameOpEverywhere) {
		// Common case: we're writing only to stencil (usually REPLACE/REPLACE/REPLACE.)
		// We want to write stencil to alpha in this case, so switch to ALWAYS if already masked.
		const bool depthWrite = gstate.isDepthWriteEnabled();
		const bool colorFullyMasked = (gstate.getColorMask() & 0x00FFFFFF) == 0x00FFFFFF;
		if (colorFullyMasked && (!depthTest || !depthWrite)) {
			state.testFunc = GE_COMP_ALWAYS;
		}
	}

	switch (gstate_c.framebufFormat) {
	case GE_FORMAT_565:
		// Nothing gets written to stencil for 565.
		state.writeMask = 0;
		break;

	case GE_FORMAT_5551:
		ConvertStencilMask5551(state);
		ConvertStencilFunc5551(state);
		break;

	default:
		// Hard to do anything useful for 4444, and 8888 is fine.
		break;
	}
}
1680
1681
void GenericMaskState::Log() {
1682
WARN_LOG(Log::G3D, "Mask: %08x %01X readfb=%d", uniformMask, channelMask, applyFramebufferRead);
1683
}
1684
1685
void GenericBlendState::Log() {
1686
WARN_LOG(Log::G3D, "Blend: hwenable=%d readfb=%d replblend=%d replalpha=%d",
1687
blendEnabled, applyFramebufferRead, replaceBlend, (int)replaceAlphaWithStencil);
1688
}
1689
1690
void ComputedPipelineState::Convert(bool shaderBitOpsSupported, bool fbReadAllowed) {
1691
// Passing on the previous applyFramebufferRead as forceFrameBuffer read in the next one,
1692
// thus propagating forward.
1693
FBReadSetting readFB = (fbReadAllowed && shaderBitOpsSupported) ? FBReadSetting::Allowed : FBReadSetting::Disallowed;
1694
ConvertMaskState(maskState, readFB);
1695
readFB = maskState.applyFramebufferRead ? FBReadSetting::Forced : (fbReadAllowed ? FBReadSetting::Allowed : FBReadSetting::Disallowed);
1696
ConvertLogicOpState(logicState, gstate_c.Use(GPU_USE_LOGIC_OP), shaderBitOpsSupported, readFB);
1697
readFB = logicState.applyFramebufferRead ? FBReadSetting::Forced : (fbReadAllowed ? FBReadSetting::Allowed : FBReadSetting::Disallowed);
1698
ConvertBlendState(blendState, readFB);
1699
1700
// Note: If the blend state decided it had to use framebuffer reads,
1701
// we need to make sure that both mask and logic also use it, otherwise things will go wrong.
1702
if (blendState.applyFramebufferRead || logicState.applyFramebufferRead) {
1703
_dbg_assert_(fbReadAllowed);
1704
maskState.ConvertToShaderBlend();
1705
logicState.ConvertToShaderBlend();
1706
} else {
1707
// If it isn't a read, we may need to change blending to apply the logic op.
1708
logicState.ApplyToBlendState(blendState);
1709
}
1710
}
1711
1712
// Tries to express the logic op through blending instead. When
// SimulateLogicOpIfNeeded() reports that it did so, blending is switched on
// if necessary and this state's logic op is reset to a plain copy.
void GenericLogicState::ApplyToBlendState(GenericBlendState &blendState) {
	const bool simulated = SimulateLogicOpIfNeeded(blendState.srcColor, blendState.dstColor, blendState.eqColor);
	if (!simulated) {
		return;
	}

	if (!blendState.blendEnabled) {
		// Blending wasn't on yet: enable it, and set alpha to ONE/ZERO with ADD.
		blendState.eqAlpha = BlendEq::ADD;
		blendState.dstAlpha = BlendFactor::ZERO;
		blendState.srcAlpha = BlendFactor::ONE;
		blendState.blendEnabled = true;
	}

	// The logic op itself is no longer wanted.
	logicOp = GE_LOGIC_COPY;
	logicOpEnabled = false;
}
1725
1726