CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Common/GPUStateUtils.cpp
Views: 1401
// Copyright (c) 2015- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <algorithm>18#include <limits>1920#include "Common/System/Display.h"2122#include "Common/StringUtils.h"23#include "Core/Config.h"24#include "Core/ConfigValues.h"25#include "Core/System.h"2627#include "GPU/ge_constants.h"28#include "GPU/GPUState.h"29#include "GPU/Math3D.h"30#include "GPU/Common/FramebufferManagerCommon.h"31#include "GPU/Common/PresentationCommon.h"32#include "GPU/Common/ShaderId.h"33#include "GPU/Common/VertexDecoderCommon.h"3435#include "GPU/Common/GPUStateUtils.h"3637bool IsStencilTestOutputDisabled() {38// The mask applies on all stencil ops.39if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF) {40if (gstate_c.framebufFormat == GE_FORMAT_565) {41return true;42}43return gstate.getStencilOpZPass() == GE_STENCILOP_KEEP && gstate.getStencilOpZFail() == GE_STENCILOP_KEEP && gstate.getStencilOpSFail() == GE_STENCILOP_KEEP;44}45return true;46}4748bool NeedsTestDiscard() {49// We assume this is called only when enabled and not trivially true (may also be for color testing.)50if (gstate.isStencilTestEnabled() && (gstate.pmska & 0xFF) != 0xFF)51return true;52if (gstate.isDepthTestEnabled() && gstate.isDepthWriteEnabled())53return true;54if (!gstate.isAlphaBlendEnabled())55return true;56if (gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA && gstate.getBlendFuncA() != GE_SRCBLEND_DOUBLESRCALPHA)57return true;58// GE_DSTBLEND_DOUBLEINVSRCALPHA is actually inverse double src alpha, and doubling zero is still zero.59if (gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA && gstate.getBlendFuncB() != GE_DSTBLEND_DOUBLEINVSRCALPHA) {60if (gstate.getBlendFuncB() != GE_DSTBLEND_FIXB || gstate.getFixB() != 0xFFFFFF)61return true;62}63if (gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_ADD && gstate.getBlendEq() != GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE)64return true;65if (gstate.isLogicOpEnabled() && gstate.getLogicOp() != GE_LOGIC_COPY)66return true;6768return false;69}7071bool IsAlphaTestTriviallyTrue() {72switch (gstate.getAlphaTestFunction()) {73case GE_COMP_NEVER:74return false;7576case GE_COMP_ALWAYS:77return true;7879case GE_COMP_GEQUAL:80if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))81return true; // If alpha is full, it doesn't matter what the ref value is.82return gstate.getAlphaTestRef() == 0;8384// Non-zero check. If we have no depth testing (and thus no depth writing), and an alpha func that will result in no change if zero alpha, get rid of the alpha test.85// Speeds up Lumines by a LOT on PowerVR.86case GE_COMP_NOTEQUAL:87if (gstate.getAlphaTestRef() == 255) {88// Likely to be rare. Let's just skip the vertexFullAlpha optimization here instead of adding89// complicated code to discard the draw or whatnot.90return false;91}92// Fallthrough on purpose9394case GE_COMP_GREATER:95{96// If the texture and vertex only use 1.0 alpha, then the ref value doesn't matter.97if (gstate_c.vertexFullAlpha && (gstate_c.textureFullAlpha || !gstate.isTextureAlphaUsed()))98return true;99return gstate.getAlphaTestRef() == 0 && !NeedsTestDiscard();100}101102case GE_COMP_LEQUAL:103return gstate.getAlphaTestRef() == 255;104105case GE_COMP_EQUAL:106case GE_COMP_LESS:107return false;108109default:110return false;111}112}113114bool IsAlphaTestAgainstZero() {115return gstate.getAlphaTestRef() == 0 && gstate.getAlphaTestMask() == 0xFF;116}117118bool IsColorTestAgainstZero() {119return gstate.getColorTestRef() == 0 && gstate.getColorTestMask() == 0xFFFFFF;120}121122bool IsColorTestTriviallyTrue() {123switch (gstate.getColorTestFunction()) {124case GE_COMP_NEVER:125return false;126127case GE_COMP_ALWAYS:128return true;129130case GE_COMP_EQUAL:131case GE_COMP_NOTEQUAL:132return false;133default:134return false;135}136}137138bool IsDepthTestEffectivelyDisabled() {139if (!gstate.isDepthTestEnabled())140return true;141// We can ignore stencil, because ALWAYS and disabled choose the same stencil path.142if (gstate.getDepthTestFunction() != GE_COMP_ALWAYS)143return false;144return !gstate.isDepthWriteEnabled();145}146147const bool nonAlphaSrcFactors[16] = {148true, // GE_SRCBLEND_DSTCOLOR,149true, // GE_SRCBLEND_INVDSTCOLOR,150false, // GE_SRCBLEND_SRCALPHA,151false, // GE_SRCBLEND_INVSRCALPHA,152true, // GE_SRCBLEND_DSTALPHA,153true, // GE_SRCBLEND_INVDSTALPHA,154false, // GE_SRCBLEND_DOUBLESRCALPHA,155false, // GE_SRCBLEND_DOUBLEINVSRCALPHA,156true, // GE_SRCBLEND_DOUBLEDSTALPHA,157true, // GE_SRCBLEND_DOUBLEINVDSTALPHA,158true, // GE_SRCBLEND_FIXA,159true,160true,161true,162true,163true,164};165166const bool nonAlphaDestFactors[16] = {167true, // GE_DSTBLEND_SRCCOLOR,168true, // GE_DSTBLEND_INVSRCCOLOR,169false, // GE_DSTBLEND_SRCALPHA,170false, // GE_DSTBLEND_INVSRCALPHA,171true, // GE_DSTBLEND_DSTALPHA,172true, // GE_DSTBLEND_INVDSTALPHA,173false, // GE_DSTBLEND_DOUBLESRCALPHA,174false, // GE_DSTBLEND_DOUBLEINVSRCALPHA,175true, // GE_DSTBLEND_DOUBLEDSTALPHA,176true, // GE_DSTBLEND_DOUBLEINVDSTALPHA,177true, // GE_DSTBLEND_FIXB,178true,179true,180true,181true,182true,183};184185ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend) {186if (IsStencilTestOutputDisabled() || gstate.isModeClear()) {187return REPLACE_ALPHA_NO;188}189190if (replaceBlend != REPLACE_BLEND_NO && replaceBlend != REPLACE_BLEND_READ_FRAMEBUFFER) {191if (nonAlphaSrcFactors[gstate.getBlendFuncA()] && nonAlphaDestFactors[gstate.getBlendFuncB()]) {192return REPLACE_ALPHA_YES;193} else {194if (gstate_c.Use(GPU_USE_DUALSOURCE_BLEND)) {195return REPLACE_ALPHA_DUALSOURCE;196} else {197return REPLACE_ALPHA_NO;198}199}200}201202if (replaceBlend == ReplaceBlendType::REPLACE_BLEND_BLUE_TO_ALPHA) {203return REPLACE_ALPHA_NO; // irrelevant204}205206return REPLACE_ALPHA_YES;207}208209StencilValueType ReplaceAlphaWithStencilType() {210switch (gstate_c.framebufFormat) {211case GE_FORMAT_565:212// There's never a stencil value. Maybe the right alpha is 1?213return STENCIL_VALUE_ONE;214215case GE_FORMAT_5551:216switch (gstate.getStencilOpZPass()) {217// Technically, this should only ever use zero/one.218case GE_STENCILOP_REPLACE:219return (gstate.getStencilTestRef() & 0x80) != 0 ? STENCIL_VALUE_ONE : STENCIL_VALUE_ZERO;220221// Decrementing always zeros, since there's only one bit.222case GE_STENCILOP_DECR:223case GE_STENCILOP_ZERO:224return STENCIL_VALUE_ZERO;225226// Incrementing always fills, since there's only one bit.227case GE_STENCILOP_INCR:228return STENCIL_VALUE_ONE;229230case GE_STENCILOP_INVERT:231return STENCIL_VALUE_INVERT;232233case GE_STENCILOP_KEEP:234return STENCIL_VALUE_KEEP;235}236break;237238case GE_FORMAT_4444:239case GE_FORMAT_8888:240case GE_FORMAT_INVALID:241case GE_FORMAT_DEPTH16:242case GE_FORMAT_CLUT8:243switch (gstate.getStencilOpZPass()) {244case GE_STENCILOP_REPLACE:245// TODO: Could detect zero here and force ZERO - less uniform updates?246return STENCIL_VALUE_UNIFORM;247248case GE_STENCILOP_ZERO:249return STENCIL_VALUE_ZERO;250251case GE_STENCILOP_DECR:252return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_DECR_4 : STENCIL_VALUE_DECR_8;253254case GE_STENCILOP_INCR:255return gstate_c.framebufFormat == GE_FORMAT_4444 ? STENCIL_VALUE_INCR_4 : STENCIL_VALUE_INCR_8;256257case GE_STENCILOP_INVERT:258return STENCIL_VALUE_INVERT;259260case GE_STENCILOP_KEEP:261return STENCIL_VALUE_KEEP;262}263break;264}265266return STENCIL_VALUE_KEEP;267}268269ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat) {270if (gstate_c.blueToAlpha) {271return REPLACE_BLEND_BLUE_TO_ALPHA;272}273274if (!gstate.isAlphaBlendEnabled() || gstate.isModeClear()) {275return REPLACE_BLEND_NO;276}277278GEBlendMode eq = gstate.getBlendEq();279// Let's get the non-factor modes out of the way first.280switch (eq) {281case GE_BLENDMODE_ABSDIFF:282return REPLACE_BLEND_READ_FRAMEBUFFER;283284case GE_BLENDMODE_MIN:285case GE_BLENDMODE_MAX:286if (gstate_c.Use(GPU_USE_BLEND_MINMAX)) {287return REPLACE_BLEND_STANDARD;288} else {289return REPLACE_BLEND_READ_FRAMEBUFFER;290}291292case GE_BLENDMODE_MUL_AND_ADD:293case GE_BLENDMODE_MUL_AND_SUBTRACT:294case GE_BLENDMODE_MUL_AND_SUBTRACT_REVERSE:295// Handled below.296break;297298default:299// Other blend equations simply don't blend on hardware.300return REPLACE_BLEND_NO;301}302303GEBlendSrcFactor funcA = gstate.getBlendFuncA();304GEBlendDstFactor funcB = gstate.getBlendFuncB();305306switch (funcA) {307case GE_SRCBLEND_DOUBLESRCALPHA:308case GE_SRCBLEND_DOUBLEINVSRCALPHA:309// 2x alpha in the source function and not in the dest = source color doubling.310// Even dest alpha is safe, since we're moving the * 2.0 into the src color.311switch (funcB) {312case GE_DSTBLEND_SRCCOLOR:313case GE_DSTBLEND_INVSRCCOLOR:314// When inversing, alpha clamping isn't an issue.315if (funcA == GE_SRCBLEND_DOUBLEINVSRCALPHA)316return REPLACE_BLEND_2X_ALPHA;317// Can't double, we need the source color to be correct.318// Doubling only alpha would clamp the src alpha incorrectly.319return REPLACE_BLEND_READ_FRAMEBUFFER;320321case GE_DSTBLEND_DOUBLEDSTALPHA:322case GE_DSTBLEND_DOUBLEINVDSTALPHA:323if (bufferFormat == GE_FORMAT_565)324return REPLACE_BLEND_2X_ALPHA;325return REPLACE_BLEND_READ_FRAMEBUFFER;326327case GE_DSTBLEND_DOUBLESRCALPHA:328// We can't technically do this correctly (due to clamping) without reading the dst color.329// Using a copy isn't accurate either, though, when there's overlap.330if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))331return REPLACE_BLEND_READ_FRAMEBUFFER;332return REPLACE_BLEND_PRE_SRC_2X_ALPHA;333334case GE_DSTBLEND_DOUBLEINVSRCALPHA:335// For the inverse, doubling alpha is safe, because it will clamp correctly.336return REPLACE_BLEND_PRE_SRC_2X_ALPHA;337338case GE_DSTBLEND_SRCALPHA:339case GE_DSTBLEND_INVSRCALPHA:340case GE_DSTBLEND_DSTALPHA:341case GE_DSTBLEND_INVDSTALPHA:342case GE_DSTBLEND_FIXB:343default:344// TODO: Could use vertexFullAlpha, but it's not calculated yet.345// This outputs the original alpha for the dest factor.346return REPLACE_BLEND_PRE_SRC;347}348349case GE_SRCBLEND_DOUBLEDSTALPHA:350switch (funcB) {351case GE_DSTBLEND_SRCCOLOR:352case GE_DSTBLEND_INVSRCCOLOR:353if (bufferFormat == GE_FORMAT_565) {354// Dest alpha should be zero.355return REPLACE_BLEND_STANDARD;356}357// Can't double, we need the source color to be correct.358return REPLACE_BLEND_READ_FRAMEBUFFER;359360case GE_DSTBLEND_DOUBLEDSTALPHA:361case GE_DSTBLEND_DOUBLEINVDSTALPHA:362if (bufferFormat == GE_FORMAT_565) {363// Both blend factors are 0 or 1, no need to read it, since it's known.364// Doubling will have no effect here.365return REPLACE_BLEND_STANDARD;366}367return REPLACE_BLEND_READ_FRAMEBUFFER;368369case GE_DSTBLEND_DOUBLESRCALPHA:370case GE_DSTBLEND_DOUBLEINVSRCALPHA:371if (bufferFormat == GE_FORMAT_565) {372return REPLACE_BLEND_2X_ALPHA;373}374// Double both src (for dst alpha) and alpha (for dst factor.)375// But to be accurate (clamping), we need to read the dst color.376return REPLACE_BLEND_READ_FRAMEBUFFER;377378case GE_DSTBLEND_SRCALPHA:379case GE_DSTBLEND_INVSRCALPHA:380case GE_DSTBLEND_DSTALPHA:381case GE_DSTBLEND_INVDSTALPHA:382case GE_DSTBLEND_FIXB:383default:384if (bufferFormat == GE_FORMAT_565) {385return REPLACE_BLEND_STANDARD;386}387// We can't technically do this correctly (due to clamping) without reading the dst alpha.388return REPLACE_BLEND_READ_FRAMEBUFFER;389}390391case GE_SRCBLEND_DOUBLEINVDSTALPHA:392// Inverse double dst alpha is tricky. Doubling the src color is probably the wrong direction,393// halving might be more correct. We really need to read the dst color.394switch (funcB) {395case GE_DSTBLEND_SRCCOLOR:396case GE_DSTBLEND_INVSRCCOLOR:397case GE_DSTBLEND_DOUBLEDSTALPHA:398case GE_DSTBLEND_DOUBLEINVDSTALPHA:399if (bufferFormat == GE_FORMAT_565) {400return REPLACE_BLEND_STANDARD;401}402return REPLACE_BLEND_READ_FRAMEBUFFER;403404case GE_DSTBLEND_DOUBLESRCALPHA:405case GE_DSTBLEND_DOUBLEINVSRCALPHA:406if (bufferFormat == GE_FORMAT_565) {407return REPLACE_BLEND_2X_ALPHA;408}409return REPLACE_BLEND_READ_FRAMEBUFFER;410411case GE_DSTBLEND_SRCALPHA:412case GE_DSTBLEND_INVSRCALPHA:413case GE_DSTBLEND_DSTALPHA:414case GE_DSTBLEND_INVDSTALPHA:415case GE_DSTBLEND_FIXB:416default:417if (bufferFormat == GE_FORMAT_565) {418return REPLACE_BLEND_STANDARD;419}420return REPLACE_BLEND_READ_FRAMEBUFFER;421}422423case GE_SRCBLEND_FIXA:424default:425switch (funcB) {426case GE_DSTBLEND_DOUBLESRCALPHA:427// Can't safely double alpha, will clamp.428return REPLACE_BLEND_READ_FRAMEBUFFER;429430case GE_DSTBLEND_DOUBLEINVSRCALPHA:431// Doubling alpha is safe for the inverse, will clamp to zero either way.432return REPLACE_BLEND_2X_ALPHA;433434case GE_DSTBLEND_DOUBLEDSTALPHA:435case GE_DSTBLEND_DOUBLEINVDSTALPHA:436if (bufferFormat == GE_FORMAT_565) {437return REPLACE_BLEND_STANDARD;438}439return REPLACE_BLEND_READ_FRAMEBUFFER;440441case GE_DSTBLEND_FIXB:442default:443if (gstate.getFixA() == 0xFFFFFF && gstate.getFixB() == 0x000000) {444// Some games specify this. Some cards may prefer blending off entirely.445return REPLACE_BLEND_NO;446} else if (gstate.getFixA() == 0xFFFFFF || gstate.getFixA() == 0x000000 || gstate.getFixB() == 0xFFFFFF || gstate.getFixB() == 0x000000) {447return REPLACE_BLEND_STANDARD;448} else {449// Multiply the src color in the shader, that way it's always accurate.450return REPLACE_BLEND_PRE_SRC;451}452453case GE_DSTBLEND_SRCCOLOR:454case GE_DSTBLEND_INVSRCCOLOR:455case GE_DSTBLEND_SRCALPHA:456case GE_DSTBLEND_INVSRCALPHA:457case GE_DSTBLEND_DSTALPHA:458case GE_DSTBLEND_INVDSTALPHA:459return REPLACE_BLEND_STANDARD;460}461462case GE_SRCBLEND_DSTCOLOR:463case GE_SRCBLEND_INVDSTCOLOR:464case GE_SRCBLEND_SRCALPHA:465case GE_SRCBLEND_INVSRCALPHA:466case GE_SRCBLEND_DSTALPHA:467case GE_SRCBLEND_INVDSTALPHA:468switch (funcB) {469case GE_DSTBLEND_DOUBLESRCALPHA:470if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {471// Can't safely double alpha, will clamp. However, a copy may easily be worse due to overlap.472if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))473return REPLACE_BLEND_READ_FRAMEBUFFER;474return REPLACE_BLEND_PRE_SRC_2X_ALPHA;475} else {476// This means dst alpha/color is used in the src factor.477// Unfortunately, copying here causes overlap problems in Silent Hill games (it seems?)478// We will just hope that doubling alpha for the dst factor will not clamp too badly.479if (gstate_c.Use(GPU_USE_FRAMEBUFFER_FETCH))480return REPLACE_BLEND_READ_FRAMEBUFFER;481return REPLACE_BLEND_2X_ALPHA;482}483484case GE_DSTBLEND_DOUBLEINVSRCALPHA:485// For inverse, things are simpler. Clamping isn't an issue, as long as we avoid486// messing with the other factor's components.487if (funcA == GE_SRCBLEND_SRCALPHA || funcA == GE_SRCBLEND_INVSRCALPHA) {488return REPLACE_BLEND_PRE_SRC_2X_ALPHA;489}490return REPLACE_BLEND_2X_ALPHA;491492case GE_DSTBLEND_DOUBLEDSTALPHA:493case GE_DSTBLEND_DOUBLEINVDSTALPHA:494if (bufferFormat == GE_FORMAT_565) {495return REPLACE_BLEND_STANDARD;496}497return REPLACE_BLEND_READ_FRAMEBUFFER;498499default:500return REPLACE_BLEND_STANDARD;501}502}503504// Should never get here.505return REPLACE_BLEND_STANDARD;506}507508static const float DEPTH_SLICE_FACTOR_HIGH = 4.0f;509static const float DEPTH_SLICE_FACTOR_16BIT = 256.0f;510511// The supported flag combinations. TODO: Maybe they should be distilled down into an enum.512//513// 0 - "Old"-style GL depth.514// Or "Non-accurate depth" : effectively ignore minz / maxz. Map Z values based on viewport, which clamps.515// This skews depth in many instances. Depth can be inverted in this mode if viewport says.516// This is completely wrong, but works in some cases (probably because some game devs assumed it was how it worked)517// and avoids some depth clamp issues.518//519// GPU_USE_ACCURATE_DEPTH:520// Accurate depth: Z in the framebuffer matches the range of Z used on the PSP linearly in some way. We choose521// a centered range, to simulate clamping by letting otherwise out-of-range pixels survive the 0 and 1 cutoffs.522// Clip depth based on minz/maxz, and viewport is just a means to scale and center the value, not clipping or mapping to stored values.523//524// GPU_USE_ACCURATE_DEPTH | GPU_USE_DEPTH_CLAMP:525// Variant of GPU_USE_ACCURATE_DEPTH, just the range is the nice and convenient 0-1 since we can use526// hardware depth clamp. only viable in accurate depth mode, clamps depth and therefore uses the full 0-1 range. Using the full 0-1 range is not what accurate means, it's implied by depth clamp (which also means we're clamping.)527//528// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT:529// GPU_USE_ACCURATE_DEPTH | GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT | GPU_USE_DEPTH_CLAMP:530// Only viable in accurate depth mode, means to use a range of the 24-bit depth values available531// from the GPU to represent the 16-bit values the PSP had, to try to make everything round and532// z-fight (close to) the same way as on hardware, cheaply (cheaper than rounding depth in fragment shader).533// We automatically switch to this if Z tests for equality are used.534// Depth clamp has no effect on the depth scaling here if set, though will still be enabled535// and clamp wildly out of line values.536//537// Any other combinations of these particular flags are bogus (like for example a lonely GPU_USE_DEPTH_CLAMP).538539float DepthSliceFactor(u32 useFlags) {540if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {541// Old style depth.542return 1.0f;543}544if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {545// Accurate depth but 16-bit resolution, so squish.546return DEPTH_SLICE_FACTOR_16BIT;547}548if (useFlags & GPU_USE_DEPTH_CLAMP) {549// Accurate depth, but we can use the full range since clamping is available.550return 1.0f;551}552553// Standard accurate depth.554return DEPTH_SLICE_FACTOR_HIGH;555}556557// See class DepthScaleFactors for how to apply.558DepthScaleFactors GetDepthScaleFactors(u32 useFlags) {559if (!(useFlags & GPU_USE_ACCURATE_DEPTH)) {560return DepthScaleFactors(0.0f, 65535.0f);561}562563if (useFlags & GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT) {564const double offset = 0.5 * (DEPTH_SLICE_FACTOR_16BIT - 1.0) / DEPTH_SLICE_FACTOR_16BIT;565// Use one bit for each value, rather than 1.0 / (65535.0 * 256.0).566const double scale = 16777215.0;567return DepthScaleFactors(offset, scale);568} else if (useFlags & GPU_USE_DEPTH_CLAMP) {569return DepthScaleFactors(0.0f, 65535.0f);570} else {571const double offset = 0.5f * (DEPTH_SLICE_FACTOR_HIGH - 1.0f) * (1.0f / DEPTH_SLICE_FACTOR_HIGH);572return DepthScaleFactors(offset, (float)(DEPTH_SLICE_FACTOR_HIGH * 65535.0));573}574}575576void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out) {577out.throughMode = gstate.isModeThrough();578579float renderWidthFactor, renderHeightFactor;580float renderX = 0.0f, renderY = 0.0f;581float displayOffsetX, displayOffsetY;582if (useBufferedRendering) {583displayOffsetX = 0.0f;584displayOffsetY = 0.0f;585renderWidthFactor = (float)renderWidth / (float)bufferWidth;586renderHeightFactor = (float)renderHeight / (float)bufferHeight;587} else {588float pixelW = PSP_CoreParameter().pixelWidth;589float pixelH = PSP_CoreParameter().pixelHeight;590FRect frame = GetScreenFrame(pixelW, pixelH);591FRect rc;592CalculateDisplayOutputRect(&rc, 480, 272, frame, ROTATION_LOCKED_HORIZONTAL);593displayOffsetX = rc.x;594displayOffsetY = rc.y;595renderWidth = rc.w;596renderHeight = rc.h;597renderWidthFactor = renderWidth / 480.0f;598renderHeightFactor = renderHeight / 272.0f;599}600601// We take care negative offsets of in the projection matrix.602// These come from split framebuffers (Killzone).603// TODO: Might be safe to do get rid of this here and do the same for positive offsets?604renderX = std::max(gstate_c.curRTOffsetX, 0);605renderY = std::max(gstate_c.curRTOffsetY, 0);606607// Scissor608int scissorX1 = gstate.getScissorX1();609int scissorY1 = gstate.getScissorY1();610int scissorX2 = gstate.getScissorX2() + 1;611int scissorY2 = gstate.getScissorY2() + 1;612613if (scissorX2 < scissorX1 || scissorY2 < scissorY1) {614out.scissorX = 0;615out.scissorY = 0;616out.scissorW = 0;617out.scissorH = 0;618} else {619out.scissorX = (renderX * renderWidthFactor) + displayOffsetX + scissorX1 * renderWidthFactor;620out.scissorY = (renderY * renderHeightFactor) + displayOffsetY + scissorY1 * renderHeightFactor;621out.scissorW = (scissorX2 - scissorX1) * renderWidthFactor;622out.scissorH = (scissorY2 - scissorY1) * renderHeightFactor;623}624625int curRTWidth = gstate_c.curRTWidth;626int curRTHeight = gstate_c.curRTHeight;627628float offsetX = gstate.getOffsetX();629float offsetY = gstate.getOffsetY();630631DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());632633if (out.throughMode) {634// If renderX/renderY are offset to compensate for a split framebuffer,635// applying the offset to the viewport isn't enough, since the viewport clips.636// We need to apply either directly to the vertices, or to the "through" projection matrix.637out.viewportX = renderX * renderWidthFactor + displayOffsetX;638out.viewportY = renderY * renderHeightFactor + displayOffsetY;639out.viewportW = curRTWidth * renderWidthFactor;640out.viewportH = curRTHeight * renderHeightFactor;641out.depthRangeMin = depthScale.EncodeFromU16(0.0f);642out.depthRangeMax = depthScale.EncodeFromU16(65536.0f);643} else {644// These we can turn into a glViewport call, offset by offsetX and offsetY. Math after.645float vpXScale = gstate.getViewportXScale();646float vpXCenter = gstate.getViewportXCenter();647float vpYScale = gstate.getViewportYScale();648float vpYCenter = gstate.getViewportYCenter();649650// The viewport transform appears to go like this:651// Xscreen = -offsetX + vpXCenter + vpXScale * Xview652// Yscreen = -offsetY + vpYCenter + vpYScale * Yview653// Zscreen = vpZCenter + vpZScale * Zview654655// The viewport is normally centered at 2048,2048 but can also be centered at other locations.656// Offset is subtracted from the viewport center and is also set to values in those ranges, and is set so that the viewport will cover657// the desired screen area ([0-480)x[0-272)), so 1808,1912.658659// This means that to get the analogue glViewport we must:660float vpX0 = vpXCenter - offsetX - fabsf(vpXScale);661float vpY0 = vpYCenter - offsetY - fabsf(vpYScale);662gstate_c.vpWidth = vpXScale * 2.0f;663gstate_c.vpHeight = vpYScale * 2.0f;664665float vpWidth = fabsf(gstate_c.vpWidth);666float vpHeight = fabsf(gstate_c.vpHeight);667668float left = renderX + vpX0;669float top = renderY + vpY0;670float right = left + vpWidth;671float bottom = top + vpHeight;672673out.widthScale = 1.0f;674out.xOffset = 0.0f;675out.heightScale = 1.0f;676out.yOffset = 0.0f;677678// If we're within the bounds, we want clipping the viewport way. So leave it be.679{680float overageLeft = std::max(-left, 0.0f);681float overageRight = std::max(right - bufferWidth, 0.0f);682683// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.684if (right < scissorX2) {685overageRight -= scissorX2 - right;686}687if (left > scissorX1) {688overageLeft += scissorX1 - left;689}690691// Our center drifted by the difference in overages.692float drift = overageRight - overageLeft;693694if (overageLeft != 0.0f || overageRight != 0.0f) {695left += overageLeft;696right -= overageRight;697698// Protect against the viewport being entirely outside the scissor.699// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.700if (right <= left) {701right = left + 1.0f;702}703704out.widthScale = vpWidth / (right - left);705out.xOffset = drift / (right - left);706}707}708709{710float overageTop = std::max(-top, 0.0f);711float overageBottom = std::max(bottom - bufferHeight, 0.0f);712713// Expand viewport to cover scissor region. The viewport doesn't clip on the PSP.714if (bottom < scissorY2) {715overageBottom -= scissorY2 - bottom;716}717if (top > scissorY1) {718overageTop += scissorY1 - top;719}720// Our center drifted by the difference in overages.721float drift = overageBottom - overageTop;722723if (overageTop != 0.0f || overageBottom != 0.0f) {724top += overageTop;725bottom -= overageBottom;726727// Protect against the viewport being entirely outside the scissor.728// Emit a tiny but valid viewport. Really, we should probably emit a flag to ignore draws.729if (bottom <= top) {730bottom = top + 1.0f;731}732733out.heightScale = vpHeight / (bottom - top);734out.yOffset = drift / (bottom - top);735}736}737738out.viewportX = left * renderWidthFactor + displayOffsetX;739out.viewportY = top * renderHeightFactor + displayOffsetY;740out.viewportW = (right - left) * renderWidthFactor;741out.viewportH = (bottom - top) * renderHeightFactor;742743// The depth viewport parameters are the same, but we handle it a bit differently.744// When clipping is enabled, depth is clamped to [0, 65535]. And minz/maxz discard.745// So, we apply the depth range as minz/maxz, and transform for the viewport.746float vpZScale = gstate.getViewportZScale();747float vpZCenter = gstate.getViewportZCenter();748// TODO: This clip the entire draw if minz > maxz.749float minz = gstate.getDepthRangeMin();750float maxz = gstate.getDepthRangeMax();751752if (gstate.isDepthClampEnabled() && (minz == 0 || maxz == 65535)) {753// Here, we should "clamp." But clamping per fragment would be slow.754// So, instead, we just increase the available range and hope.755// If depthSliceFactor is 4, it means (75% / 2) of the depth lies in each direction.756float fullDepthRange = 65535.0f * (depthScale.Scale() - 1.0f) * (1.0f / 2.0f);757if (minz == 0) {758minz -= fullDepthRange;759}760if (maxz == 65535) {761maxz += fullDepthRange;762}763} else if (maxz == 65535) {764// This means clamp isn't enabled, but we still want to allow values up to 65535.99.765// If DepthSliceFactor() is 1.0, though, this would make out.depthRangeMax exceed 1.766// Since that would clamp, it would make Z=1234 not match between draws when maxz changes.767if (depthScale.Scale() > 1.0f)768maxz = 65535.99f;769}770771// Okay. So, in our shader, -1 will map to minz, and +1 will map to maxz.772float halfActualZRange = (maxz - minz) * (1.0f / 2.0f);773out.depthScale = halfActualZRange < std::numeric_limits<float>::epsilon() ? 1.0f : vpZScale / halfActualZRange;774// This adjusts the center from halfActualZRange to vpZCenter.775out.zOffset = halfActualZRange < std::numeric_limits<float>::epsilon() ? 0.0f : (vpZCenter - (minz + halfActualZRange)) / halfActualZRange;776777if (!gstate_c.Use(GPU_USE_ACCURATE_DEPTH)) {778out.depthScale = 1.0f;779out.zOffset = 0.0f;780out.depthRangeMin = depthScale.EncodeFromU16(vpZCenter - vpZScale);781out.depthRangeMax = depthScale.EncodeFromU16(vpZCenter + vpZScale);782} else {783out.depthRangeMin = depthScale.EncodeFromU16(minz);784out.depthRangeMax = depthScale.EncodeFromU16(maxz);785}786787// OpenGL will clamp these for us anyway, and Direct3D will error if not clamped.788// Of course, if this happens we've skewed out.depthScale/out.zOffset and may get z-fighting.789out.depthRangeMin = std::max(out.depthRangeMin, 0.0f);790out.depthRangeMax = std::min(out.depthRangeMax, 1.0f);791}792}793794void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor) {795if (vpAndScissor.throughMode)796return;797798bool scaleChanged = gstate_c.vpWidthScale != vpAndScissor.widthScale || gstate_c.vpHeightScale != vpAndScissor.heightScale;799bool offsetChanged = gstate_c.vpXOffset != vpAndScissor.xOffset || gstate_c.vpYOffset != vpAndScissor.yOffset;800bool depthChanged = gstate_c.vpDepthScale != vpAndScissor.depthScale || gstate_c.vpZOffset != vpAndScissor.zOffset;801if (scaleChanged || offsetChanged || depthChanged) {802gstate_c.vpWidthScale = vpAndScissor.widthScale;803gstate_c.vpHeightScale = vpAndScissor.heightScale;804gstate_c.vpDepthScale = vpAndScissor.depthScale;805gstate_c.vpXOffset = vpAndScissor.xOffset;806gstate_c.vpYOffset = vpAndScissor.yOffset;807gstate_c.vpZOffset = vpAndScissor.zOffset;808809gstate_c.Dirty(DIRTY_PROJMATRIX);810if (depthChanged) {811gstate_c.Dirty(DIRTY_DEPTHRANGE);812}813}814}815816static const BlendFactor genericALookup[11] = {817BlendFactor::DST_COLOR,818BlendFactor::ONE_MINUS_DST_COLOR,819BlendFactor::SRC_ALPHA,820BlendFactor::ONE_MINUS_SRC_ALPHA,821BlendFactor::DST_ALPHA,822BlendFactor::ONE_MINUS_DST_ALPHA,823BlendFactor::SRC_ALPHA, // GE_SRCBLEND_DOUBLESRCALPHA824BlendFactor::ONE_MINUS_SRC_ALPHA, // GE_SRCBLEND_DOUBLEINVSRCALPHA825BlendFactor::DST_ALPHA, // GE_SRCBLEND_DOUBLEDSTALPHA826BlendFactor::ONE_MINUS_DST_ALPHA, // GE_SRCBLEND_DOUBLEINVDSTALPHA827BlendFactor::CONSTANT_COLOR, // FIXA828};829830static const BlendFactor genericBLookup[11] = {831BlendFactor::SRC_COLOR,832BlendFactor::ONE_MINUS_SRC_COLOR,833BlendFactor::SRC_ALPHA,834BlendFactor::ONE_MINUS_SRC_ALPHA,835BlendFactor::DST_ALPHA,836BlendFactor::ONE_MINUS_DST_ALPHA,837BlendFactor::SRC_ALPHA, // GE_SRCBLEND_DOUBLESRCALPHA838BlendFactor::ONE_MINUS_SRC_ALPHA, // GE_SRCBLEND_DOUBLEINVSRCALPHA839BlendFactor::DST_ALPHA, // GE_SRCBLEND_DOUBLEDSTALPHA840BlendFactor::ONE_MINUS_DST_ALPHA, // GE_SRCBLEND_DOUBLEINVDSTALPHA841BlendFactor::CONSTANT_COLOR, // FIXB842};843844static const BlendEq eqLookupNoMinMax[] = {845BlendEq::ADD,846BlendEq::SUBTRACT,847BlendEq::REVERSE_SUBTRACT,848BlendEq::ADD, // GE_BLENDMODE_MIN849BlendEq::ADD, // GE_BLENDMODE_MAX850BlendEq::ADD, // GE_BLENDMODE_ABSDIFF851};852853static const BlendEq eqLookup[] = {854BlendEq::ADD,855BlendEq::SUBTRACT,856BlendEq::REVERSE_SUBTRACT,857BlendEq::MIN, // GE_BLENDMODE_MIN858BlendEq::MAX, // GE_BLENDMODE_MAX859BlendEq::MAX, // GE_BLENDMODE_ABSDIFF860};861862static BlendFactor toDualSource(BlendFactor blendfunc) {863switch (blendfunc) {864case BlendFactor::SRC_ALPHA:865return BlendFactor::SRC1_ALPHA;866case BlendFactor::ONE_MINUS_SRC_ALPHA:867return BlendFactor::ONE_MINUS_SRC1_ALPHA;868default:869return blendfunc;870}871}872873static BlendFactor blendColor2Func(u32 fix, bool &approx) {874if (fix == 0xFFFFFF)875return BlendFactor::ONE;876if (fix == 0)877return BlendFactor::ZERO;878879// Otherwise, it's approximate if we pick ONE/ZERO.880approx = true;881882const Vec3f fix3 = Vec3f::FromRGB(fix);883if (fix3.x >= 0.99 && fix3.y >= 0.99 && fix3.z >= 0.99)884return BlendFactor::ONE;885else if (fix3.x <= 0.01 && fix3.y <= 0.01 && fix3.z <= 0.01)886return BlendFactor::ZERO;887return BlendFactor::INVALID;888}889890// abs is a quagmire of compiler incompatibilities, so...891inline int iabs(int x) {892return x >= 0 ? x : -x;893}894895static inline bool blendColorSimilar(uint32_t a, uint32_t b, int margin = 25) { // 25 ~= 0.1 * 255896int diffx = iabs((a & 0xff) - (b & 0xff));897int diffy = iabs(((a >> 8) & 0xff) - ((b >> 8) & 0xff));898int diffz = iabs(((a >> 16) & 0xff) - ((b >> 16) & 0xff));899if (diffx <= margin && diffy <= margin && diffz <= margin)900return true;901return false;902}903904// Try to simulate some common logic ops by using blend, if needed.905// The shader might also need modification, the below function SimulateLogicOpShaderTypeIfNeeded906// takes care of that.907static bool SimulateLogicOpIfNeeded(BlendFactor &srcBlend, BlendFactor &dstBlend, BlendEq &blendEq) {908if (!gstate.isLogicOpEnabled())909return false;910911// Note: our shader solution applies logic ops BEFORE blending, not correctly after.912// This is however fine for the most common ones, like CLEAR/NOOP/SET, etc.913if (!gstate_c.Use(GPU_USE_LOGIC_OP)) {914switch (gstate.getLogicOp()) {915case GE_LOGIC_CLEAR:916srcBlend = BlendFactor::ZERO;917dstBlend = BlendFactor::ZERO;918blendEq = BlendEq::ADD;919return true;920case GE_LOGIC_AND:921case GE_LOGIC_AND_REVERSE:922WARN_LOG_REPORT_ONCE(d3dLogicOpAnd, Log::G3D, "Unsupported AND logic op: %x", gstate.getLogicOp());923break;924case GE_LOGIC_COPY:925// This is the same as off.926break;927case GE_LOGIC_COPY_INVERTED:928// Handled in the shader.929break;930case GE_LOGIC_AND_INVERTED:931case GE_LOGIC_NOR:932case GE_LOGIC_NAND:933case GE_LOGIC_EQUIV:934// Handled in the shader.935WARN_LOG_REPORT_ONCE(d3dLogicOpAndInverted, Log::G3D, "Attempted invert for logic op: %x", gstate.getLogicOp());936break;937case GE_LOGIC_INVERTED:938srcBlend = BlendFactor::ONE;939dstBlend = BlendFactor::ONE;940blendEq = BlendEq::SUBTRACT;941WARN_LOG_REPORT_ONCE(d3dLogicOpInverted, Log::G3D, "Attempted inverse for logic op: %x", gstate.getLogicOp());942return true;943case GE_LOGIC_NOOP:944srcBlend = BlendFactor::ZERO;945dstBlend = BlendFactor::ONE;946blendEq = BlendEq::ADD;947return true;948case GE_LOGIC_XOR:949WARN_LOG_REPORT_ONCE(d3dLogicOpOrXor, Log::G3D, "Unsupported XOR logic op: %x", gstate.getLogicOp());950break;951case GE_LOGIC_OR:952case GE_LOGIC_OR_INVERTED:953// Inverted in shader.954srcBlend = BlendFactor::ONE;955dstBlend = BlendFactor::ONE;956blendEq = BlendEq::ADD;957WARN_LOG_REPORT_ONCE(d3dLogicOpOr, Log::G3D, "Attempted or for logic op: %x", gstate.getLogicOp());958return true;959case GE_LOGIC_OR_REVERSE:960WARN_LOG_REPORT_ONCE(d3dLogicOpOrReverse, Log::G3D, "Unsupported OR REVERSE logic op: %x", gstate.getLogicOp());961break;962case GE_LOGIC_SET:963srcBlend = BlendFactor::ONE;964dstBlend = BlendFactor::ONE;965blendEq = BlendEq::ADD;966WARN_LOG_REPORT_ONCE(d3dLogicOpSet, Log::G3D, "Attempted set for logic op: %x", gstate.getLogicOp());967return true;968}969} else {970// Even if we support hardware logic ops, alpha is handled wrong.971// It's better to override blending for the simple cases.972switch (gstate.getLogicOp()) {973case GE_LOGIC_CLEAR:974srcBlend = BlendFactor::ZERO;975dstBlend = BlendFactor::ZERO;976blendEq = BlendEq::ADD;977return true;978case GE_LOGIC_NOOP:979srcBlend = BlendFactor::ZERO;980dstBlend = BlendFactor::ONE;981blendEq = BlendEq::ADD;982return true;983984default:985// Let's hope hardware gets it right.986return false;987}988}989return false;990}991992// Choose the shader part of the above logic op fallback simulation.993SimulateLogicOpType SimulateLogicOpShaderTypeIfNeeded() {994if (!gstate_c.Use(GPU_USE_LOGIC_OP) && gstate.isLogicOpEnabled()) {995switch (gstate.getLogicOp()) {996case GE_LOGIC_COPY_INVERTED:997case GE_LOGIC_AND_INVERTED:998case GE_LOGIC_OR_INVERTED:999case GE_LOGIC_NOR:1000case GE_LOGIC_NAND:1001case GE_LOGIC_EQUIV:1002return LOGICOPTYPE_INVERT;1003case GE_LOGIC_INVERTED:1004return LOGICOPTYPE_ONE;1005case GE_LOGIC_SET:1006return LOGICOPTYPE_ONE;1007default:1008return LOGICOPTYPE_NORMAL;1009}1010}1011return LOGICOPTYPE_NORMAL;1012}10131014void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState) {1015StencilValueType stencilType = STENCIL_VALUE_KEEP;1016if (replaceAlphaWithStencil == REPLACE_ALPHA_YES) {1017stencilType = ReplaceAlphaWithStencilType();1018}10191020// Normally, we would add src + 0 with blending off, but the logic op may have us do differently.1021BlendFactor srcBlend = BlendFactor::ONE;1022BlendFactor dstBlend = BlendFactor::ZERO;1023BlendEq blendEq = BlendEq::ADD;10241025// We're not blending, but we may still want to "blend" for stencil.1026// This is only useful for INCR/DECR/INVERT. Others can write directly.1027switch (stencilType) {1028case STENCIL_VALUE_INCR_4:1029case STENCIL_VALUE_INCR_8:1030// We'll add the incremented value output by the shader.1031blendState.blendEnabled = true;1032blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);1033blendState.setEquation(blendEq, BlendEq::ADD);1034break;10351036case STENCIL_VALUE_DECR_4:1037case STENCIL_VALUE_DECR_8:1038// We'll subtract the incremented value output by the shader.1039blendState.blendEnabled = true;1040blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);1041blendState.setEquation(blendEq, BlendEq::SUBTRACT);1042break;10431044case STENCIL_VALUE_INVERT:1045// The shader will output one, and reverse subtracting will essentially invert.1046blendState.blendEnabled = true;1047blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ONE);1048blendState.setEquation(blendEq, BlendEq::REVERSE_SUBTRACT);1049break;10501051default:1052if (srcBlend == BlendFactor::ONE && dstBlend == BlendFactor::ZERO && blendEq == BlendEq::ADD) {1053blendState.blendEnabled = false;1054} else {1055blendState.blendEnabled = true;1056blendState.setFactors(srcBlend, dstBlend, BlendFactor::ONE, BlendFactor::ZERO);1057blendState.setEquation(blendEq, BlendEq::ADD);1058}1059break;1060}1061}10621063// If we can we emulate the colorMask by simply toggling the full R G B A masks offered1064// by modern hardware, we do that. This is 99.9% of the time.1065// When that's not enough, we fall back on a technique similar to shader blending,1066// we read from the framebuffer (or a copy of it).1067// We also prepare uniformMask so that if doing this in the shader gets forced-on,1068// we have the right mask already.1069static void ConvertMaskState(GenericMaskState &maskState, bool shaderBitOpsSupported) {1070if (gstate_c.blueToAlpha) {1071maskState.applyFramebufferRead = false;1072maskState.uniformMask = 0xFF000000;1073maskState.channelMask = 0x8;1074return;1075}10761077// Invert to convert masks from the PSP's format where 1 is don't draw to PC where 1 is draw.1078uint32_t colorMask = ~((gstate.pmskc & 0xFFFFFF) | (gstate.pmska << 24));10791080maskState.uniformMask = colorMask;1081maskState.applyFramebufferRead = false;1082maskState.channelMask = 0;1083for (int i = 0; i < 4; i++) {1084uint32_t channelMask = (colorMask >> (i * 8)) & 0xFF;1085switch (channelMask) {1086case 0x0:1087break;1088case 0xFF:1089maskState.channelMask |= 1 << i;1090break;1091default:1092if (shaderBitOpsSupported && PSP_CoreParameter().compat.flags().ShaderColorBitmask) {1093// Shaders can emulate masking accurately. Let's make use of that.1094maskState.applyFramebufferRead = true;1095maskState.channelMask |= 1 << i;1096} else {1097// Use the old inaccurate heuristic.1098if (channelMask >= 128) {1099maskState.channelMask |= 1 << i;1100}1101}1102}1103}11041105// Let's not write to alpha if stencil isn't enabled.1106// Also if the stencil type is set to KEEP, we shouldn't write to the stencil/alpha channel.1107if (IsStencilTestOutputDisabled() || ReplaceAlphaWithStencilType() == STENCIL_VALUE_KEEP) {1108maskState.channelMask &= ~8;1109maskState.uniformMask &= ~0xFF000000;1110}11111112// For 5551, only the top alpha bit matters. We might even want to swizzle 4444.1113// Alpha should correctly read as 255 from a 5551 texture.1114if (gstate.FrameBufFormat() == GE_FORMAT_5551) {1115if ((maskState.uniformMask & 0x80000000) != 0)1116maskState.uniformMask |= 0xFF000000;1117else1118maskState.uniformMask &= ~0xFF000000;1119}1120}11211122// Called even if AlphaBlendEnable == false - it also deals with stencil-related blend state.1123static void ConvertBlendState(GenericBlendState &blendState, bool forceReplaceBlend) {1124// Blending is a bit complex to emulate. This is due to several reasons:1125//1126// * Doubled blend modes (src, dst, inversed) aren't supported in OpenGL.1127// If possible, we double the src color or src alpha in the shader to account for these.1128// These may clip incorrectly, so we avoid unfortunately.1129// * OpenGL only has one arbitrary fixed color. We premultiply the other in the shader.1130// * The written output alpha should actually be the stencil value. Alpha is not written.1131//1132// If we can't apply blending, we make a copy of the framebuffer and do it manually.11331134blendState.applyFramebufferRead = false;1135blendState.dirtyShaderBlendFixValues = false;1136blendState.useBlendColor = false;11371138ReplaceBlendType replaceBlend = ReplaceBlendWithShader(gstate_c.framebufFormat);1139if (forceReplaceBlend) {1140// Enforce blend replacement if enabled. If not, shouldn't do anything of course.1141replaceBlend = gstate.isAlphaBlendEnabled() ? REPLACE_BLEND_READ_FRAMEBUFFER : REPLACE_BLEND_NO;1142}11431144blendState.replaceBlend = replaceBlend;11451146blendState.simulateLogicOpType = SimulateLogicOpShaderTypeIfNeeded();11471148ReplaceAlphaType replaceAlphaWithStencil = ReplaceAlphaWithStencil(replaceBlend);1149blendState.replaceAlphaWithStencil = replaceAlphaWithStencil;11501151bool usePreSrc = false;11521153bool blueToAlpha = false;11541155switch (replaceBlend) {1156case REPLACE_BLEND_NO:1157// We may still want to do something about stencil -> alpha.1158ApplyStencilReplaceAndLogicOpIgnoreBlend(replaceAlphaWithStencil, blendState);11591160if (forceReplaceBlend) {1161// If this is true, the logic and mask replacements will be applied, at least. In that case,1162// we should not apply any logic op simulation.1163blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;1164}1165return;11661167case REPLACE_BLEND_BLUE_TO_ALPHA:1168blueToAlpha = true;1169blendState.blendEnabled = gstate.isAlphaBlendEnabled();1170// We'll later convert the color blend to blend in the alpha channel.1171break;11721173case REPLACE_BLEND_READ_FRAMEBUFFER:1174blendState.blendEnabled = true;1175blendState.applyFramebufferRead = true;1176blendState.simulateLogicOpType = LOGICOPTYPE_NORMAL;1177break;11781179case REPLACE_BLEND_PRE_SRC:1180case REPLACE_BLEND_PRE_SRC_2X_ALPHA:1181blendState.blendEnabled = true;1182usePreSrc = true;1183break;11841185case REPLACE_BLEND_STANDARD:1186case REPLACE_BLEND_2X_ALPHA:1187case REPLACE_BLEND_2X_SRC:1188blendState.blendEnabled = true;1189break;1190}11911192const GEBlendMode blendFuncEq = gstate.getBlendEq();1193GEBlendSrcFactor blendFuncA = gstate.getBlendFuncA();1194GEBlendDstFactor blendFuncB = gstate.getBlendFuncB();1195const u32 fixA = gstate.getFixA();1196const u32 fixB = gstate.getFixB();11971198if (blendFuncA > GE_SRCBLEND_FIXA)1199blendFuncA = GE_SRCBLEND_FIXA;1200if (blendFuncB > GE_DSTBLEND_FIXB)1201blendFuncB = GE_DSTBLEND_FIXB;12021203int constantAlpha = 255;1204BlendFactor constantAlphaGL = BlendFactor::ONE;1205if (!IsStencilTestOutputDisabled() && replaceAlphaWithStencil == REPLACE_ALPHA_NO) {1206switch (ReplaceAlphaWithStencilType()) {1207case STENCIL_VALUE_UNIFORM:1208constantAlpha = gstate.getStencilTestRef();1209break;12101211case STENCIL_VALUE_INCR_4:1212case STENCIL_VALUE_DECR_4:1213constantAlpha = 16;1214break;12151216case STENCIL_VALUE_INCR_8:1217case STENCIL_VALUE_DECR_8:1218constantAlpha = 1;1219break;12201221default:1222break;1223}12241225// Otherwise it will stay GL_ONE.1226if (constantAlpha <= 0) {1227constantAlphaGL = BlendFactor::ZERO;1228} else if (constantAlpha < 255) {1229constantAlphaGL = BlendFactor::CONSTANT_ALPHA;1230}1231}12321233// Shortcut by using GL_ONE where possible, no need to set blendcolor1234bool approxFuncA = false;1235BlendFactor glBlendFuncA = blendFuncA == GE_SRCBLEND_FIXA ? blendColor2Func(fixA, approxFuncA) : genericALookup[blendFuncA];1236bool approxFuncB = false;1237BlendFactor glBlendFuncB = blendFuncB == GE_DSTBLEND_FIXB ? blendColor2Func(fixB, approxFuncB) : genericBLookup[blendFuncB];12381239if (gstate_c.framebufFormat == GE_FORMAT_565) {1240if (blendFuncA == GE_SRCBLEND_DSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEDSTALPHA) {1241glBlendFuncA = BlendFactor::ZERO;1242}1243if (blendFuncA == GE_SRCBLEND_INVDSTALPHA || blendFuncA == GE_SRCBLEND_DOUBLEINVDSTALPHA) {1244glBlendFuncA = BlendFactor::ONE;1245}1246if (blendFuncB == GE_DSTBLEND_DSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEDSTALPHA) {1247glBlendFuncB = BlendFactor::ZERO;1248}1249if (blendFuncB == GE_DSTBLEND_INVDSTALPHA || blendFuncB == GE_DSTBLEND_DOUBLEINVDSTALPHA) {1250glBlendFuncB = BlendFactor::ONE;1251}1252}12531254if (usePreSrc) {1255glBlendFuncA = BlendFactor::ONE;1256// Need to pull in the fixed color. TODO: If it hasn't changed, no need to dirty.1257if (blendFuncA == GE_SRCBLEND_FIXA) {1258blendState.dirtyShaderBlendFixValues = true;1259}1260}12611262if (replaceAlphaWithStencil == REPLACE_ALPHA_DUALSOURCE) {1263glBlendFuncA = toDualSource(glBlendFuncA);1264glBlendFuncB = toDualSource(glBlendFuncB);1265}12661267if (blendFuncA == GE_SRCBLEND_FIXA || blendFuncB == GE_DSTBLEND_FIXB) {1268if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB != BlendFactor::INVALID) {1269// Can use blendcolor trivially.1270blendState.setBlendColor(fixA, constantAlpha);1271glBlendFuncA = BlendFactor::CONSTANT_COLOR;1272} else if (glBlendFuncA != BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {1273// Can use blendcolor trivially.1274blendState.setBlendColor(fixB, constantAlpha);1275glBlendFuncB = BlendFactor::CONSTANT_COLOR;1276} else if (glBlendFuncA == BlendFactor::INVALID && glBlendFuncB == BlendFactor::INVALID) {1277if (blendColorSimilar(fixA, 0xFFFFFF ^ fixB)) {1278glBlendFuncA = BlendFactor::CONSTANT_COLOR;1279glBlendFuncB = BlendFactor::ONE_MINUS_CONSTANT_COLOR;1280blendState.setBlendColor(fixA, constantAlpha);1281} else if (blendColorSimilar(fixA, fixB)) {1282glBlendFuncA = BlendFactor::CONSTANT_COLOR;1283glBlendFuncB = BlendFactor::CONSTANT_COLOR;1284blendState.setBlendColor(fixA, constantAlpha);1285} else {1286DEBUG_LOG(Log::G3D, "ERROR INVALID blendcolorstate: FixA=%06x FixB=%06x FuncA=%i FuncB=%i", fixA, fixB, blendFuncA, blendFuncB);1287// Let's approximate, at least. Close is better than totally off.1288const bool nearZeroA = blendColorSimilar(fixA, 0, 64);1289const bool nearZeroB = blendColorSimilar(fixB, 0, 64);1290if (nearZeroA || blendColorSimilar(fixA, 0xFFFFFF, 64)) {1291glBlendFuncA = nearZeroA ? BlendFactor::ZERO : BlendFactor::ONE;1292glBlendFuncB = BlendFactor::CONSTANT_COLOR;1293blendState.setBlendColor(fixB, constantAlpha);1294} else {1295// We need to pick something. Let's go with A as the fixed color.1296glBlendFuncA = BlendFactor::CONSTANT_COLOR;1297glBlendFuncB = nearZeroB ? BlendFactor::ZERO : BlendFactor::ONE;1298blendState.setBlendColor(fixA, constantAlpha);1299}1300}1301} else {1302// We optimized both, but that's probably not necessary, so let's pick one to be constant.1303if (blendFuncA == GE_SRCBLEND_FIXA && !usePreSrc && approxFuncA) {1304glBlendFuncA = BlendFactor::CONSTANT_COLOR;1305blendState.setBlendColor(fixA, constantAlpha);1306} else if (approxFuncB) {1307glBlendFuncB = BlendFactor::CONSTANT_COLOR;1308blendState.setBlendColor(fixB, constantAlpha);1309} else {1310if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {1311blendState.defaultBlendColor(constantAlpha);1312}1313}1314}1315} else {1316if (constantAlphaGL == BlendFactor::CONSTANT_ALPHA) {1317blendState.defaultBlendColor(constantAlpha);1318}1319}13201321// Some Android devices (especially old Mali, it seems) composite badly if there's alpha in the backbuffer.1322// So in non-buffered rendering, we will simply consider the dest alpha to be zero in blending equations.1323#ifdef __ANDROID__1324if (g_Config.bSkipBufferEffects) {1325if (glBlendFuncA == BlendFactor::DST_ALPHA) glBlendFuncA = BlendFactor::ZERO;1326if (glBlendFuncB == BlendFactor::DST_ALPHA) glBlendFuncB = BlendFactor::ZERO;1327if (glBlendFuncA == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncA = BlendFactor::ONE;1328if (glBlendFuncB == BlendFactor::ONE_MINUS_DST_ALPHA) glBlendFuncB = BlendFactor::ONE;1329}1330#endif13311332// At this point, through all paths above, glBlendFuncA and glBlendFuncB will be set right somehow.1333BlendEq colorEq;1334if (gstate_c.Use(GPU_USE_BLEND_MINMAX)) {1335colorEq = eqLookup[blendFuncEq];1336} else {1337colorEq = eqLookupNoMinMax[blendFuncEq];1338}13391340// The stencil-to-alpha in fragment shader doesn't apply here (blending is enabled), and we shouldn't1341// do any blending in the alpha channel as that doesn't seem to happen on PSP. So, we attempt to1342// apply the stencil to the alpha, since that's what should be stored.1343BlendEq alphaEq = BlendEq::ADD;1344if (replaceAlphaWithStencil != REPLACE_ALPHA_NO) {1345// Let the fragment shader take care of it.1346switch (ReplaceAlphaWithStencilType()) {1347case STENCIL_VALUE_INCR_4:1348case STENCIL_VALUE_INCR_8:1349// We'll add the increment value.1350blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);1351break;13521353case STENCIL_VALUE_DECR_4:1354case STENCIL_VALUE_DECR_8:1355// Like add with a small value, but subtracting.1356blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);1357alphaEq = BlendEq::SUBTRACT;1358break;13591360case STENCIL_VALUE_INVERT:1361// This will subtract by one, effectively inverting the bits.1362blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);1363alphaEq = BlendEq::REVERSE_SUBTRACT;1364break;13651366default:1367blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ZERO);1368break;1369}1370} else if (!IsStencilTestOutputDisabled()) {1371StencilValueType stencilValue = ReplaceAlphaWithStencilType();1372if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0x00) {1373stencilValue = STENCIL_VALUE_ZERO;1374} else if (stencilValue == STENCIL_VALUE_UNIFORM && constantAlpha == 0xFF) {1375stencilValue = STENCIL_VALUE_ONE;1376}1377switch (stencilValue) {1378case STENCIL_VALUE_KEEP:1379blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);1380break;1381case STENCIL_VALUE_ONE:1382// This won't give one but it's our best shot...1383blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);1384break;1385case STENCIL_VALUE_ZERO:1386blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ZERO);1387break;1388case STENCIL_VALUE_UNIFORM:1389// This won't give a correct value (it multiplies) but it may be better than random values.1390blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ZERO);1391break;1392case STENCIL_VALUE_INCR_4:1393case STENCIL_VALUE_INCR_8:1394// This won't give a correct value always, but it will try to increase at least.1395blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);1396break;1397case STENCIL_VALUE_DECR_4:1398case STENCIL_VALUE_DECR_8:1399// This won't give a correct value always, but it will try to decrease at least.1400blendState.setFactors(glBlendFuncA, glBlendFuncB, constantAlphaGL, BlendFactor::ONE);1401alphaEq = BlendEq::SUBTRACT;1402break;1403case STENCIL_VALUE_INVERT:1404blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ONE, BlendFactor::ONE);1405// If the output alpha is near 1, this will basically invert. It's our best shot.1406alphaEq = BlendEq::REVERSE_SUBTRACT;1407break;1408}1409} else if (blueToAlpha) {1410blendState.setFactors(BlendFactor::ZERO, BlendFactor::ZERO, BlendFactor::ONE, glBlendFuncB);1411blendState.setEquation(BlendEq::ADD, colorEq);1412return;1413} else {1414// Retain the existing value when stencil testing is off.1415blendState.setFactors(glBlendFuncA, glBlendFuncB, BlendFactor::ZERO, BlendFactor::ONE);1416}14171418blendState.setEquation(colorEq, alphaEq);1419}14201421static void ConvertLogicOpState(GenericLogicState &logicOpState, bool logicSupported, bool shaderBitOpsSupported, bool forceApplyFramebuffer) {1422// TODO: We can get more detailed with checks here. Some logic ops don't involve the destination at all.1423// Several can be trivially supported even without any bitwise logic.1424if (!gstate.isLogicOpEnabled() || gstate.getLogicOp() == GE_LOGIC_COPY) {1425// No matter what, don't need to do anything.1426logicOpState.logicOpEnabled = false;1427logicOpState.logicOp = GE_LOGIC_COPY;1428logicOpState.applyFramebufferRead = forceApplyFramebuffer;1429return;1430}14311432if (forceApplyFramebuffer && shaderBitOpsSupported) {1433// We have to emulate logic ops in the shader.1434logicOpState.logicOpEnabled = false; // Don't use any hardware logic op, supported or not.1435logicOpState.applyFramebufferRead = true;1436logicOpState.logicOp = gstate.getLogicOp();1437} else if (logicSupported) {1438// We can use hardware logic ops, if needed.1439logicOpState.applyFramebufferRead = false;1440if (gstate.isLogicOpEnabled()) {1441logicOpState.logicOpEnabled = true;1442logicOpState.logicOp = gstate.getLogicOp();1443} else {1444logicOpState.logicOpEnabled = false;1445logicOpState.logicOp = GE_LOGIC_COPY;1446}1447} else if (shaderBitOpsSupported) {1448// D3D11 and some OpenGL versions will end up here.1449// Logic ops not support, bitops supported. Let's punt to the shader.1450// We should possibly always do this and never use the hardware ops, since they'll mishandle the alpha channel..1451logicOpState.logicOpEnabled = false; // Don't use any hardware logic op, supported or not.1452logicOpState.applyFramebufferRead = true;1453logicOpState.logicOp = gstate.getLogicOp();1454} else {1455// In this case, the SIMULATE fallback should kick in.1456// Need to make sure this is checking for the same things though...1457logicOpState.logicOpEnabled = false;1458logicOpState.logicOp = GE_LOGIC_COPY;1459logicOpState.applyFramebufferRead = false;1460}1461}14621463static void ConvertStencilFunc5551(GenericStencilFuncState &state) {1464// Flaws:1465// - INVERT should convert 1, 5, 0xFF to 0. Currently it won't always.1466// - INCR twice shouldn't change the value.1467// - REPLACE should write 0 for 0x00 - 0x7F, and non-zero for 0x80 - 0xFF.1468// - Write mask may need double checking, but likely only the top bit matters.14691470const bool usesRef = state.sFail == GE_STENCILOP_REPLACE || state.zFail == GE_STENCILOP_REPLACE || state.zPass == GE_STENCILOP_REPLACE;1471const u8 maskedRef = state.testRef & state.testMask;1472const u8 usedRef = (state.testRef & 0x80) != 0 ? 0xFF : 0x00;14731474auto rewriteFunc = [&](GEComparison func, u8 ref) {1475// We can only safely rewrite if it doesn't use the ref, or if the ref is the same.1476if (!usesRef || usedRef == ref) {1477state.testFunc = func;1478state.testRef = ref;1479state.testMask = 0xFF;1480}1481};1482auto rewriteRef = [&](bool always) {1483state.testFunc = always ? GE_COMP_ALWAYS : GE_COMP_NEVER;1484if (usesRef) {1485// Rewrite the ref (for REPLACE) to 0x00 or 0xFF (the "best" values) if safe.1486// This will only be called if the test doesn't need the ref.1487state.testRef = usedRef;1488// Nuke the mask as well, since this is always/never, just for consistency.1489state.testMask = 0xFF;1490} else {1491// Not used, so let's make the ref 0xFF which is a useful value later.1492state.testRef = 0xFF;1493state.testMask = 0xFF;1494}1495};14961497// For 5551, we treat any non-zero value in the buffer as 255. Only zero is treated as zero.1498// See: https://github.com/hrydgard/ppsspp/pull/4150#issuecomment-262111931499switch (state.testFunc) {1500case GE_COMP_NEVER:1501case GE_COMP_ALWAYS:1502// Fine as is.1503rewriteRef(state.testFunc == GE_COMP_ALWAYS);1504break;1505case GE_COMP_EQUAL: // maskedRef == maskedBuffer1506if (maskedRef == 0) {1507// Remove any mask, we might have bits less than 255 but that should not match.1508rewriteFunc(GE_COMP_EQUAL, 0);1509} else if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {1510// Equal to 255, for our buffer, means not equal to zero.1511rewriteFunc(GE_COMP_NOTEQUAL, 0);1512} else {1513// This should never pass, regardless of buffer value. Only 0 and 255 are directly equal.1514rewriteRef(false);1515}1516break;1517case GE_COMP_NOTEQUAL: // maskedRef != maskedBuffer1518if (maskedRef == 0) {1519// Remove the mask, since our buffer might not be exactly 255.1520rewriteFunc(GE_COMP_NOTEQUAL, 0);1521} else if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {1522// The only value != 255 is 0, in our buffer.1523rewriteFunc(GE_COMP_EQUAL, 0);1524} else {1525// Every other value evaluates as not equal, always.1526rewriteRef(true);1527}1528break;1529case GE_COMP_LESS: // maskedRef < maskedBuffer1530if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {1531// No possible value is less than 255.1532rewriteRef(false);1533} else {1534// "0 < (0 or 255)" and "254 < (0 or 255)" can only work for non zero.1535rewriteFunc(GE_COMP_NOTEQUAL, 0);1536}1537break;1538case GE_COMP_LEQUAL: // maskedRef <= maskedBuffer1539if (maskedRef == 0) {1540// 0 is <= every possible value.1541rewriteRef(true);1542} else {1543// "1 <= (0 or 255)" and "255 <= (0 or 255)" simply mean, anything but zero.1544rewriteFunc(GE_COMP_NOTEQUAL, 0);1545}1546break;1547case GE_COMP_GREATER: // maskedRef > maskedBuffer1548if (maskedRef > 0) {1549// "1 > (0 or 255)" and "255 > (0 or 255)" can only match 0.1550rewriteFunc(GE_COMP_EQUAL, 0);1551} else {1552// 0 is never greater than any possible value.1553rewriteRef(false);1554}1555break;1556case GE_COMP_GEQUAL: // maskedRef >= maskedBuffer1557if (maskedRef == (0xFF & state.testMask) && state.testMask != 0) {1558// 255 is >= every possible value.1559rewriteRef(true);1560} else {1561// "0 >= (0 or 255)" and "254 >= "(0 or 255)" are the same, equal to zero.1562rewriteFunc(GE_COMP_EQUAL, 0);1563}1564break;1565}15661567auto rewriteOps = [&](GEStencilOp from, GEStencilOp to) {1568if (state.sFail == from)1569state.sFail = to;1570if (state.zFail == from)1571state.zFail = to;1572if (state.zPass == from)1573state.zPass = to;1574};15751576// Decrement always zeros, so let's rewrite those to be safe (even if it's not 1.)1577rewriteOps(GE_STENCILOP_DECR, GE_STENCILOP_ZERO);15781579if (state.testFunc == GE_COMP_NOTEQUAL && state.testRef == 0 && state.testMask != 0) {1580// If it's != 0 (as optimized above), then we can rewrite INVERT to ZERO.1581// With 1 bit of stencil, INVERT != 0 can only make it 0.1582rewriteOps(GE_STENCILOP_INVERT, GE_STENCILOP_ZERO);1583}1584if (state.testFunc == GE_COMP_EQUAL && state.testRef == 0 && state.testMask != 0) {1585// If it's == 0 (as optimized above), then we can rewrite INCR to INVERT.1586// Otherwise we get 1, which we mostly handle, but won't INVERT correctly.1587rewriteOps(GE_STENCILOP_INCR, GE_STENCILOP_INVERT);1588}1589if (!usesRef && state.testRef == 0xFF) {1590// Safe to use REPLACE instead of INCR.1591rewriteOps(GE_STENCILOP_INCR, GE_STENCILOP_REPLACE);1592}1593}15941595static void ConvertStencilMask5551(GenericStencilFuncState &state) {1596state.writeMask = state.writeMask >= 0x80 ? 0xff : 0x00;1597}15981599void ConvertStencilFuncState(GenericStencilFuncState &state) {1600// The PSP's mask is reversed (bits not to write.) Ignore enabled, used for clears too.1601state.writeMask = (~gstate.getStencilWriteMask()) & 0xFF;1602state.enabled = gstate.isStencilTestEnabled();1603if (!state.enabled) {1604if (gstate_c.framebufFormat == GE_FORMAT_5551)1605ConvertStencilMask5551(state);1606return;1607}16081609state.sFail = gstate.getStencilOpSFail();1610state.zFail = gstate.getStencilOpZFail();1611state.zPass = gstate.getStencilOpZPass();16121613state.testFunc = gstate.getStencilTestFunction();1614state.testRef = gstate.getStencilTestRef();1615state.testMask = gstate.getStencilTestMask();16161617bool depthTest = gstate.isDepthTestEnabled();1618if ((state.sFail == state.zFail || !depthTest) && state.sFail == state.zPass) {1619// Common case: we're writing only to stencil (usually REPLACE/REPLACE/REPLACE.)1620// We want to write stencil to alpha in this case, so switch to ALWAYS if already masked.1621bool depthWrite = gstate.isDepthWriteEnabled();1622if ((gstate.getColorMask() & 0x00FFFFFF) == 0x00FFFFFF && (!depthTest || !depthWrite)) {1623state.testFunc = GE_COMP_ALWAYS;1624}1625}16261627switch (gstate_c.framebufFormat) {1628case GE_FORMAT_565:1629state.writeMask = 0;1630break;16311632case GE_FORMAT_5551:1633ConvertStencilMask5551(state);1634ConvertStencilFunc5551(state);1635break;16361637default:1638// Hard to do anything useful for 4444, and 8888 is fine.1639break;1640}1641}16421643void GenericMaskState::Log() {1644WARN_LOG(Log::G3D, "Mask: %08x %01X readfb=%d", uniformMask, channelMask, applyFramebufferRead);1645}16461647void GenericBlendState::Log() {1648WARN_LOG(Log::G3D, "Blend: hwenable=%d readfb=%d replblend=%d replalpha=%d",1649blendEnabled, applyFramebufferRead, replaceBlend, (int)replaceAlphaWithStencil);1650}16511652void ComputedPipelineState::Convert(bool shaderBitOpsSuppported) {1653// Passing on the previous applyFramebufferRead as forceFrameBuffer read in the next one,1654// thus propagating forward.1655ConvertMaskState(maskState, shaderBitOpsSuppported);1656ConvertLogicOpState(logicState, gstate_c.Use(GPU_USE_LOGIC_OP), shaderBitOpsSuppported, maskState.applyFramebufferRead);1657ConvertBlendState(blendState, logicState.applyFramebufferRead);16581659// Note: If the blend state decided it had to use framebuffer reads,1660// we need to make sure that both mask and logic also use it, otherwise things will go wrong.1661if (blendState.applyFramebufferRead || logicState.applyFramebufferRead) {1662maskState.ConvertToShaderBlend();1663logicState.ConvertToShaderBlend();1664} else {1665// If it isn't a read, we may need to change blending to apply the logic op.1666logicState.ApplyToBlendState(blendState);1667}1668}16691670void GenericLogicState::ApplyToBlendState(GenericBlendState &blendState) {1671if (SimulateLogicOpIfNeeded(blendState.srcColor, blendState.dstColor, blendState.eqColor)) {1672if (!blendState.blendEnabled) {1673// If it wasn't turned on, make sure it is now.1674blendState.blendEnabled = true;1675blendState.srcAlpha = BlendFactor::ONE;1676blendState.dstAlpha = BlendFactor::ZERO;1677blendState.eqAlpha = BlendEq::ADD;1678}1679logicOpEnabled = false;1680logicOp = GE_LOGIC_COPY;1681}1682}168316841685