CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Common/GPUStateUtils.h
Views: 1401
#pragma once12#include <cstdint>3#include "Common/CommonTypes.h"45#include "GPU/ge_constants.h"6#include "GPU/GPUState.h"78// TODO: Replace enums and structs with same from thin3d.h, for convenient mapping.910enum StencilValueType {11STENCIL_VALUE_UNIFORM,12STENCIL_VALUE_ZERO,13STENCIL_VALUE_ONE,14STENCIL_VALUE_KEEP,15STENCIL_VALUE_INVERT,16STENCIL_VALUE_INCR_4,17STENCIL_VALUE_INCR_8,18STENCIL_VALUE_DECR_4,19STENCIL_VALUE_DECR_8,20};2122enum ReplaceAlphaType {23REPLACE_ALPHA_NO = 0,24REPLACE_ALPHA_YES = 1,25REPLACE_ALPHA_DUALSOURCE = 2,26};2728enum ReplaceBlendType {29REPLACE_BLEND_NO, // Blend function handled directly with blend states.3031REPLACE_BLEND_STANDARD,3233// SRC part of blend function handled in-shader.34REPLACE_BLEND_PRE_SRC,35REPLACE_BLEND_PRE_SRC_2X_ALPHA,36REPLACE_BLEND_2X_ALPHA,37REPLACE_BLEND_2X_SRC,3839// Full blend equation runs in shader.40// We might have to make a copy of the framebuffer target to read from.41REPLACE_BLEND_READ_FRAMEBUFFER,4243// Color blend mode and color gets copied to alpha blend mode.44REPLACE_BLEND_BLUE_TO_ALPHA,45};4647enum SimulateLogicOpType {48LOGICOPTYPE_NORMAL,49LOGICOPTYPE_ONE,50LOGICOPTYPE_INVERT,51};5253bool IsAlphaTestTriviallyTrue();54bool IsColorTestAgainstZero();55bool IsColorTestTriviallyTrue();56bool IsAlphaTestAgainstZero();57bool NeedsTestDiscard();58bool IsDepthTestEffectivelyDisabled();59bool IsStencilTestOutputDisabled();6061StencilValueType ReplaceAlphaWithStencilType();62ReplaceAlphaType ReplaceAlphaWithStencil(ReplaceBlendType replaceBlend);63ReplaceBlendType ReplaceBlendWithShader(GEBufferFormat bufferFormat);6465// This is for the fallback path if real logic ops are not available.66SimulateLogicOpType SimulateLogicOpShaderTypeIfNeeded();6768// Common representation, should be able to set this directly with any modern API.69struct ViewportAndScissor {70int scissorX;71int scissorY;72int scissorW;73int scissorH;74float viewportX;75float viewportY;76float viewportW;77float viewportH;78float 
depthRangeMin;79float depthRangeMax;80float widthScale;81float heightScale;82float depthScale;83float xOffset;84float yOffset;85float zOffset;86bool throughMode;87};8889void ConvertViewportAndScissor(bool useBufferedRendering, float renderWidth, float renderHeight, int bufferWidth, int bufferHeight, ViewportAndScissor &out);90void UpdateCachedViewportState(const ViewportAndScissor &vpAndScissor);9192// NOTE: See the .cpp file for detailed comment about how the use flags are interpreted.93class DepthScaleFactors {94public:95// This should only be used from GetDepthScaleFactors.96DepthScaleFactors(double offset, double scale) : offset_(offset), scale_(scale) {}9798// Decodes a value from a depth buffer to a value of range 0..6553699float DecodeToU16(float z) const {100return (float)((z - offset_) * scale_);101}102103// Encodes a value from the range 0..65536 to a normalized depth value (0-1), in the104// range that we write to the depth buffer.105float EncodeFromU16(float z_u16) const {106return (float)(((double)z_u16 / scale_) + offset_);107}108109float Offset() const { return (float)offset_; }110111float ScaleU16() const { return (float)scale_; }112float Scale() const { return (float)(scale_ / 65535.0); }113114private:115// Doubles hardly cost anything these days, and precision matters here.116double offset_;117double scale_;118};119120DepthScaleFactors GetDepthScaleFactors(u32 useFlags);121122// These are common to all modern APIs and can be easily converted with a lookup table.123enum class BlendFactor : uint8_t {124ZERO,125ONE,126SRC_COLOR,127ONE_MINUS_SRC_COLOR,128DST_COLOR,129ONE_MINUS_DST_COLOR,130SRC_ALPHA,131ONE_MINUS_SRC_ALPHA,132DST_ALPHA,133ONE_MINUS_DST_ALPHA,134CONSTANT_COLOR,135ONE_MINUS_CONSTANT_COLOR,136CONSTANT_ALPHA,137ONE_MINUS_CONSTANT_ALPHA,138SRC1_COLOR,139ONE_MINUS_SRC1_COLOR,140SRC1_ALPHA,141ONE_MINUS_SRC1_ALPHA,142INVALID,143COUNT,144};145146enum class BlendEq : uint8_t 
{147ADD,148SUBTRACT,149REVERSE_SUBTRACT,150MIN,151MAX,152COUNT153};154155// Computed blend setup, including shader stuff.156struct GenericBlendState {157bool applyFramebufferRead;158bool dirtyShaderBlendFixValues;159160// Shader generation state161ReplaceAlphaType replaceAlphaWithStencil;162ReplaceBlendType replaceBlend;163SimulateLogicOpType simulateLogicOpType;164165// Resulting hardware blend state166bool blendEnabled;167168BlendFactor srcColor;169BlendFactor dstColor;170BlendFactor srcAlpha;171BlendFactor dstAlpha;172173BlendEq eqColor;174BlendEq eqAlpha;175176bool useBlendColor;177u32 blendColor;178179void setFactors(BlendFactor srcC, BlendFactor dstC, BlendFactor srcA, BlendFactor dstA) {180srcColor = srcC;181dstColor = dstC;182srcAlpha = srcA;183dstAlpha = dstA;184}185void setEquation(BlendEq eqC, BlendEq eqA) {186eqColor = eqC;187eqAlpha = eqA;188}189void setBlendColor(uint32_t color, uint8_t alpha) {190blendColor = color | ((uint32_t)alpha << 24);191useBlendColor = true;192}193void defaultBlendColor(uint8_t alpha) {194blendColor = 0xFFFFFF | ((uint32_t)alpha << 24);195useBlendColor = true;196}197198void Log();199};200201void ApplyStencilReplaceAndLogicOpIgnoreBlend(ReplaceAlphaType replaceAlphaWithStencil, GenericBlendState &blendState);202203struct GenericMaskState {204bool applyFramebufferRead;205uint32_t uniformMask; // For each bit, opposite to the PSP.206207// The hardware channel masks, 1 bit per color component. 
From bit 0, order is RGBA like in all APIs!208uint8_t channelMask;209210void ConvertToShaderBlend() {211// If we have to do it in the shader, we simply pass through all channels but mask only in the shader instead.212// Some GPUs have minor penalties for masks that are not all-channels-on or all-channels-off.213channelMask = 0xF;214applyFramebufferRead = true;215}216217void Log();218};219220struct GenericStencilFuncState {221bool enabled;222GEComparison testFunc;223u8 testRef;224u8 testMask;225u8 writeMask;226GEStencilOp sFail;227GEStencilOp zFail;228GEStencilOp zPass;229};230void ConvertStencilFuncState(GenericStencilFuncState &stencilFuncState);231232struct GenericLogicState {233// If set, logic op is applied in the shader INSTEAD of in hardware.234// In this case, simulateLogicOpType and all that should be off.235bool applyFramebufferRead;236237// Hardware238bool logicOpEnabled;239240// Hardware and shader generation241GELogicOp logicOp;242243void ApplyToBlendState(GenericBlendState &blendState);244void ConvertToShaderBlend() {245if (logicOp != GE_LOGIC_COPY) {246logicOpEnabled = false;247applyFramebufferRead = true;248// Same logicOp is kept.249}250}251};252253struct ComputedPipelineState {254GenericBlendState blendState;255GenericMaskState maskState;256GenericLogicState logicState;257258void Convert(bool shaderBitOpsSupported);259260bool FramebufferRead() const {261// If blending is off, its applyFramebufferRead can be false even after state propagation.262// So it's not enough to check just that one.263return blendState.applyFramebufferRead || maskState.applyFramebufferRead || logicState.applyFramebufferRead;264}265};266267// See issue #15898268inline bool SpongebobDepthInverseConditions(const GenericStencilFuncState &stencilState) {269// Check that the depth/stencil state matches the conditions exactly.270// Always with a depth test that's not writing to the depth buffer (only stencil.)271if (!gstate.isDepthTestEnabled() || 
gstate.isDepthWriteEnabled())272return false;273// Always GREATER_EQUAL, which we flip to LESS.274if (gstate.getDepthTestFunction() != GE_COMP_GEQUAL)275return false;276277// The whole purpose here is a depth fail that we need to write to alpha.278if (stencilState.zFail != GE_STENCILOP_ZERO || stencilState.sFail != GE_STENCILOP_KEEP || stencilState.zPass != GE_STENCILOP_KEEP)279return false;280if (stencilState.testFunc != GE_COMP_ALWAYS || stencilState.writeMask != 0xFF)281return false;282283// Lastly, verify no color is written. Natural way is a mask, in case another game uses it.284// Note that the PSP masks are reversed compared to typical APIs.285if (gstate.getColorMask() == 0xFFFFFF00)286return true;287288// These games specifically use simple alpha blending with a constant zero alpha.289if (!gstate.isAlphaBlendEnabled() || gstate.getBlendFuncA() != GE_SRCBLEND_SRCALPHA || gstate.getBlendFuncB() != GE_DSTBLEND_INVSRCALPHA)290return false;291292// Also make sure there's no texture, in case its alpha gets involved.293if (gstate.isTextureMapEnabled())294return false;295296// Spongebob uses material alpha.297if (gstate.getMaterialAmbientA() == 0x00 && gstate.getMaterialUpdate() == 0)298return true;299// MX vs ATV : Reflex uses vertex colors, should really check them...300if (gstate.getMaterialUpdate() == 1)301return true;302303// Okay, color is most likely being used if we didn't hit the above.304return false;305}306307308