CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/GPUState.h
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#pragma once1819#include "ppsspp_config.h"2021#include "Common/CommonTypes.h"22#include "Common/Swap.h"23#include "GPU/GPU.h"24#include "GPU/ge_constants.h"25#include "GPU/Common/ShaderCommon.h"2627#if defined(_M_SSE)28#include <emmintrin.h>29#endif30#if PPSSPP_ARCH(ARM_NEON)31#if defined(_MSC_VER) && PPSSPP_ARCH(ARM64)32#include <arm64_neon.h>33#else34#include <arm_neon.h>35#endif36#endif3738class PointerWrap;3940struct GPUgstate {41// Getting rid of this ugly union in favor of the accessor functions42// might be a good idea....43union {44u32 cmdmem[256];45struct {46u32 nop,47vaddr,48iaddr,49pad00,50prim,51bezier,52spline,53boundBox,54jump,55bjump,56call,57ret,58end,59pad01,60signal,61finish,62base,63pad02,64vertType,65offsetAddr,66origin,67region1,68region2,69lightingEnable,70lightEnable[4],71depthClampEnable,72cullfaceEnable,73textureMapEnable, // 0x1E GE_CMD_TEXTUREMAPENABLE74fogEnable,75ditherEnable,76alphaBlendEnable,77alphaTestEnable,78zTestEnable,79stencilTestEnable,80antiAliasEnable,81patchCullEnable,82colorTestEnable,83logicOpEnable,84pad03,85boneMatrixNumber,86boneMatrixData,87morphwgt[8], //dont use88pad04[2],89patchdivision,90patchprimitive,91patchfacing,92pad04_a,9394worldmtxnum, // 0x3A95worldmtxdata, // 0x3B96viewmtxnum, // 0x3C97viewmtxdata, // 0x3D98projmtxnum, // 0x3E99projmtxdata, // 0x3F100texmtxnum, // 0x40101texmtxdata, // 0x41102103viewportxscale, // 0x42104viewportyscale, // 0x43105viewportzscale, // 0x44106viewportxcenter, // 0x45107viewportycenter, // 0x46108viewportzcenter, // 0x47109texscaleu, // 0x48110texscalev, // 0x49111texoffsetu, // 0x4A112texoffsetv, // 0x4B113offsetx, // 0x4C114offsety, // 0x4D115pad111[2],116shademodel, // 0x50117reversenormals, // 0x51118pad222,119materialupdate, // 0x53120materialemissive, // 0x54121materialambient, // 0x55122materialdiffuse, // 0x56123materialspecular, // 0x57124materialalpha, // 0x58125pad333[2],126materialspecularcoef, // 0x5B127ambientcolor, // 0x5C128ambientalpha, // 0x5D129lmode, // 0x5E GE_CMD_LIGHTMODE130ltype[4], // 0x5F-0x62 GE_CMD_LIGHTTYPEx131lpos[12], // 0x63-0x6E132ldir[12], // 0x6F-0x7A133latt[12], // 0x7B-0x86134lconv[4], // 0x87-0x8A135lcutoff[4], // 0x8B-0x8E136lcolor[12], // 0x8F-0x9A137cullmode, // 0x9B138fbptr, // 0x9C139fbwidth, // 0x9D140zbptr, // 0x9E141zbwidth, // 0x9F142texaddr[8], // 0xA0-0xA7143texbufwidth[8], // 0xA8-0xAF144clutaddr, // 0xB0145clutaddrupper, // 0xB1146transfersrc, // 0xB2147transfersrcw, // 0xB3148transferdst, // 0xB4149transferdstw, // 0xB5150padxxx[2],151texsize[8], // 0xB8-BF152texmapmode, // 0xC0153texshade, // 0xC1154texmode, // 0xC2 GE_CMD_TEXMODE155texformat, // 0xC3156loadclut, // 0xC4157clutformat, // 0xC5158texfilter, // 0xC6159texwrap, // 0xC7160texlevel, // 0xC8161texfunc, // 0xC9162texenvcolor, // 0xCA163texflush, // 0xCB164texsync, // 0xCC165fog1, // 0xCD166fog2, // 0xCE167fogcolor, // 0xCF168texlodslope, // 0xD0169padxxxxxx, // 0xD1170framebufpixformat, // 0xD2171clearmode, // 0xD3 GE_CMD_CLEARMODE172scissor1,173scissor2,174minz,175maxz,176colortest,177colorref,178colortestmask,179alphatest,180stenciltest,181stencilop,182ztestfunc,183blend,184blendfixa,185blendfixb,186dithmtx[4],187lop, // 0xE6188zmsk,189pmskc,190pmska,191transferstart,192transfersrcpos,193transferdstpos,194pad99,195transfersize, // 0xEE196pad100, // 0xEF197imm_vscx, // 0xF0198imm_vscy,199imm_vscz,200imm_vtcs,201imm_vtct,202imm_vtcq,203imm_cv,204imm_ap,205imm_fc,206imm_scv; // 0xF9207// In the unlikely case we ever add anything else here, don't forget to update the padding on the next line!208u32 pad05[0xFF- 0xF9];209};210};211212// These are not directly mapped, instead these are loaded one-by-one through special commands.213// However, these are actual state, and can be read back.214float worldMatrix[12]; // 4x3215float viewMatrix[12]; // 4x3216float projMatrix[16]; // 4x4217float tgenMatrix[12]; // 4x3218float boneMatrix[12 * 8]; // Eight 4x3 bone matrices.219220// We ignore the high bits of the framebuffer in fbwidth - even 0x08000000 renders to vRAM.221// The top bits of mirroring are also not respected, so we mask them away.222u32 getFrameBufRawAddress() const { return fbptr & 0x1FFFF0; }223// 0x44000000 is uncached VRAM.224u32 getFrameBufAddress() const { return 0x44000000 | getFrameBufRawAddress(); }225GEBufferFormat FrameBufFormat() const { return static_cast<GEBufferFormat>(framebufpixformat & 3); }226int FrameBufStride() const { return fbwidth&0x7FC; }227u32 getDepthBufRawAddress() const { return zbptr & 0x1FFFF0; }228u32 getDepthBufAddress() const { return 0x44600000 | getDepthBufRawAddress(); }229int DepthBufStride() const { return zbwidth&0x7FC; }230231// Pixel Pipeline232bool isModeClear() const { return clearmode & 1; }233bool isFogEnabled() const { return fogEnable & 1; }234float getFogCoef1() const { return getFloat24(fog1); }235float getFogCoef2() const { return getFloat24(fog2); }236237// Cull238bool isCullEnabled() const { return cullfaceEnable & 1; }239int getCullMode() const { return cullmode & 1; }240241// Color Mask242bool isClearModeColorMask() const { return (clearmode&0x100) != 0; }243bool isClearModeAlphaMask() const { return (clearmode&0x200) != 0; }244bool isClearModeDepthMask() const { return (clearmode&0x400) != 0; }245u32 getClearModeColorMask() const { return ((clearmode&0x100) ? 0 : 0xFFFFFF) | ((clearmode&0x200) ? 0 : 0xFF000000); }246247// Blend248GEBlendSrcFactor getBlendFuncA() const { return (GEBlendSrcFactor)(blend & 0xF); }249GEBlendDstFactor getBlendFuncB() const { return (GEBlendDstFactor)((blend >> 4) & 0xF); }250u32 getFixA() const { return blendfixa & 0xFFFFFF; }251u32 getFixB() const { return blendfixb & 0xFFFFFF; }252GEBlendMode getBlendEq() const { return static_cast<GEBlendMode>((blend >> 8) & 0x7); }253bool isAlphaBlendEnabled() const { return alphaBlendEnable & 1; }254255// AntiAlias256bool isAntiAliasEnabled() const { return antiAliasEnable & 1; }257258// Dither259bool isDitherEnabled() const { return ditherEnable & 1; }260int getDitherValue(int x, int y) const {261u8 raw = (dithmtx[y & 3] >> ((x & 3) * 4)) & 0xF;262// Apply sign extension to make 8-F negative, 0-7 positive.263return ((s8)(raw << 4)) >> 4;264}265266// Color Mask267u32 getColorMask() const { return (pmskc & 0xFFFFFF) | ((pmska & 0xFF) << 24); }268u8 getStencilWriteMask() const { return pmska & 0xFF; }269bool isLogicOpEnabled() const { return logicOpEnable & 1; }270GELogicOp getLogicOp() const { return static_cast<GELogicOp>(lop & 0xF); }271272// Depth Test273bool isDepthTestEnabled() const { return zTestEnable & 1; }274bool isDepthWriteEnabled() const { return !(zmsk & 1); }275GEComparison getDepthTestFunction() const { return static_cast<GEComparison>(ztestfunc & 0x7); }276u16 getDepthRangeMin() const { return minz & 0xFFFF; }277u16 getDepthRangeMax() const { return maxz & 0xFFFF; }278279// Stencil Test280bool isStencilTestEnabled() const { return stencilTestEnable & 1; }281GEComparison getStencilTestFunction() const { return static_cast<GEComparison>(stenciltest & 0x7); }282int getStencilTestRef() const { return (stenciltest>>8) & 0xFF; }283int getStencilTestMask() const { return (stenciltest>>16) & 0xFF; }284GEStencilOp getStencilOpSFail() const { return static_cast<GEStencilOp>(stencilop & 0x7); }285GEStencilOp getStencilOpZFail() const { return static_cast<GEStencilOp>((stencilop>>8) & 0x7); }286GEStencilOp getStencilOpZPass() const { return static_cast<GEStencilOp>((stencilop>>16) & 0x7); }287288// Alpha Test289bool isAlphaTestEnabled() const { return alphaTestEnable & 1; }290GEComparison getAlphaTestFunction() const { return static_cast<GEComparison>(alphatest & 0x7); }291int getAlphaTestRef() const { return (alphatest >> 8) & 0xFF; }292int getAlphaTestMask() const { return (alphatest >> 16) & 0xFF; }293294// Color Test295bool isColorTestEnabled() const { return colorTestEnable & 1; }296GEComparison getColorTestFunction() const { return static_cast<GEComparison>(colortest & 0x3); }297u32 getColorTestRef() const { return colorref & 0xFFFFFF; }298u32 getColorTestMask() const { return colortestmask & 0xFFFFFF; }299300// Texturing301// TODO: Verify getTextureAddress() alignment?302u32 getTextureAddress(int level) const { return (texaddr[level] & 0xFFFFF0) | ((texbufwidth[level] << 8) & 0x0F000000); }303int getTextureWidth(int level) const { return 1 << (texsize[level] & 0xf);}304int getTextureHeight(int level) const { return 1 << ((texsize[level] >> 8) & 0xf);}305u16 getTextureDimension(int level) const { return texsize[level] & 0xf0f;}306GETexLevelMode getTexLevelMode() const { return static_cast<GETexLevelMode>(texlevel & 0x3); }307int getTexLevelOffset16() const { return (int)(s8)((texlevel >> 16) & 0xFF); }308bool isTextureMapEnabled() const { return textureMapEnable & 1; }309GETexFunc getTextureFunction() const { return static_cast<GETexFunc>(texfunc & 0x7); }310bool isColorDoublingEnabled() const { return (texfunc & 0x10000) != 0; }311bool isTextureAlphaUsed() const { return (texfunc & 0x100) != 0; }312GETextureFormat getTextureFormat() const { return static_cast<GETextureFormat>(texformat & 0xF); }313bool isTextureFormatIndexed() const { return (texformat & 4) != 0; } // GE_TFMT_CLUT4 - GE_TFMT_CLUT32 are 0b1xx.314int getTextureEnvColRGB() const { return texenvcolor & 0x00FFFFFF; }315u32 getClutAddress() const { return (clutaddr & 0x00FFFFF0) | ((clutaddrupper << 8) & 0x0F000000); }316int getClutLoadBytes() const { return getClutLoadBlocks() * 32; }317int getClutLoadBlocks() const {318// The PSP only supports 0x3F, but Misshitsu no Sacrifice has extra color data (see #15727.)319// 0x40 would be 0, which would be a no-op, so we allow it.320if ((loadclut & 0x7F) == 0x40)321return 0x40;322return loadclut & 0x3F;323}324GEPaletteFormat getClutPaletteFormat() const { return static_cast<GEPaletteFormat>(clutformat & 3); }325int getClutIndexShift() const { return (clutformat >> 2) & 0x1F; }326int getClutIndexMask() const { return (clutformat >> 8) & 0xFF; }327int getClutIndexStartPos() const { return ((clutformat >> 16) & 0x1F) << 4; }328u32 transformClutIndex(u32 index) const {329// We need to wrap any entries beyond the first 1024 bytes.330u32 mask = getClutPaletteFormat() == GE_CMODE_32BIT_ABGR8888 ? 0xFF : 0x1FF;331return ((index >> getClutIndexShift()) & getClutIndexMask()) | (getClutIndexStartPos() & mask);332}333bool isClutIndexSimple() const { return (clutformat & ~3) == 0xC500FF00; } // Meaning, no special mask, shift, or start pos.334bool isTextureSwizzled() const { return texmode & 1; }335bool isClutSharedForMipmaps() const { return (texmode & 0x100) == 0; }336bool isMipmapEnabled() const { return (texfilter & 4) != 0; }337bool isMipmapFilteringEnabled() const { return (texfilter & 2) != 0; }338bool isMinifyFilteringEnabled() const { return (texfilter & 1) != 0; }339bool isMagnifyFilteringEnabled() const { return (texfilter >> 8) & 1; }340int getTextureMaxLevel() const { return (texmode >> 16) & 0x7; }341float getTextureLodSlope() const { return getFloat24(texlodslope); }342343// Lighting344bool isLightingEnabled() const { return lightingEnable & 1; }345bool isLightChanEnabled(int chan) const { return lightEnable[chan] & 1; }346GELightComputation getLightComputation(int chan) const { return static_cast<GELightComputation>(ltype[chan] & 0x3); }347bool isUsingPoweredDiffuseLight(int chan) const { return getLightComputation(chan) == GE_LIGHTCOMP_ONLYPOWDIFFUSE; }348bool isUsingSpecularLight(int chan) const { return getLightComputation(chan) == GE_LIGHTCOMP_BOTH; }349bool isUsingSecondaryColor() const { return lmode & 1; }350GELightType getLightType(int chan) const { return static_cast<GELightType>((ltype[chan] >> 8) & 3); }351bool isDirectionalLight(int chan) const { return getLightType(chan) == GE_LIGHTTYPE_DIRECTIONAL; }352bool isPointLight(int chan) const { return getLightType(chan) == GE_LIGHTTYPE_POINT; }353bool isSpotLight(int chan) const { return getLightType(chan) >= GE_LIGHTTYPE_SPOT; }354GEShadeMode getShadeMode() const { return static_cast<GEShadeMode>(shademodel & 1); }355unsigned int getAmbientR() const { return ambientcolor&0xFF; }356unsigned int getAmbientG() const { return (ambientcolor>>8)&0xFF; }357unsigned int getAmbientB() const { return (ambientcolor>>16)&0xFF; }358unsigned int getAmbientA() const { return ambientalpha&0xFF; }359unsigned int getAmbientRGBA() const { return (ambientcolor&0xFFFFFF) | ((ambientalpha&0xFF)<<24); }360unsigned int getMaterialUpdate() const { return materialupdate & 7; }361unsigned int getMaterialAmbientR() const { return materialambient&0xFF; }362unsigned int getMaterialAmbientG() const { return (materialambient>>8)&0xFF; }363unsigned int getMaterialAmbientB() const { return (materialambient>>16)&0xFF; }364unsigned int getMaterialAmbientA() const { return materialalpha&0xFF; }365unsigned int getMaterialAmbientRGBA() const { return (materialambient & 0x00FFFFFF) | (materialalpha << 24); }366unsigned int getMaterialDiffuseR() const { return materialdiffuse&0xFF; }367unsigned int getMaterialDiffuseG() const { return (materialdiffuse>>8)&0xFF; }368unsigned int getMaterialDiffuseB() const { return (materialdiffuse>>16)&0xFF; }369unsigned int getMaterialDiffuse() const { return materialdiffuse & 0xffffff; }370unsigned int getMaterialEmissiveR() const { return materialemissive&0xFF; }371unsigned int getMaterialEmissiveG() const { return (materialemissive>>8)&0xFF; }372unsigned int getMaterialEmissiveB() const { return (materialemissive>>16)&0xFF; }373unsigned int getMaterialEmissive() const { return materialemissive & 0xffffff; }374unsigned int getMaterialSpecularR() const { return materialspecular&0xFF; }375unsigned int getMaterialSpecularG() const { return (materialspecular>>8)&0xFF; }376unsigned int getMaterialSpecularB() const { return (materialspecular>>16)&0xFF; }377unsigned int getMaterialSpecular() const { return materialspecular & 0xffffff; }378float getMaterialSpecularCoef() const { return getFloat24(materialspecularcoef); }379unsigned int getLightAmbientColorR(int chan) const { return lcolor[chan*3]&0xFF; }380unsigned int getLightAmbientColorG(int chan) const { return (lcolor[chan*3]>>8)&0xFF; }381unsigned int getLightAmbientColorB(int chan) const { return (lcolor[chan*3]>>16)&0xFF; }382unsigned int getLightAmbientColor(int chan) const { return lcolor[chan*3]&0xFFFFFF; }383unsigned int getDiffuseColorR(int chan) const { return lcolor[1+chan*3]&0xFF; }384unsigned int getDiffuseColorG(int chan) const { return (lcolor[1+chan*3]>>8)&0xFF; }385unsigned int getDiffuseColorB(int chan) const { return (lcolor[1+chan*3]>>16)&0xFF; }386unsigned int getDiffuseColor(int chan) const { return lcolor[1+chan*3]&0xFFFFFF; }387unsigned int getSpecularColorR(int chan) const { return lcolor[2+chan*3]&0xFF; }388unsigned int getSpecularColorG(int chan) const { return (lcolor[2+chan*3]>>8)&0xFF; }389unsigned int getSpecularColorB(int chan) const { return (lcolor[2+chan*3]>>16)&0xFF; }390unsigned int getSpecularColor(int chan) const { return lcolor[2+chan*3]&0xFFFFFF; }391392int getPatchDivisionU() const { return patchdivision & 0x7F; }393int getPatchDivisionV() const { return (patchdivision >> 8) & 0x7F; }394395// UV gen396GETexMapMode getUVGenMode() const { return static_cast<GETexMapMode>(texmapmode & 3);} // 2 bits397GETexProjMapMode getUVProjMode() const { return static_cast<GETexProjMapMode>((texmapmode >> 8) & 3);} // 2 bits398int getUVLS0() const { return texshade & 0x3; } // 2 bits399int getUVLS1() const { return (texshade >> 8) & 0x3; } // 2 bits400401bool isTexCoordClampedS() const { return texwrap & 1; }402bool isTexCoordClampedT() const { return (texwrap >> 8) & 1; }403404int getScissorX1() const { return scissor1 & 0x3FF; }405int getScissorY1() const { return (scissor1 >> 10) & 0x3FF; }406int getScissorX2() const { return scissor2 & 0x3FF; }407int getScissorY2() const { return (scissor2 >> 10) & 0x3FF; }408int getRegionRateX() const { return 0x100 + (region1 & 0x3FF); }409int getRegionRateY() const { return 0x100 + ((region1 >> 10) & 0x3FF); }410int getRegionX2() const { return (region2 & 0x3FF); }411int getRegionY2() const { return (region2 >> 10) & 0x3FF; }412413bool isDepthClampEnabled() const { return depthClampEnable & 1; }414415// Note that the X1/Y1/Z1 here does not mean the upper-left corner, but half the dimensions. X2/Y2/Z2 are the center.416float getViewportXScale() const { return getFloat24(viewportxscale); }417float getViewportYScale() const { return getFloat24(viewportyscale); }418float getViewportZScale() const { return getFloat24(viewportzscale); }419float getViewportXCenter() const { return getFloat24(viewportxcenter); }420float getViewportYCenter() const { return getFloat24(viewportycenter); }421float getViewportZCenter() const { return getFloat24(viewportzcenter); }422423// Fixed 12.4 point.424int getOffsetX16() const { return offsetx & 0xFFFF; }425int getOffsetY16() const { return offsety & 0xFFFF; }426float getOffsetX() const { return (float)getOffsetX16() / 16.0f; }427float getOffsetY() const { return (float)getOffsetY16() / 16.0f; }428429// Vertex type430bool isModeThrough() const { return (vertType & GE_VTYPE_THROUGH) != 0; }431bool areNormalsReversed() const { return reversenormals & 1; }432bool isSkinningEnabled() const { return ((vertType & GE_VTYPE_WEIGHT_MASK) != GE_VTYPE_WEIGHT_NONE); }433int getNumMorphWeights() const { return ((vertType & GE_VTYPE_MORPHCOUNT_MASK) >> GE_VTYPE_MORPHCOUNT_SHIFT) + 1; }434435GEPatchPrimType getPatchPrimitiveType() const { return static_cast<GEPatchPrimType>(patchprimitive & 3); }436bool isPatchNormalsReversed() const { return patchfacing & 1; }437438// Transfers439u32 getTransferSrcAddress() const { return (transfersrc & 0xFFFFF0) | ((transfersrcw & 0xFF0000) << 8); }440// Bits 0xf800 are ignored, > 0x400 is treated as 0.441u32 getTransferSrcStride() const { int stride = transfersrcw & 0x7F8; return stride > 0x400 ? 0 : stride; }442int getTransferSrcX() const { return (transfersrcpos >> 0) & 0x3FF; }443int getTransferSrcY() const { return (transfersrcpos >> 10) & 0x3FF; }444u32 getTransferDstAddress() const { return (transferdst & 0xFFFFF0) | ((transferdstw & 0xFF0000) << 8); }445// Bits 0xf800 are ignored, > 0x400 is treated as 0.446u32 getTransferDstStride() const { int stride = transferdstw & 0x7F8; return stride > 0x400 ? 0 : stride; }447int getTransferDstX() const { return (transferdstpos >> 0) & 0x3FF; }448int getTransferDstY() const { return (transferdstpos >> 10) & 0x3FF; }449int getTransferWidth() const { return ((transfersize >> 0) & 0x3FF) + 1; }450int getTransferHeight() const { return ((transfersize >> 10) & 0x3FF) + 1; }451int getTransferBpp() const { return (transferstart & 1) ? 4 : 2; }452453454void FastLoadBoneMatrix(u32 addr);455456// Real data in the context ends here457458static void Reset();459void Save(u32_le *ptr);460void Restore(const u32_le *ptr);461};462463bool vertTypeIsSkinningEnabled(u32 vertType);464465inline int vertTypeGetNumBoneWeights(u32 vertType) { return 1 + ((vertType & GE_VTYPE_WEIGHTCOUNT_MASK) >> GE_VTYPE_WEIGHTCOUNT_SHIFT); }466inline int vertTypeGetWeightMask(u32 vertType) { return vertType & GE_VTYPE_WEIGHT_MASK; }467468// The rest is cached simplified/converted data for fast access.469// Does not need to be saved when saving/restoring context.470//471// Lots of this, however, is actual emulator state which must be saved when savestating.472// vertexAddr, indexAddr, offsetAddr for example.473474struct UVScale {475float uScale, vScale;476float uOff, vOff;477};478479#define FLAG_BIT(x) (1 << x)480481// These flags are mainly to make sure that we make decisions on code path in a single482// location. Sometimes we need to take things into account in multiple places, it helps483// to centralize into flags like this. They're also fast to check since the cache line484// will be hot.485// NOTE: Do not forget to update the string array at the end of GPUState.cpp!486enum {487GPU_USE_DUALSOURCE_BLEND = FLAG_BIT(0),488GPU_USE_LIGHT_UBERSHADER = FLAG_BIT(1),489GPU_USE_FRAGMENT_TEST_CACHE = FLAG_BIT(2),490GPU_USE_VS_RANGE_CULLING = FLAG_BIT(3),491GPU_USE_BLEND_MINMAX = FLAG_BIT(4),492GPU_USE_LOGIC_OP = FLAG_BIT(5),493GPU_USE_FRAGMENT_UBERSHADER = FLAG_BIT(6),494GPU_USE_TEXTURE_NPOT = FLAG_BIT(7),495GPU_USE_ANISOTROPY = FLAG_BIT(8),496GPU_USE_CLEAR_RAM_HACK = FLAG_BIT(9),497GPU_USE_INSTANCE_RENDERING = FLAG_BIT(10),498GPU_USE_VERTEX_TEXTURE_FETCH = FLAG_BIT(11),499GPU_USE_TEXTURE_FLOAT = FLAG_BIT(12),500GPU_USE_16BIT_FORMATS = FLAG_BIT(13),501GPU_USE_DEPTH_CLAMP = FLAG_BIT(14),502GPU_USE_TEXTURE_LOD_CONTROL = FLAG_BIT(15),503GPU_USE_DEPTH_TEXTURE = FLAG_BIT(16),504GPU_USE_ACCURATE_DEPTH = FLAG_BIT(17),505GPU_USE_GS_CULLING = FLAG_BIT(18), // Geometry shader506GPU_USE_FRAMEBUFFER_ARRAYS = FLAG_BIT(19),507GPU_USE_FRAMEBUFFER_FETCH = FLAG_BIT(20),508GPU_SCALE_DEPTH_FROM_24BIT_TO_16BIT = FLAG_BIT(21),509GPU_ROUND_FRAGMENT_DEPTH_TO_16BIT = FLAG_BIT(22),510GPU_ROUND_DEPTH_TO_16BIT = FLAG_BIT(23), // Can be disabled either per game or if we use a real 16-bit depth buffer511GPU_USE_CLIP_DISTANCE = FLAG_BIT(24),512GPU_USE_CULL_DISTANCE = FLAG_BIT(25),513514// VR flags (reserved or in-use)515GPU_USE_VIRTUAL_REALITY = FLAG_BIT(29),516GPU_USE_SINGLE_PASS_STEREO = FLAG_BIT(30),517GPU_USE_SIMPLE_STEREO_PERSPECTIVE = FLAG_BIT(31),518};519520// Note that this take a flag index, not the bit value.521const char *GpuUseFlagToString(int useFlag);522523struct KnownVertexBounds {524u16 minU;525u16 minV;526u16 maxU;527u16 maxV;528};529530enum class SubmitType {531DRAW,532BEZIER,533SPLINE,534HW_BEZIER,535HW_SPLINE,536};537538extern GPUgstate gstate;539540struct GPUStateCache {541bool Use(u32 flags) const { return (useFlags_ & flags) != 0; } // Return true if ANY of flags are true.542bool UseAll(u32 flags) const { return (useFlags_ & flags) == flags; } // Return true if ALL flags are true.543544u32 UseFlags() const { return useFlags_; }545546uint64_t GetDirtyUniforms() { return dirty & DIRTY_ALL_UNIFORMS; }547void Dirty(u64 what) {548dirty |= what;549}550void CleanUniforms() {551dirty &= ~DIRTY_ALL_UNIFORMS;552}553void Clean(u64 what) {554dirty &= ~what;555}556bool IsDirty(u64 what) const {557return (dirty & what) != 0ULL;558}559void SetUseShaderDepal(ShaderDepalMode mode) {560if (mode != shaderDepalMode) {561shaderDepalMode = mode;562Dirty(DIRTY_FRAGMENTSHADER_STATE);563}564}565void SetTextureFullAlpha(bool fullAlpha) {566if (fullAlpha != textureFullAlpha) {567textureFullAlpha = fullAlpha;568Dirty(DIRTY_FRAGMENTSHADER_STATE | DIRTY_TEX_ALPHA_MUL);569}570}571void SetNeedShaderTexclamp(bool need) {572if (need != needShaderTexClamp) {573needShaderTexClamp = need;574Dirty(DIRTY_FRAGMENTSHADER_STATE);575if (need)576Dirty(DIRTY_TEXCLAMP);577}578}579void SetTextureIs3D(bool is3D) {580if (is3D != curTextureIs3D) {581curTextureIs3D = is3D;582Dirty(DIRTY_FRAGMENTSHADER_STATE | (is3D ? DIRTY_MIPBIAS : 0));583}584}585void SetTextureIsArray(bool isArrayTexture) { // VK only586if (textureIsArray != isArrayTexture) {587textureIsArray = isArrayTexture;588Dirty(DIRTY_FRAGMENTSHADER_STATE);589}590}591void SetTextureIsVideo(bool isVideo) {592textureIsVideo = isVideo;593}594void SetTextureIsBGRA(bool isBGRA) {595if (bgraTexture != isBGRA) {596bgraTexture = isBGRA;597Dirty(DIRTY_FRAGMENTSHADER_STATE);598}599}600void SetTextureIsFramebuffer(bool isFramebuffer) {601if (textureIsFramebuffer != isFramebuffer) {602textureIsFramebuffer = isFramebuffer;603Dirty(DIRTY_UVSCALEOFFSET);604} else if (isFramebuffer) {605// Always dirty if it's a framebuffer, since the uniform value depends both606// on the specified texture size and the bound texture size. Makes things easier.607// TODO: Look at this again later.608Dirty(DIRTY_UVSCALEOFFSET);609}610}611void SetUseFlags(u32 newFlags) {612if (newFlags != useFlags_) {613if (useFlags_ != 0)614useFlagsChanged = true;615useFlags_ = newFlags;616}617}618619// When checking for a single flag, use Use()/UseAll().620u32 GetUseFlags() const {621return useFlags_;622}623624void UpdateUVScaleOffset() {625#if defined(_M_SSE)626__m128i values = _mm_slli_epi32(_mm_load_si128((const __m128i *)&gstate.texscaleu), 8);627_mm_storeu_si128((__m128i *)&uv, values);628#elif PPSSPP_ARCH(ARM_NEON)629const uint32x4_t values = vshlq_n_u32(vld1q_u32((const u32 *)&gstate.texscaleu), 8);630vst1q_u32((u32 *)&uv, values);631#else632uv.uScale = getFloat24(gstate.texscaleu);633uv.vScale = getFloat24(gstate.texscalev);634uv.uOff = getFloat24(gstate.texoffsetu);635uv.vOff = getFloat24(gstate.texoffsetv);636#endif637}638639private:640u32 useFlags_;641public:642u32 vertexAddr;643u32 indexAddr;644u32 offsetAddr;645646uint64_t dirty;647648bool usingDepth; // For deferred depth copies.649bool clearingDepth;650651bool textureFullAlpha;652bool vertexFullAlpha;653654int skipDrawReason;655656UVScale uv;657658bool bgraTexture;659bool needShaderTexClamp;660bool textureIsArray;661bool textureIsFramebuffer;662bool textureIsVideo;663bool useFlagsChanged;664665float morphWeights[8];666u32 deferredVertTypeDirty;667668u32 curTextureWidth;669u32 curTextureHeight;670u32 actualTextureHeight;671// Only applied when needShaderTexClamp = true.672int curTextureXOffset;673int curTextureYOffset;674bool curTextureIs3D;675676float vpWidth;677float vpHeight;678679float vpXOffset;680float vpYOffset;681float vpZOffset;682float vpWidthScale;683float vpHeightScale;684float vpDepthScale;685686KnownVertexBounds vertBounds;687688GEBufferFormat framebufFormat;689// Some games use a very specific masking setup to draw into the alpha channel of a 4444 target using the blue channel of a 565 target.690// This is done because on PSP you can't write to destination alpha, other than stencil values, which can't be set from a texture.691// Examples of games that do this: Outrun, Split/Second.692// We detect this case and go into a special drawing mode.693bool blueToAlpha;694695// U/V is 1:1 to pixels. Can influence texture sampling.696bool pixelMapped;697698// TODO: These should be accessed from the current VFB object directly.699u32 curRTWidth;700u32 curRTHeight;701u32 curRTRenderWidth;702u32 curRTRenderHeight;703704void SetCurRTOffset(int xoff, int yoff) {705if (xoff != curRTOffsetX || yoff != curRTOffsetY) {706curRTOffsetX = xoff;707curRTOffsetY = yoff;708Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_PROJTHROUGHMATRIX);709}710}711int curRTOffsetX;712int curRTOffsetY;713714// Set if we are doing hardware bezier/spline.715SubmitType submitType;716int spline_num_points_u;717718ShaderDepalMode shaderDepalMode;719GEBufferFormat depalFramebufferFormat;720721u32 getRelativeAddress(u32 data) const;722static void Reset();723void DoState(PointerWrap &p);724};725726class GPUInterface;727class GPUDebugInterface;728729extern GPUStateCache gstate_c;730731inline u32 GPUStateCache::getRelativeAddress(u32 data) const {732u32 baseExtended = ((gstate.base & 0x000F0000) << 8) | data;733return (gstate_c.offsetAddr + baseExtended) & 0x0FFFFFFF;734}735736737