CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Common/ShaderUniforms.cpp
Views: 1401
#include <algorithm>1#include <cmath>23#include "ShaderUniforms.h"4#include "Common/System/Display.h"5#include "Common/Data/Convert/SmallDataConvert.h"6#include "Common/Math/lin/matrix4x4.h"7#include "Common/Math/math_util.h"8#include "Common/Math/lin/vec3.h"9#include "GPU/GPUState.h"10#include "GPU/Common/FramebufferManagerCommon.h"11#include "GPU/Common/GPUStateUtils.h"12#include "GPU/Math3D.h"1314using namespace Lin;1516static void ConvertProjMatrixToVulkan(Matrix4x4 &in) {17const Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);18const Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);19in.translateAndScale(trans, scale);20}2122static void ConvertProjMatrixToD3D11(Matrix4x4 &in) {23const Vec3 trans(gstate_c.vpXOffset, -gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);24const Vec3 scale(gstate_c.vpWidthScale, -gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);25in.translateAndScale(trans, scale);26}2728void CalcCullRange(float minValues[4], float maxValues[4], bool flipViewport, bool hasNegZ) {29// Account for the projection viewport adjustment when viewport is too large.30auto reverseViewportX = [](float x) {31float pspViewport = (x - gstate.getViewportXCenter()) * (1.0f / gstate.getViewportXScale());32return (pspViewport * gstate_c.vpWidthScale) - gstate_c.vpXOffset;33};34auto reverseViewportY = [flipViewport](float y) {35float heightScale = gstate_c.vpHeightScale;36float yOffset = gstate_c.vpYOffset;37if (flipViewport) {38// For D3D11 and GLES non-buffered.39heightScale = -heightScale;40yOffset = -yOffset;41}42float pspViewport = (y - gstate.getViewportYCenter()) * (1.0f / gstate.getViewportYScale());43return (pspViewport * heightScale) - yOffset;44};45auto transformZ = [hasNegZ](float z) {46// Z culling ignores the viewport, so we just redo the projection matrix adjustments.47if (hasNegZ) {48return (z * gstate_c.vpDepthScale) + gstate_c.vpZOffset;49}50return (z * gstate_c.vpDepthScale * 0.5f) + gstate_c.vpZOffset * 0.5f + 0.5f;51};52auto sortPair = [](float a, float b) {53return a > b ? std::make_pair(b, a) : std::make_pair(a, b);54};5556// The PSP seems to use 0.12.4 for X and Y, and 0.16.0 for Z.57// Any vertex outside this range (unless depth clamp enabled) is discarded.58auto x = sortPair(reverseViewportX(0.0f), reverseViewportX(4096.0f));59auto y = sortPair(reverseViewportY(0.0f), reverseViewportY(4096.0f));60auto z = sortPair(transformZ(-1.000030517578125f), transformZ(1.000030517578125f));61// Since we have space in w, use it to pass the depth clamp flag. We also pass NAN for w "discard".62float clampEnable = gstate.isDepthClampEnabled() ? 1.0f : 0.0f;6364minValues[0] = x.first;65minValues[1] = y.first;66minValues[2] = z.first;67minValues[3] = clampEnable;68maxValues[0] = x.second;69maxValues[1] = y.second;70maxValues[2] = z.second;71maxValues[3] = NAN;72}7374void BaseUpdateUniforms(UB_VS_FS_Base *ub, uint64_t dirtyUniforms, bool flipViewport, bool useBufferedRendering) {75if (dirtyUniforms & DIRTY_TEXENV) {76Uint8x3ToFloat3(ub->texEnvColor, gstate.texenvcolor);77}78if (dirtyUniforms & DIRTY_ALPHACOLORREF) {79ub->alphaColorRef = gstate.getColorTestRef() | ((gstate.getAlphaTestRef() & gstate.getAlphaTestMask()) << 24);80}81if (dirtyUniforms & DIRTY_ALPHACOLORMASK) {82ub->colorTestMask = gstate.getColorTestMask() | (gstate.getAlphaTestMask() << 24);83}84if (dirtyUniforms & DIRTY_FOGCOLOR) {85Uint8x3ToFloat3(ub->fogColor, gstate.fogcolor);86}87if (dirtyUniforms & DIRTY_SHADERBLEND) {88Uint8x3ToFloat3(ub->blendFixA, gstate.getFixA());89Uint8x3ToFloat3(ub->blendFixB, gstate.getFixB());90}91if (dirtyUniforms & DIRTY_TEXCLAMP) {92const float invW = 1.0f / (float)gstate_c.curTextureWidth;93const float invH = 1.0f / (float)gstate_c.curTextureHeight;94const int w = gstate.getTextureWidth(0);95const int h = gstate.getTextureHeight(0);96const float widthFactor = (float)w * invW;97const float heightFactor = (float)h * invH;9899// First wrap xy, then half texel xy (for clamp.)100ub->texClamp[0] = widthFactor;101ub->texClamp[1] = heightFactor;102ub->texClamp[2] = invW * 0.5f;103ub->texClamp[3] = invH * 0.5f;104ub->texClampOffset[0] = gstate_c.curTextureXOffset * invW;105ub->texClampOffset[1] = gstate_c.curTextureYOffset * invH;106}107108if (dirtyUniforms & DIRTY_MIPBIAS) {109float mipBias = (float)gstate.getTexLevelOffset16() * (1.0 / 16.0f);110ub->mipBias = (mipBias + 0.5f) / (float)(gstate.getTextureMaxLevel() + 1);111}112113if (dirtyUniforms & DIRTY_PROJMATRIX) {114Matrix4x4 flippedMatrix;115memcpy(&flippedMatrix, gstate.projMatrix, 16 * sizeof(float));116117const bool invertedY = gstate_c.vpHeight < 0;118if (invertedY) {119flippedMatrix[1] = -flippedMatrix[1];120flippedMatrix[5] = -flippedMatrix[5];121flippedMatrix[9] = -flippedMatrix[9];122flippedMatrix[13] = -flippedMatrix[13];123}124const bool invertedX = gstate_c.vpWidth < 0;125if (invertedX) {126flippedMatrix[0] = -flippedMatrix[0];127flippedMatrix[4] = -flippedMatrix[4];128flippedMatrix[8] = -flippedMatrix[8];129flippedMatrix[12] = -flippedMatrix[12];130}131if (flipViewport) {132ConvertProjMatrixToD3D11(flippedMatrix);133} else {134ConvertProjMatrixToVulkan(flippedMatrix);135}136137if (!useBufferedRendering && g_display.rotation != DisplayRotation::ROTATE_0) {138flippedMatrix = flippedMatrix * g_display.rot_matrix;139}140CopyMatrix4x4(ub->proj, flippedMatrix.getReadPtr());141142ub->rotation = useBufferedRendering ? 0 : (float)g_display.rotation;143}144145if (dirtyUniforms & DIRTY_PROJTHROUGHMATRIX) {146Matrix4x4 proj_through;147if (flipViewport) {148proj_through.setOrthoD3D(0.0f, gstate_c.curRTWidth, gstate_c.curRTHeight, 0, 0, 1);149} else {150proj_through.setOrthoVulkan(0.0f, gstate_c.curRTWidth, 0, gstate_c.curRTHeight, 0, 1);151}152if (!useBufferedRendering && g_display.rotation != DisplayRotation::ROTATE_0) {153proj_through = proj_through * g_display.rot_matrix;154}155156// Negative RT offsets come from split framebuffers (Killzone)157if (gstate_c.curRTOffsetX < 0 || gstate_c.curRTOffsetY < 0) {158proj_through.wx += 2.0f * (float)gstate_c.curRTOffsetX / (float)gstate_c.curRTWidth;159proj_through.wy += 2.0f * (float)gstate_c.curRTOffsetY / (float)gstate_c.curRTHeight;160}161162CopyMatrix4x4(ub->proj_through, proj_through.getReadPtr());163}164165// Transform166if (dirtyUniforms & DIRTY_WORLDMATRIX) {167ConvertMatrix4x3To3x4Transposed(ub->world, gstate.worldMatrix);168}169if (dirtyUniforms & DIRTY_VIEWMATRIX) {170ConvertMatrix4x3To3x4Transposed(ub->view, gstate.viewMatrix);171}172if (dirtyUniforms & DIRTY_TEXMATRIX) {173ConvertMatrix4x3To3x4Transposed(ub->tex, gstate.tgenMatrix);174}175176if (dirtyUniforms & DIRTY_FOGCOEF) {177float fogcoef[2] = {178getFloat24(gstate.fog1),179getFloat24(gstate.fog2),180};181// The PSP just ignores infnan here (ignoring IEEE), so take it down to a valid float.182// Workaround for https://github.com/hrydgard/ppsspp/issues/5384#issuecomment-38365988183if (my_isnanorinf(fogcoef[0])) {184// Not really sure what a sensible value might be, but let's try 64k.185fogcoef[0] = std::signbit(fogcoef[0]) ? -65535.0f : 65535.0f;186}187if (my_isnanorinf(fogcoef[1])) {188fogcoef[1] = std::signbit(fogcoef[1]) ? -65535.0f : 65535.0f;189}190CopyFloat2(ub->fogCoef, fogcoef);191}192193if (dirtyUniforms & DIRTY_TEX_ALPHA_MUL) {194bool doTextureAlpha = gstate.isTextureAlphaUsed();195if (gstate_c.textureFullAlpha && gstate.getTextureFunction() != GE_TEXFUNC_REPLACE) {196doTextureAlpha = false;197}198ub->texNoAlpha = doTextureAlpha ? 0.0f : 1.0f;199ub->texMul = gstate.isColorDoublingEnabled() ? 2.0f : 1.0f;200}201202if (dirtyUniforms & DIRTY_STENCILREPLACEVALUE) {203ub->stencilReplaceValue = (float)gstate.getStencilTestRef() * (1.0 / 255.0);204}205206// Note - this one is not in lighting but in transformCommon as it has uses beyond lighting207if (dirtyUniforms & DIRTY_MATAMBIENTALPHA) {208Uint8x3ToFloat4_AlphaUint8(ub->matAmbient, gstate.materialambient, gstate.getMaterialAmbientA());209}210211if (dirtyUniforms & DIRTY_COLORWRITEMASK) {212ub->colorWriteMask = ~((gstate.pmska << 24) | (gstate.pmskc & 0xFFFFFF));213}214215// Texturing216if (dirtyUniforms & DIRTY_UVSCALEOFFSET) {217float widthFactor = 1.0f;218float heightFactor = 1.0f;219if (gstate_c.textureIsFramebuffer) {220const float invW = 1.0f / (float)gstate_c.curTextureWidth;221const float invH = 1.0f / (float)gstate_c.curTextureHeight;222const int w = gstate.getTextureWidth(0);223const int h = gstate.getTextureHeight(0);224widthFactor = (float)w * invW;225heightFactor = (float)h * invH;226}227if (gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE) {228// When we are generating UV coordinates through the bezier/spline, we need to apply the scaling.229// However, this is missing a check that we're not getting our UV:s supplied for us in the vertices.230ub->uvScaleOffset[0] = gstate_c.uv.uScale * widthFactor;231ub->uvScaleOffset[1] = gstate_c.uv.vScale * heightFactor;232ub->uvScaleOffset[2] = gstate_c.uv.uOff * widthFactor;233ub->uvScaleOffset[3] = gstate_c.uv.vOff * heightFactor;234} else {235ub->uvScaleOffset[0] = widthFactor;236ub->uvScaleOffset[1] = heightFactor;237ub->uvScaleOffset[2] = 0.0f;238ub->uvScaleOffset[3] = 0.0f;239}240}241242if (dirtyUniforms & DIRTY_DEPTHRANGE) {243// Same formulas as D3D9 now. Should work for both Vulkan and D3D11.244245// Depth is [0, 1] mapping to [minz, maxz], not too hard.246float vpZScale = gstate.getViewportZScale();247float vpZCenter = gstate.getViewportZCenter();248249// These are just the reverse of the formulas in GPUStateUtils.250float halfActualZRange = InfToZero(gstate_c.vpDepthScale != 0.0f ? vpZScale / gstate_c.vpDepthScale : 0.0f);251float inverseDepthScale = InfToZero(gstate_c.vpDepthScale != 0.0f ? 1.0f / gstate_c.vpDepthScale : 0.0f);252253float minz = -((gstate_c.vpZOffset * halfActualZRange) - vpZCenter) - halfActualZRange;254float viewZScale = halfActualZRange * 2.0f;255float viewZCenter = minz;256257ub->depthRange[0] = viewZScale;258ub->depthRange[1] = viewZCenter;259ub->depthRange[2] = gstate_c.vpZOffset * 0.5f + 0.5f;260ub->depthRange[3] = 2.0f * inverseDepthScale;261}262263if (dirtyUniforms & DIRTY_CULLRANGE) {264CalcCullRange(ub->cullRangeMin, ub->cullRangeMax, flipViewport, false);265}266267if (dirtyUniforms & DIRTY_BEZIERSPLINE) {268ub->spline_counts = gstate_c.spline_num_points_u;269}270271if (dirtyUniforms & DIRTY_DEPAL) {272int indexMask = gstate.getClutIndexMask();273int indexShift = gstate.getClutIndexShift();274int indexOffset = gstate.getClutIndexStartPos() >> 4;275int format = gstate_c.depalFramebufferFormat;276uint32_t val = BytesToUint32(indexMask, indexShift, indexOffset, format);277// Poke in a bilinear filter flag in the top bit.278if (gstate.isMagnifyFilteringEnabled())279val |= 0x80000000;280ub->depal_mask_shift_off_fmt = val;281}282}283284// For "light ubershader" bits.285// TODO: We pack these bits even when not using ubershader lighting. Maybe not bother.286uint32_t PackLightControlBits() {287// Bit organization288// Bottom 4 bits are enable bits for each light.289// Then, for each light, comes 2 bits for "comp" and 2 bits for "type".290// At the end, at bit 20, we put the three material update bits.291292uint32_t lightControl = 0;293for (int i = 0; i < 4; i++) {294if (gstate.isLightChanEnabled(i)) {295lightControl |= 1 << i;296}297298u32 computation = (u32)gstate.getLightComputation(i); // 2 bits299u32 type = (u32)gstate.getLightType(i); // 2 bits300if (type == 3) { type = 0; } // Don't want to handle this degenerate case in the shader.301lightControl |= computation << (4 + i * 4);302lightControl |= type << (4 + i * 4 + 2);303}304305// Material update is 3 bits.306lightControl |= gstate.getMaterialUpdate() << 20;307return lightControl;308}309310void LightUpdateUniforms(UB_VS_Lights *ub, uint64_t dirtyUniforms) {311// Lighting312if (dirtyUniforms & DIRTY_AMBIENT) {313Uint8x3ToFloat4_AlphaUint8(ub->ambientColor, gstate.ambientcolor, gstate.getAmbientA());314}315if (dirtyUniforms & DIRTY_MATDIFFUSE) {316Uint8x3ToFloat4(ub->materialDiffuse, gstate.materialdiffuse);317}318if (dirtyUniforms & DIRTY_MATSPECULAR) {319Uint8x3ToFloat4_Alpha(ub->materialSpecular, gstate.materialspecular, std::max(0.0f, getFloat24(gstate.materialspecularcoef)));320}321if (dirtyUniforms & DIRTY_MATEMISSIVE) {322// We're not touching the fourth f32 here, because we store an u32 of control bits in it.323Uint8x3ToFloat3(ub->materialEmissive, gstate.materialemissive);324}325if (dirtyUniforms & DIRTY_LIGHT_CONTROL) {326ub->lightControl = PackLightControlBits();327}328for (int i = 0; i < 4; i++) {329if (dirtyUniforms & (DIRTY_LIGHT0 << i)) {330if (gstate.isDirectionalLight(i)) {331// Prenormalize332ExpandFloat24x3ToFloat4AndNormalize(ub->lpos[i], &gstate.lpos[i * 3]);333} else {334ExpandFloat24x3ToFloat4(ub->lpos[i], &gstate.lpos[i * 3]);335}336// ldir is only used for spotlights. Prenormalize it.337ExpandFloat24x3ToFloat4AndNormalize(ub->ldir[i], &gstate.ldir[i * 3]);338ExpandFloat24x3ToFloat4(ub->latt[i], &gstate.latt[i * 3]);339float lightAngle_spotCoef[2] = { getFloat24(gstate.lcutoff[i]), getFloat24(gstate.lconv[i]) };340CopyFloat2To4(ub->lightAngle_SpotCoef[i], lightAngle_spotCoef);341Uint8x3ToFloat4(ub->lightAmbient[i], gstate.lcolor[i * 3]);342Uint8x3ToFloat4(ub->lightDiffuse[i], gstate.lcolor[i * 3 + 1]);343Uint8x3ToFloat4(ub->lightSpecular[i], gstate.lcolor[i * 3 + 2]);344}345}346}347348void BoneUpdateUniforms(UB_VS_Bones *ub, uint64_t dirtyUniforms) {349for (int i = 0; i < 8; i++) {350if (dirtyUniforms & (DIRTY_BONEMATRIX0 << i)) {351ConvertMatrix4x3To3x4Transposed(ub->bones[i], gstate.boneMatrix + 12 * i);352}353}354}355356357