CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Common/DepalettizeShaderCommon.cpp
Views: 1401
// Copyright (c) 2014- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <cstdio>1819#include "Common/GPU/Shader.h"20#include "Common/GPU/ShaderWriter.h"2122#include "GPU/Common/ShaderCommon.h"23#include "Common/StringUtils.h"24#include "Common/Log.h"25#include "Common/LogReporting.h"26#include "GPU/Common/GPUStateUtils.h"27#include "GPU/Common/DepalettizeShaderCommon.h"28#include "GPU/Common/Draw2D.h"2930static const InputDef vsInputs[2] = {31{ "vec2", "a_position", Draw::SEM_POSITION, },32{ "vec2", "a_texcoord0", Draw::SEM_TEXCOORD0, },33};3435// TODO: Deduplicate with TextureShaderCommon.cpp36static const SamplerDef samplers[2] = {37{ 0, "tex", SamplerFlags::ARRAY_ON_VULKAN },38{ 1, "pal" },39};4041static const VaryingDef varyings[1] = {42{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },43};4445// Uses integer instructions available since OpenGL 3.0, ES 3.0 (and 2.0 with extensions), and of course Vulkan and D3D11.46void GenerateDepalShader300(ShaderWriter &writer, const DepalConfig &config) {47const int shift = config.shift;48const int mask = config.mask;4950writer.C(" vec2 texcoord = v_texcoord;\n");5152// Implement the swizzle we need to simulate, if a game uses 8888 framebuffers and any other mode than "6" to access depth textures.53// This implements the "2" mode swizzle (it fixes up the Y direction but not X. See comments on issue #15898, Tantalus games)54// NOTE: This swizzle can be made to work with any power-of-2 resolution scaleFactor by shifting55// the bits around, but not sure how to handle 3x scaling. For now this is 1x-only (rough edges at higher resolutions).56if (config.bufferFormat == GE_FORMAT_DEPTH16) {57if (config.depthUpperBits == 0x2) {58writer.C(R"(59int x = int((texcoord.x / scaleFactor) * texSize.x);60int xclear = x & 0x01F0;61int temp = (x - xclear) | ((x >> 1) & 0xF0) | ((x << 4) & 0x100);62texcoord.x = (float(temp) / texSize.x) * scaleFactor;63)");64}65}6667// Sampling turns our texture into floating point. To avoid this, might be able68// to declare them as isampler2D objects, but these require integer textures, which needs more work.69// Anyhow, we simply work around this by converting back to integer, which is fine.70// Use the mask to skip reading some components.7172// TODO: Since we actually have higher precision color data here, we might want to apply a dithering pattern here73// in the 5551, 565 and 4444 modes. This would benefit Test Drive which renders at 16-bit on the real hardware74// and dithers immediately, while we render at higher color depth and thus don't dither resulting in banding75// when we sample it at low color depth like this.7677// An alternative would be to have a special mode where we keep some extra precision here and sample the CLUT linearly - works for ramps such78// as those that Test Drive uses for its color remapping. But would need game specific flagging.7980writer.C(" vec4 color = ").SampleTexture2D("tex", "texcoord").C(";\n");8182int shiftedMask = mask << shift;83switch (config.bufferFormat) {84case GE_FORMAT_CLUT8:85writer.C(" int index = int(color.r * 255.99);\n");86break;87case GE_FORMAT_8888:88if (shiftedMask & 0xFF) writer.C(" int r = int(color.r * 255.99);\n"); else writer.C(" int r = 0;\n");89if (shiftedMask & 0xFF00) writer.C(" int g = int(color.g * 255.99);\n"); else writer.C(" int g = 0;\n");90if (shiftedMask & 0xFF0000) writer.C(" int b = int(color.b * 255.99);\n"); else writer.C(" int b = 0;\n");91if (shiftedMask & 0xFF000000) writer.C(" int a = int(color.a * 255.99);\n"); else writer.C(" int a = 0;\n");92writer.C(" int index = (a << 24) | (b << 16) | (g << 8) | (r);\n");93break;94case GE_FORMAT_4444:95if (shiftedMask & 0xF) writer.C(" int r = int(color.r * 15.99);\n"); else writer.C(" int r = 0;\n");96if (shiftedMask & 0xF0) writer.C(" int g = int(color.g * 15.99);\n"); else writer.C(" int g = 0;\n");97if (shiftedMask & 0xF00) writer.C(" int b = int(color.b * 15.99);\n"); else writer.C(" int b = 0;\n");98if (shiftedMask & 0xF000) writer.C(" int a = int(color.a * 15.99);\n"); else writer.C(" int a = 0;\n");99writer.C(" int index = (a << 12) | (b << 8) | (g << 4) | (r);\n");100break;101case GE_FORMAT_565:102if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n");103if (shiftedMask & 0x7E0) writer.C(" int g = int(color.g * 63.99);\n"); else writer.C(" int g = 0;\n");104if (shiftedMask & 0xF800) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n");105writer.C(" int index = (b << 11) | (g << 5) | (r);\n");106break;107case GE_FORMAT_5551:108if (config.textureFormat == GE_TFMT_CLUT8) {109// SOCOM case. We need to make sure the next few lines load the right bits, see below.110shiftedMask <<= 8;111}112if (shiftedMask & 0x1F) writer.C(" int r = int(color.r * 31.99);\n"); else writer.C(" int r = 0;\n");113if (shiftedMask & 0x3E0) writer.C(" int g = int(color.g * 31.99);\n"); else writer.C(" int g = 0;\n");114if (shiftedMask & 0x7C00) writer.C(" int b = int(color.b * 31.99);\n"); else writer.C(" int b = 0;\n");115if (shiftedMask & 0x8000) writer.C(" int a = int(color.a);\n"); else writer.C(" int a = 0;\n");116writer.C(" int index = (a << 15) | (b << 10) | (g << 5) | (r);\n");117118if (config.textureFormat == GE_TFMT_CLUT8) {119// SOCOM case. #16210120// To debug the issue, remove this shift to see the texture (check for clamping etc).121writer.C(" index >>= 8;\n");122}123124break;125case GE_FORMAT_DEPTH16:126// Decode depth buffer.127writer.C(" float depth = (color.x - z_offset) * z_scale * 65535.0f;\n");128129if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {130// Convert depth to 565, without going through a CLUT.131// TODO: Make "depal without a CLUT" a separate concept, to avoid redundantly creating a CLUT texture.132writer.C(" int idepth = int(clamp(depth, 0.0, 65535.0));\n");133writer.C(" float r = float(idepth & 31) / 31.0;\n");134writer.C(" float g = float((idepth >> 5) & 63) / 63.0;\n");135writer.C(" float b = float((idepth >> 11) & 31) / 31.0;\n");136writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");137return;138}139140writer.C(" int index = int(clamp(depth, 0.0, 65535.0));\n");141break;142default:143break;144}145146float texturePixels = 512.0f;147148if (shift) {149writer.F(" index = (int(uint(index) >> uint(%d)) & 0x%02x)", shift, mask);150} else {151writer.F(" index = (index & 0x%02x)", mask);152}153if (config.startPos) {154writer.F(" | %d;\n", config.startPos); // '|' matches what we have in gstate.h155} else {156writer.F(";\n");157}158159writer.F(" vec2 uv = vec2((float(index) + 0.5) * %f, 0.0);\n", 1.0f / texturePixels);160writer.C(" vec4 outColor = ").SampleTexture2D("pal", "uv").C(";\n");161}162163// FP only, to suit GL(ES) 2.0 and DX9164void GenerateDepalShaderFloat(ShaderWriter &writer, const DepalConfig &config) {165char lookupMethod[128] = "index.r";166167const int shift = config.shift;168const int mask = config.mask;169170if (config.bufferFormat == GE_FORMAT_DEPTH16) {171DepthScaleFactors factors = GetDepthScaleFactors(gstate_c.UseFlags());172writer.ConstFloat("z_scale", factors.ScaleU16());173writer.ConstFloat("z_offset", factors.Offset());174}175176writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord").C(";\n");177178float index_multiplier = 1.0f;179// pixelformat is the format of the texture we are sampling.180bool formatOK = true;181switch (config.bufferFormat) {182case GE_FORMAT_CLUT8:183if (shift == 0 && mask == 0xFF) {184// Easy peasy.185if (writer.Lang().shaderLanguage == HLSL_D3D9)186snprintf(lookupMethod, sizeof(lookupMethod), "index.a");187else188snprintf(lookupMethod, sizeof(lookupMethod), "index.r");189formatOK = true;190} else {191// Deal with this if we find it.192formatOK = false;193}194break;195case GE_FORMAT_8888:196if ((mask & (mask + 1)) == 0) {197// If the value has all bits contiguous (bitmask check above), we can mod by it + 1.198const char *rgba = "rrrrrrrrggggggggbbbbbbbbaaaaaaaa";199const u8 rgba_shift = shift & 7;200if (rgba_shift == 0 && mask == 0xFF) {201snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);202} else {203snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], 255.99f / (1 << rgba_shift), mask + 1);204index_multiplier = 1.0f / 256.0f;205// Format was OK if there weren't bits from another component.206formatOK = mask <= 255 - (1 << rgba_shift);207}208} else {209formatOK = false;210}211break;212case GE_FORMAT_4444:213if ((mask & (mask + 1)) == 0 && shift < 16) {214const char *rgba = "rrrrggggbbbbaaaa";215const u8 rgba_shift = shift & 3;216if (rgba_shift == 0 && mask == 0xF) {217snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);218index_multiplier = 15.0f / 256.0f;219} else {220// Let's divide and mod to get the right bits. A common case is shift=0, mask=01.221snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], 15.99f / (1 << rgba_shift), mask + 1);222index_multiplier = 1.0f / 256.0f;223formatOK = mask <= 15 - (1 << rgba_shift);224}225} else {226formatOK = false;227}228break;229case GE_FORMAT_565:230if ((mask & (mask + 1)) == 0 && shift < 16) {231const u8 shifts[16] = { 0, 1, 2, 3, 4, 0, 1, 2, 3, 4, 5, 0, 1, 2, 3, 4 };232const int multipliers[16] = { 31, 31, 31, 31, 31, 63, 63, 63, 63, 63, 63, 31, 31, 31, 31, 31 };233const char *rgba = "rrrrrggggggbbbbb";234const u8 rgba_shift = shifts[shift];235if (rgba_shift == 0 && mask == multipliers[shift]) {236snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);237index_multiplier = multipliers[shift] / 256.0f;238} else {239// We just need to divide the right component by the right value, and then mod against the mask.240// A common case is shift=1, mask=0f.241snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], ((float)multipliers[shift] + 0.99f) / (1 << rgba_shift), mask + 1);242index_multiplier = 1.0f / 256.0f;243formatOK = mask <= multipliers[shift] - (1 << rgba_shift);244}245} else {246formatOK = false;247}248break;249case GE_FORMAT_5551:250if (config.textureFormat == GE_TFMT_CLUT8 && mask == 0xFF && shift == 0) {251// Follow the intent here, and ignore g (and let's not round unnecessarily).252snprintf(lookupMethod, sizeof(lookupMethod), "floor(floor(index.a) * 128.0 + index.b * 64.0)");253index_multiplier = 1.0f / 256.0f;254// SOCOM case. #16210255} else if ((mask & (mask + 1)) == 0 && shift < 16) {256const char *rgba = "rrrrrgggggbbbbba";257const u8 rgba_shift = shift % 5;258if (rgba_shift == 0 && mask == 0x1F) {259snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);260index_multiplier = 31.0f / 256.0f;261} else if (shift == 15 && mask == 1) {262snprintf(lookupMethod, sizeof(lookupMethod), "index.%c", rgba[shift]);263index_multiplier = 1.0f / 256.0f;264} else {265// A isn't possible here.266snprintf(lookupMethod, sizeof(lookupMethod), "mod(index.%c * %f, %d.0)", rgba[shift], 31.99f / (1 << rgba_shift), mask + 1);267index_multiplier = 1.0f / 256.0f;268formatOK = mask <= 31 - (1 << rgba_shift);269}270} else {271formatOK = false;272}273break;274case GE_FORMAT_DEPTH16:275{276// TODO: I think we can handle most scenarios here, but texturing from depth buffers requires an extension on ES 2.0 anyway.277// Not on D3D9 though, so this path is still relevant.278279if (config.bufferFormat == GE_FORMAT_DEPTH16 && config.textureFormat == GE_TFMT_5650) {280// Convert depth to 565, without going through a CLUT.281writer.C(" float depth = (index.x - z_offset) * z_scale;\n");282writer.C(" float idepth = floor(clamp(depth, 0.0, 65535.0));\n");283writer.C(" float r = mod(idepth, 32.0) / 31.0;\n");284writer.C(" float g = mod(floor(idepth / 32.0), 64.0) / 63.0;\n");285writer.C(" float b = mod(floor(idepth / 2048.0), 32.0) / 31.0;\n");286writer.C(" vec4 outColor = vec4(r, g, b, 1.0);\n");287return;288}289290if (shift < 16) {291index_multiplier = 1.0f / (float)(1 << shift);292truncate_cpy(lookupMethod, "((index.x - z_offset) * z_scale)");293294if ((mask & (mask + 1)) != 0) {295// But we'll try with the above anyway.296formatOK = false;297}298} else {299formatOK = false;300}301break;302}303default:304break;305}306307// We always use 512-sized textures now.308float texturePixels = 512.f;309index_multiplier *= 0.5f;310311// Adjust index_multiplier, similar to the use of 15.99 instead of 16 in the ES 3 path.312// index_multiplier -= 0.01f / texturePixels;313314if (!formatOK) {315ERROR_LOG_REPORT_ONCE(depal, Log::G3D, "%s depal unsupported: shift=%d mask=%02x offset=%d", GeBufferFormatToString(config.bufferFormat), shift, mask, config.startPos);316}317318// Offset by half a texel (plus clutBase) to turn NEAREST filtering into FLOOR.319// Technically, the clutBase should be |'d, not added, but that's hard with floats.320float texel_offset = ((float)config.startPos + 0.5f) / texturePixels;321if (writer.Lang().shaderLanguage == HLSL_D3D9) {322// Seems to need a half-pixel offset fix? Might mean it was rendered wrong...323texel_offset += 0.5f / texturePixels;324}325writer.F(" float coord = (%s * %f) + %f;\n", lookupMethod, index_multiplier, texel_offset);326writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");327}328329void GenerateDepalSmoothed(ShaderWriter &writer, const DepalConfig &config) {330const char *sourceChannel = "error";331float indexMultiplier = 31.0f;332333if (config.bufferFormat == GE_FORMAT_5551) {334_dbg_assert_(config.mask == 0x1F);335switch (config.shift) {336case 0: sourceChannel = "r"; break;337case 5: sourceChannel = "g"; break;338case 10: sourceChannel = "b"; break;339default: _dbg_assert_(false);340}341} else if (config.bufferFormat == GE_FORMAT_565) {342_dbg_assert_(config.mask == 0x1F || config.mask == 0x3F);343switch (config.shift) {344case 0: sourceChannel = "r"; break;345case 5: sourceChannel = "g"; indexMultiplier = 63.0f; break;346case 11: sourceChannel = "b"; break;347default: _dbg_assert_(false);348}349} else {350_dbg_assert_(false);351}352353writer.C(" float index = ").SampleTexture2D("tex", "v_texcoord").F(".%s * %0.1f;\n", sourceChannel, indexMultiplier);354float texturePixels = 512.f;355writer.F(" float coord = (index + 0.5) * %f;\n", 1.0 / texturePixels);356writer.C(" vec4 outColor = ").SampleTexture2D("pal", "vec2(coord, 0.0)").C(";\n");357}358359void GenerateDepalFs(ShaderWriter &writer, const DepalConfig &config) {360writer.DeclareSamplers(samplers);361writer.HighPrecisionFloat();362writer.BeginFSMain(config.bufferFormat == GE_FORMAT_DEPTH16 ? g_draw2Duniforms : Slice<UniformDef>::empty(), varyings);363if (config.smoothedDepal) {364// Handles a limited set of cases, but doesn't need any integer math so we don't365// need two variants.366GenerateDepalSmoothed(writer, config);367} else {368switch (writer.Lang().shaderLanguage) {369case HLSL_D3D9:370case GLSL_1xx:371GenerateDepalShaderFloat(writer, config);372break;373case GLSL_VULKAN:374case GLSL_3xx:375case HLSL_D3D11:376// Use the float shader for the SOCOM special.377if (config.bufferFormat == GE_FORMAT_5551 && config.textureFormat == GE_TFMT_CLUT8) {378GenerateDepalShaderFloat(writer, config);379} else {380GenerateDepalShader300(writer, config);381}382break;383default:384_assert_msg_(false, "Shader language not supported for depal: %d", (int)writer.Lang().shaderLanguage);385}386}387writer.EndFSMain("outColor");388}389390391