// CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
// Path: blob/master/GPU/Common/DepthBufferCommon.cpp
// Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <algorithm>1819#include "Common/GPU/OpenGL/GLFeatures.h"20#include "Common/LogReporting.h"21#include "Core/ConfigValues.h"22#include "GPU/Common/GPUStateUtils.h"23#include "GPU/Common/DrawEngineCommon.h"24#include "GPU/Common/FramebufferManagerCommon.h"25#include "GPU/Common/TextureCacheCommon.h"26#include "Common/GPU/ShaderWriter.h"272829static const InputDef vs_inputs[] = {30{ "vec2", "a_position", Draw::SEM_POSITION },31};3233struct DepthUB {34float u_depthFactor[4];35float u_depthShift[4];36float u_depthTo8[4];37};3839const UniformDef depthUniforms[] = {40{ "vec4", "u_depthFactor", 0 },41{ "vec4", "u_depthShift", 1},42{ "vec4", "u_depthTo8", 2},43};4445const UniformBufferDesc depthUBDesc{ sizeof(DepthUB), {46{ "u_depthFactor", -1, -1, UniformType::FLOAT4, 0 },47{ "u_depthShift", -1, -1, UniformType::FLOAT4, 16 },48{ "u_depthTo8", -1, -1, UniformType::FLOAT4, 32 },49} };5051static const SamplerDef samplers[] = {52{ 0, "tex" },53};5455static const VaryingDef varyings[] = {56{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },57};5859void GenerateDepthDownloadFs(ShaderWriter &writer) {60writer.DeclareSamplers(samplers);61writer.BeginFSMain(depthUniforms, varyings);62writer.C(" float depth = 
").SampleTexture2D("tex", "v_texcoord").C(".r; \n");63// At this point, clamped maps [0, 1] to [0, 65535].64writer.C(" float clamped = clamp((depth - u_depthFactor.x) * u_depthFactor.y, 0.0, 1.0);\n");65writer.C(" vec4 enc = u_depthShift * clamped;\n");66writer.C(" enc = floor(mod(enc, 256.0)) * u_depthTo8;\n");67writer.C(" vec4 outColor = enc.yzww;\n"); // Let's ignore the bits outside 16 bit precision.68writer.EndFSMain("outColor");69}7071void GenerateDepthDownloadVs(ShaderWriter &writer) {72writer.BeginVSMain(vs_inputs, Slice<UniformDef>::empty(), varyings);73writer.C("v_texcoord = a_position * 2.0;\n");74writer.C("gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);");75writer.EndVSMain(varyings);76}7778static const char * const stencil_dl_fs = R"(79#ifdef GL_ES80#ifdef GL_FRAGMENT_PRECISION_HIGH81precision highp float;82#else83precision mediump float;84#endif85#endif86#if __VERSION__ >= 13087#define varying in88#define texture2D texture89#define gl_FragColor fragColor090out vec4 fragColor0;91#endif92varying vec2 v_texcoord;93lowp uniform usampler2D tex;94void main() {95uint stencil = texture2D(tex, v_texcoord).r;96float scaled = float(stencil) / 255.0;97gl_FragColor = vec4(scaled, scaled, scaled, scaled);98}99)";100101static const char * const stencil_vs = R"(102#ifdef GL_ES103precision highp float;104#endif105#if __VERSION__ >= 130106#define attribute in107#define varying out108#endif109attribute vec2 a_position;110varying vec2 v_texcoord;111void main() {112v_texcoord = a_position * 2.0;113gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);114}115)";116117static bool SupportsDepthTexturing() {118if (gl_extensions.IsGLES) {119return gl_extensions.OES_packed_depth_stencil && (gl_extensions.OES_depth_texture || gl_extensions.GLES3);120}121return gl_extensions.ARB_texture_float;122}123124Draw::Pipeline *CreateReadbackPipeline(Draw::DrawContext *draw, const char *tag, const UniformBufferDesc *ubDesc, const char *fs, const char *fsTag, 
const char *vs, const char *vsTag) {125using namespace Draw;126127const ShaderLanguageDesc &shaderLanguageDesc = draw->GetShaderLanguageDesc();128129ShaderModule *readbackFs = draw->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fs, strlen(fs), fsTag);130ShaderModule *readbackVs = draw->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vs, strlen(vs), vsTag);131_assert_(readbackFs && readbackVs);132133static const InputLayoutDesc desc = {1348,135{136{ SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },137},138};139InputLayout *inputLayout = draw->CreateInputLayout(desc);140141BlendState *blendOff = draw->CreateBlendState({ false, 0xF });142DepthStencilState *stencilIgnore = draw->CreateDepthStencilState({});143RasterState *rasterNoCull = draw->CreateRasterState({});144145PipelineDesc readbackDesc{146Primitive::TRIANGLE_LIST,147{ readbackVs, readbackFs },148inputLayout, stencilIgnore, blendOff, rasterNoCull, ubDesc,149};150Draw::Pipeline *pipeline = draw->CreateGraphicsPipeline(readbackDesc, tag);151_assert_(pipeline);152153rasterNoCull->Release();154blendOff->Release();155stencilIgnore->Release();156inputLayout->Release();157158readbackFs->Release();159readbackVs->Release();160161return pipeline;162}163164bool FramebufferManagerCommon::ReadbackDepthbuffer(Draw::Framebuffer *fbo, int x, int y, int w, int h, uint16_t *pixels, int pixelsStride, int destW, int destH, Draw::ReadbackMode mode) {165using namespace Draw;166167if (!fbo) {168ERROR_LOG_REPORT_ONCE(vfbfbozero, Log::sceGe, "ReadbackDepthbufferSync: bad fbo");169return false;170}171// Old desktop GL can download depth, but not upload.172if (gl_extensions.IsGLES && !SupportsDepthTexturing()) {173return false;174}175176// Pixel size always 4 here because we always request float or RGBA.177const u32 bufSize = destW * destH * 4;178if (!convBuf_ || convBufSize_ < bufSize) {179delete[] convBuf_;180convBuf_ = new 
u8[bufSize];181convBufSize_ = bufSize;182}183184float scaleX = (float)destW / w;185float scaleY = (float)destH / h;186187bool useColorPath = gl_extensions.IsGLES || scaleX != 1.0f || scaleY != 1.0f;188bool format16Bit = false;189190if (useColorPath) {191if (!depthReadbackPipeline_) {192const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();193char depth_dl_fs[1024];194char depth_dl_vs[1024];195ShaderWriter fsWriter(depth_dl_fs, shaderLanguageDesc, ShaderStage::Fragment);196ShaderWriter vsWriter(depth_dl_vs, shaderLanguageDesc, ShaderStage::Vertex);197GenerateDepthDownloadFs(fsWriter);198GenerateDepthDownloadVs(vsWriter);199depthReadbackPipeline_ = CreateReadbackPipeline(draw_, "depth_dl", &depthUBDesc, depth_dl_fs, "depth_dl_fs", depth_dl_vs, "depth_dl_vs");200depthReadbackSampler_ = draw_->CreateSamplerState({});201}202203shaderManager_->DirtyLastShader();204auto *blitFBO = GetTempFBO(TempFBO::Z_COPY, fbo->Width() * scaleX, fbo->Height() * scaleY);205draw_->BindFramebufferAsRenderTarget(blitFBO, { RPAction::DONT_CARE, RPAction::DONT_CARE, RPAction::DONT_CARE }, "ReadbackDepthbufferSync");206Draw::Viewport viewport = { 0.0f, 0.0f, (float)destW, (float)destH, 0.0f, 1.0f };207draw_->SetViewport(viewport);208draw_->SetScissorRect(0, 0, fbo->Width() * scaleX, fbo->Height() * scaleY);209210draw_->BindFramebufferAsTexture(fbo, TEX_SLOT_PSP_TEXTURE, FB_DEPTH_BIT, 0);211draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &depthReadbackSampler_);212213// We must bind the program after starting the render pass.214draw_->BindPipeline(depthReadbackPipeline_);215216DepthUB ub{};217218// Setting this to 0.95f eliminates flickering lights with delayed readback in Syphon Filter.219// That's pretty ugly though! 
But we'll need to do that if we're gonna enable delayed readback in those games.220const float fudgeFactor = 1.0f;221DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());222ub.u_depthFactor[0] = depthScale.Offset();223ub.u_depthFactor[1] = depthScale.Scale();224225// These are for packing a float in u8x4 colors. We should support more suitable readback formats on APIs that can do it.226static constexpr float shifts[] = { 16777215.0f, 16777215.0f / 256.0f, 16777215.0f / 65536.0f, 16777215.0f / 16777216.0f };227memcpy(ub.u_depthShift, shifts, sizeof(shifts));228static constexpr float to8[] = { 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f, 1.0f / 255.0f };229memcpy(ub.u_depthTo8, to8, sizeof(to8));230231draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));232233// Fullscreen triangle coordinates.234static const float positions[6] = {2350.0, 0.0,2361.0, 0.0,2370.0, 1.0,238};239draw_->DrawUP(positions, 3);240241draw_->CopyFramebufferToMemory(blitFBO, FB_COLOR_BIT,242x * scaleX, y * scaleY, w * scaleX, h * scaleY,243DataFormat::R8G8B8A8_UNORM, convBuf_, destW, mode, "ReadbackDepthbufferSync");244245textureCache_->ForgetLastTexture();246// TODO: Use 4444 (or better, R16_UNORM) so we can copy lines directly (instead of 32 -> 16 on CPU)?247format16Bit = true;248} else {249draw_->CopyFramebufferToMemory(fbo, FB_DEPTH_BIT, x, y, w, h, DataFormat::D32F, convBuf_, w, mode, "ReadbackDepthbufferSync");250format16Bit = false;251}252253// TODO: Move this conversion into the backends.254if (format16Bit) {255// In this case, we used the shader to apply depth scale factors.256// This can be SSE'd or NEON'd very efficiently, though ideally we would avoid this conversion by using R16_UNORM for readback.257uint16_t *dest = pixels;258const u32_le *packed32 = (u32_le *)convBuf_;259for (int yp = 0; yp < destH; ++yp) {260for (int xp = 0; xp < destW; ++xp) {261dest[xp] = packed32[xp] & 0xFFFF;262}263dest += pixelsStride;264packed32 += destW;265}266} else {267// TODO: Apply 
this in the shader? May have precision issues if it becomes important to match.268// We downloaded float values directly in this case.269uint16_t *dest = pixels;270const float *packedf = (float *)convBuf_;271DepthScaleFactors depthScale = GetDepthScaleFactors(gstate_c.UseFlags());272for (int yp = 0; yp < destH; ++yp) {273for (int xp = 0; xp < destW; ++xp) {274float scaled = depthScale.DecodeToU16(packedf[xp]);275if (scaled <= 0.0f) {276dest[xp] = 0;277} else if (scaled >= 65535.0f) {278dest[xp] = 65535;279} else {280dest[xp] = (int)scaled;281}282}283dest += pixelsStride;284packedf += destW;285}286}287288gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);289return true;290}291292293