// Retrieved through the CoCalc web viewer ("CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!").
// Path: blob/master/GPU/Common/StencilCommon.cpp
// Views: 1401
// Copyright (c) 2014- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include "Common/GPU/Shader.h"18#include "Common/GPU/ShaderWriter.h"19#include "Core/Config.h"20#include "Core/ConfigValues.h"21#include "GPU/Common/StencilCommon.h"22#include "GPU/Common/DrawEngineCommon.h"23#include "GPU/Common/FramebufferManagerCommon.h"24#include "GPU/Common/TextureCacheCommon.h"2526static u8 StencilBits5551(const u8 *ptr8, u32 numPixels) {27const u32 *ptr = (const u32 *)ptr8;2829for (u32 i = 0; i < numPixels / 2; ++i) {30if (ptr[i] & 0x80008000) {31return 1;32}33}34return 0;35}3637static u8 StencilBits4444(const u8 *ptr8, u32 numPixels) {38const u32 *ptr = (const u32 *)ptr8;39u32 bits = 0;4041for (u32 i = 0; i < numPixels / 2; ++i) {42bits |= ptr[i];43}4445return ((bits >> 12) & 0xF) | (bits >> 28);46}4748static u8 StencilBits8888(const u8 *ptr8, u32 numPixels) {49const u32 *ptr = (const u32 *)ptr8;50u32 bits = 0;5152for (u32 i = 0; i < numPixels; ++i) {53bits |= ptr[i];54}5556return bits >> 24;57}5859static bool CheckStencilBits(const u8 *src, const VirtualFramebuffer *dstBuffer, int &values, u8 &usedBits) {60switch (dstBuffer->fb_format) {61case GE_FORMAT_565:62// Well, this doesn't make much sense.63return false;64case GE_FORMAT_5551:65usedBits = StencilBits5551(src, dstBuffer->fb_stride * 
dstBuffer->bufferHeight);66values = 2;67break;68case GE_FORMAT_4444:69usedBits = StencilBits4444(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);70values = 16;71break;72case GE_FORMAT_8888:73usedBits = StencilBits8888(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);74values = 256;75break;76case GE_FORMAT_INVALID:77case GE_FORMAT_DEPTH16:78case GE_FORMAT_CLUT8:79// Inconceivable.80_assert_(false);81return false;82}8384return true;85}8687struct StencilUB {88float stencilValue;89};9091const UniformBufferDesc stencilUBDesc { sizeof(StencilUB), {92{ "stencilValue", -1, 0, UniformType::FLOAT1, 0 },93} };9495// TODO: Merge this with UniformBufferDesc96static const UniformDef uniforms[1] = {97{ "float", "stencilValue", 0 },98};99100static const InputDef inputs[1] = {101{ "vec2", "a_position", Draw::SEM_POSITION, }102};103104static const VaryingDef varyings[1] = {105{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },106};107108static const SamplerDef samplers[1] = {109{ 0, "tex" },110};111112void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs, bool useExport) {113std::vector<const char *> extensions;114if (useExport)115extensions.push_back("#extension GL_ARB_shader_stencil_export : require");116117ShaderWriter writer(buffer, lang, ShaderStage::Fragment, extensions);118writer.HighPrecisionFloat();119writer.DeclareSamplers(samplers);120121if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO)) {122writer.C("layout (depth_unchanged) out float gl_FragDepth;\n");123}124125writer.C("float roundAndScaleTo255f(in float x) { return floor(x * 255.99); }\n");126127writer.BeginFSMain(uniforms, varyings);128129writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");130writer.C(" vec4 outColor = index.aaaa;\n"); // Only care about a.131if (useExport) {132writer.C(" gl_FragStencilRefARB = int(roundAndScaleTo255f(index.a));\n");133} else 
{134writer.C(" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n");135// Bitwise operations on floats, ugh.136writer.C(" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n");137}138139if (bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO)) {140writer.C(" gl_FragDepth = gl_FragCoord.z;\n");141}142143writer.EndFSMain("outColor");144}145146// This can probably be shared with some other shaders, like reinterpret or the future depth upload.147void GenerateStencilVs(char *buffer, const ShaderLanguageDesc &lang) {148ShaderWriter writer(buffer, lang, ShaderStage::Vertex);149150writer.BeginVSMain(lang.vertexIndex ? Slice<InputDef>::empty() : inputs, Slice<UniformDef>::empty(), varyings);151152if (lang.vertexIndex) {153writer.C(" float x = float((gl_VertexIndex & 1) << 1);\n");154writer.C(" float y = float(gl_VertexIndex & 2);\n");155writer.C(" v_texcoord = vec2(x, y);\n");156} else {157writer.C(" v_texcoord = a_position * 2.0;\n"); // yes, this should be right. Should be 2.0 in the far corners.158}159writer.C(" gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);\n");160161writer.EndVSMain(varyings);162}163164bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size, WriteStencil flags) {165using namespace Draw;166167addr &= 0x3FFFFFFF;168if (!MayIntersectFramebufferColor(addr)) {169return false;170}171172VirtualFramebuffer *dstBuffer = nullptr;173for (size_t i = 0; i < vfbs_.size(); ++i) {174VirtualFramebuffer *vfb = vfbs_[i];175// TODO: Maybe we should broadcast to all? 
Most of the time, there's only one.176if (vfb->fb_address == addr && (!dstBuffer || dstBuffer->colorBindSeq < vfb->colorBindSeq)) {177dstBuffer = vfb;178}179}180if (!dstBuffer) {181return false;182}183184int values = 0;185u8 usedBits = 0;186bool useExportShader = draw_->GetDeviceCaps().fragmentShaderStencilWriteSupported;187188const u8 *src = Memory::GetPointer(addr);189if (!src)190return false;191192// Could skip this when doing useExportShader, but then we couldn't optimize usedBits == 0.193if (!CheckStencilBits(src, dstBuffer, values, usedBits))194return false;195196if (usedBits == 0) {197if (flags & WriteStencil::STENCIL_IS_ZERO) {198// Common when creating buffers, it's already 0.199// We're done.200return false;201}202203// Otherwise, we can skip alpha in many cases, in which case we don't even use a shader.204if (flags & WriteStencil::IGNORE_ALPHA) {205if (dstBuffer->fbo) {206draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "WriteStencilFromMemory_Clear");207}208return true;209}210}211212shaderManager_->DirtyLastShader();213textureCache_->ForgetLastTexture();214215if (!stencilWritePipeline_) {216const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();217218char *fsCode = new char[8192];219char *vsCode = new char[8192];220GenerateStencilFs(fsCode, shaderLanguageDesc, draw_->GetBugs(), useExportShader);221GenerateStencilVs(vsCode, shaderLanguageDesc);222223_assert_msg_(strlen(fsCode) < 8192, "StenFS length error: %d", (int)strlen(fsCode));224_assert_msg_(strlen(vsCode) < 8192, "StenVS length error: %d", (int)strlen(vsCode));225226ShaderModule *stencilUploadFs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "stencil_fs");227ShaderModule *stencilUploadVs = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vsCode, strlen(vsCode), 
"stencil_vs");228229_assert_(stencilUploadFs && stencilUploadVs);230231InputLayoutDesc desc = {2328,233{234{ SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },235},236};237InputLayout *inputLayout = draw_->CreateInputLayout(desc);238239BlendState *blendOff = draw_->CreateBlendState({ false, 0x8 });240DepthStencilStateDesc dsDesc{};241dsDesc.stencilEnabled = true;242dsDesc.stencil.compareOp = Comparison::ALWAYS;243dsDesc.stencil.depthFailOp = StencilOp::REPLACE;244dsDesc.stencil.failOp = StencilOp::REPLACE;245dsDesc.stencil.passOp = StencilOp::REPLACE;246DepthStencilState *stencilWrite = draw_->CreateDepthStencilState(dsDesc);247RasterState *rasterNoCull = draw_->CreateRasterState({});248249PipelineDesc stencilWriteDesc{250Primitive::TRIANGLE_LIST,251{ stencilUploadVs, stencilUploadFs },252inputLayout, stencilWrite, blendOff, rasterNoCull, &stencilUBDesc,253};254stencilWritePipeline_ = draw_->CreateGraphicsPipeline(stencilWriteDesc, "stencil_upload");255_assert_(stencilWritePipeline_);256257delete[] fsCode;258delete[] vsCode;259260rasterNoCull->Release();261blendOff->Release();262stencilWrite->Release();263inputLayout->Release();264265stencilUploadFs->Release();266stencilUploadVs->Release();267268SamplerStateDesc descNearest{};269stencilWriteSampler_ = draw_->CreateSamplerState(descNearest);270}271272// Fullscreen triangle coordinates.273static const float positions[6] = {2740.0, 0.0,2751.0, 0.0,2760.0, 1.0,277};278279bool useBlit = draw_->GetDeviceCaps().framebufferStencilBlitSupported;280281// Our fragment shader (and discard) is slow. Since the source is 1x, we can stencil to 1x.282// Then after we're done, we'll just blit it across and stretch it there. 
Not worth doing283// if already at 1x size though, of course.284if (dstBuffer->width == dstBuffer->renderWidth || !dstBuffer->fbo) {285useBlit = false;286}287// The blit path doesn't set alpha, so we can't use it if that's needed.288if (!(flags & WriteStencil::IGNORE_ALPHA)) {289useBlit = false;290}291292u16 w = useBlit ? dstBuffer->width : dstBuffer->renderWidth;293u16 h = useBlit ? dstBuffer->height : dstBuffer->renderHeight;294295Draw::Framebuffer *blitFBO = nullptr;296if (useBlit) {297blitFBO = GetTempFBO(TempFBO::STENCIL, w, h);298draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::CLEAR }, "WriteStencilFromMemory_Blit");299} else if (dstBuffer->fbo) {300draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "WriteStencilFromMemory_NoBlit");301}302303Draw::Viewport viewport = { 0.0f, 0.0f, (float)w, (float)h, 0.0f, 1.0f };304draw_->SetViewport(viewport);305306// TODO: Switch the format to a single channel format?307Draw::Texture *tex = MakePixelTexture(src, dstBuffer->fb_format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);308if (!tex) {309// Bad!310return false;311}312313draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex);314draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilWriteSampler_);315316// We must bind the program after starting the render pass, and set the color mask after clearing.317draw_->SetScissorRect(0, 0, w, h);318draw_->BindPipeline(stencilWritePipeline_);319320if (useExportShader) {321// We only need to do one pass if using an export shader.322StencilUB ub{};323draw_->SetStencilParams(0xFF, 0xFF, 0xFF);324draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));325draw_->DrawUP(positions, 3);326} else {327for (int i = 1; i < values; i += i) {328if (!(usedBits & i)) {329// It's already zero, let's skip it.330continue;331}332StencilUB ub{};333if (dstBuffer->fb_format == GE_FORMAT_4444) 
{334draw_->SetStencilParams(0xFF, (i << 4) | i, 0xFF);335ub.stencilValue = i * (16.0f / 255.0f);336} else if (dstBuffer->fb_format == GE_FORMAT_5551) {337draw_->SetStencilParams(0xFF, 0xFF, 0xFF);338ub.stencilValue = i * (128.0f / 255.0f);339} else {340draw_->SetStencilParams(0xFF, i, 0xFF);341ub.stencilValue = i * (1.0f / 255.0f);342}343draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));344draw_->DrawUP(positions, 3);345}346}347348if (useBlit) {349// Note that scissors don't affect blits on other APIs than OpenGL, so might want to try to get rid of this.350draw_->SetScissorRect(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);351draw_->BlitFramebuffer(blitFBO, 0, 0, w, h, dstBuffer->fbo, 0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight, Draw::FB_STENCIL_BIT, Draw::FB_BLIT_NEAREST, "WriteStencilFromMemory_Blit");352RebindFramebuffer("RebindFramebuffer - Stencil");353}354355draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);356gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);357return true;358}359360361