CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!
Path: blob/master/GPU/Common/GeometryShaderGenerator.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.12// This program is free software: you can redistribute it and/or modify3// it under the terms of the GNU General Public License as published by4// the Free Software Foundation, version 2.0 or later versions.56// This program is distributed in the hope that it will be useful,7// but WITHOUT ANY WARRANTY; without even the implied warranty of8// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the9// GNU General Public License 2.0 for more details.1011// A copy of the GPL 2.0 should have been included with the program.12// If not, see http://www.gnu.org/licenses/1314// Official git repository and contact information can be found at15// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.1617#include <cstdio>18#include <cstdlib>19#include <locale.h>2021#include "Common/StringUtils.h"22#include "Common/GPU/OpenGL/GLFeatures.h"23#include "Common/GPU/ShaderWriter.h"24#include "Common/GPU/thin3d.h"25#include "Core/Config.h"26#include "GPU/ge_constants.h"27#include "GPU/GPUState.h"28#include "GPU/Common/ShaderId.h"29#include "GPU/Common/ShaderUniforms.h"30#include "GPU/Common/GeometryShaderGenerator.h"3132#undef WRITE3334#define WRITE(p, ...) p.F(__VA_ARGS__)3536// TODO: Could support VK_NV_geometry_shader_passthrough, though the hardware that supports37// it is already pretty fast at geometry shaders..383940bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString) {41std::vector<const char*> extensions;42if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {43if (gl_extensions.EXT_gpu_shader4) {44extensions.push_back("#extension GL_EXT_gpu_shader4 : enable");45}46}47bool vertexRangeCulling = !id.Bit(GS_BIT_CURVE);48bool clipClampedDepth = gstate_c.Use(GPU_USE_DEPTH_CLAMP);4950ShaderWriter p(buffer, compat, ShaderStage::Geometry, extensions);5152p.F("// %s\n", GeometryShaderDesc(id).c_str());5354p.C("layout(triangles) in;\n");55if (clipClampedDepth && vertexRangeCulling && !gstate_c.Use(GPU_USE_CLIP_DISTANCE)) {56p.C("layout(triangle_strip, max_vertices = 12) out;\n");57} else {58p.C("layout(triangle_strip, max_vertices = 6) out;\n");59}6061if (compat.shaderLanguage == GLSL_VULKAN) {62WRITE(p, "\n");63WRITE(p, "layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr);64} else if (compat.shaderLanguage == HLSL_D3D11) {65WRITE(p, "cbuffer base : register(b0) {\n%s};\n", ub_baseStr);66}6768std::vector<VaryingDef> varyings, outVaryings;6970if (id.Bit(GS_BIT_DO_TEXTURE)) {71varyings.push_back(VaryingDef{ "vec3", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" });72outVaryings.push_back(VaryingDef{ "vec3", "v_texcoordOut", Draw::SEM_TEXCOORD0, 0, "highp" });73}74varyings.push_back(VaryingDef{ "vec4", "v_color0", Draw::SEM_COLOR0, 1, "lowp" });75outVaryings.push_back(VaryingDef{ "vec4", "v_color0Out", Draw::SEM_COLOR0, 1, "lowp" });76if (id.Bit(GS_BIT_LMODE)) {77varyings.push_back(VaryingDef{ "vec3", "v_color1", Draw::SEM_COLOR1, 2, "lowp" });78outVaryings.push_back(VaryingDef{ "vec3", "v_color1Out", Draw::SEM_COLOR1, 2, "lowp" });79}80varyings.push_back(VaryingDef{ "float", "v_fogdepth", Draw::SEM_TEXCOORD1, 3, "highp" });81outVaryings.push_back(VaryingDef{ "float", "v_fogdepthOut", Draw::SEM_TEXCOORD1, 3, "highp" });8283p.BeginGSMain(varyings, outVaryings);8485// Apply culling.86if (vertexRangeCulling) {87p.C(" bool anyInside = false;\n");88}89// And apply manual clipping if necessary.90if (!gstate_c.Use(GPU_USE_CLIP_DISTANCE)) {91p.C(" float clip0[3];\n");92if (clipClampedDepth) {93p.C(" float clip1[3];\n");94}95}9697p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?98p.C(" vec4 outPos = gl_in[i].gl_Position;\n");99p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");100101if (vertexRangeCulling) {102p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");103// Vertex range culling doesn't happen when Z clips, note sign of w is important.104p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");105const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";106const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";107p.F(" if ((%s) || (%s)) {\n", outMin, outMax);108p.C(" return;\n"); // Cull!109p.C(" }\n");110p.C(" }\n");111p.C(" if (u_cullRangeMin.w <= 0.0) {\n");112p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n");113// When not clamping depth, cull the triangle of Z is outside the valid range (not based on clip Z.)114p.C(" return;\n");115p.C(" }\n");116p.C(" } else {\n");117p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n");118p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n");119p.C(" }\n");120}121122if (!gstate_c.Use(GPU_USE_CLIP_DISTANCE)) {123// This is basically the same value as gl_ClipDistance would take, z + w.124if (vertexRangeCulling) {125// We add a small amount to prevent error as in #15816 (PSP Z is only 16-bit fixed point, anyway.)126p.F(" clip0[i] = projZ * outPos.w + outPos.w + %f;\n", 0.0625 / 65536.0);127} else {128// Let's not complicate the code overly for this case. We'll clipClampedDepth.129p.C(" clip0[i] = 0.0;\n");130}131132// This one does happen for rectangles.133if (clipClampedDepth) {134if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {135// On OpenGL/GLES, these values account for the -1 -> 1 range.136p.C(" if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");137p.C(" clip1[i] = outPos.w + outPos.z;\n");138} else {139// Everywhere else, it's 0 -> 1, simpler.140p.C(" if (u_depthRange.y >= 1.0) {\n");141p.C(" clip1[i] = outPos.z;\n");142}143// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.144p.C(" } else if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");145p.C(" clip1[i] = outPos.w - outPos.z;\n");146p.C(" } else {\n");147p.C(" clip1[i] = 0.0;\n");148p.C(" }\n");149}150}151152p.C(" } // for\n");153154// Cull any triangle fully outside in the same direction when depth clamp enabled.155// Basically simulate cull distances.156if (vertexRangeCulling) {157p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n");158p.C(" return;\n");159p.C(" }\n");160}161162if (!gstate_c.Use(GPU_USE_CLIP_DISTANCE)) {163// Clipping against one half-space cuts a triangle (17/27), culls (7/27), or creates two triangles (3/27).164// We clip against two, so we can generate up to 4 triangles, a polygon with 6 points.165p.C(" int indices[6];\n");166p.C(" float factors[6];\n");167p.C(" int ind = 0;\n");168169// Pass 1 - clip against first half-space.170p.C(" for (int i = 0; i < 3; i++) {\n");171// First, use this vertex if it doesn't need clipping.172p.C(" if (clip0[i] >= 0.0) {\n");173p.C(" indices[ind] = i;\n");174p.C(" factors[ind] = 0.0;\n");175p.C(" ind++;\n");176p.C(" }\n");177178// Next, we generate an interpolated vertex if signs differ.179p.C(" int inext = i == 2 ? 0 : i + 1;\n");180p.C(" if (clip0[i] * clip0[inext] < 0.0) {\n");181p.C(" float t = clip0[i] < 0.0 ? clip0[i] / (clip0[i] - clip0[inext]) : 1.0 - (clip0[inext] / (clip0[inext] - clip0[i]));\n");182p.C(" indices[ind] = i;\n");183p.C(" factors[ind] = t;\n");184p.C(" ind++;\n");185p.C(" }\n");186187p.C(" }\n");188189// Pass 2 - further clip against clamped Z.190if (clipClampedDepth) {191p.C(" int count0 = ind;\n");192p.C(" int indices1[6];\n");193p.C(" float factors1[6];\n");194p.C(" ind = 0;\n");195196// Let's start by interpolating the clip values.197p.C(" float clip1after[4];\n");198p.C(" for (int i = 0; i < count0; i++) {\n");199p.C(" int idx = indices[i];\n");200p.C(" float factor = factors[i];\n");201p.C(" int next = idx == 2 ? 0 : idx + 1;\n");202p.C(" clip1after[i] = mix(clip1[idx], clip1[next], factor);\n");203p.C(" }\n");204205// Alright, now time to clip, again.206p.C(" for (int i = 0; i < count0; i++) {\n");207// First, use this vertex if it doesn't need clipping.208p.C(" if (clip1after[i] >= 0.0) {\n");209p.C(" indices1[ind] = i;\n");210p.C(" factors1[ind] = 0.0;\n");211p.C(" ind++;\n");212p.C(" }\n");213214// Next, we generate an interpolated vertex if signs differ.215p.C(" int inext = i == count0 - 1 ? 0 : i + 1;\n");216p.C(" if (clip1after[i] * clip1after[inext] < 0.0) {\n");217p.C(" float t = clip1after[i] < 0.0 ? clip1after[i] / (clip1after[i] - clip1after[inext]) : 1.0 - (clip1after[inext] / (clip1after[inext] - clip1after[i]));\n");218p.C(" indices1[ind] = i;\n");219p.C(" factors1[ind] = t;\n");220p.C(" ind++;\n");221p.C(" }\n");222223p.C(" }\n");224}225226p.C(" if (ind < 3) {\n");227p.C(" return;\n");228p.C(" }\n");229230p.C(" int idx;\n");231p.C(" int next;\n");232p.C(" float factor;\n");233234auto emitIndex = [&](const char *which) {235if (clipClampedDepth) {236// We have to interpolate between four vertices.237p.F(" idx = indices1[%s];\n", which);238p.F(" factor = factors1[%s];\n", which);239p.C(" next = idx == count0 - 1 ? 0 : idx + 1;\n");240p.C(" gl_Position = mix(mix(gl_in[indices[idx]].gl_Position, gl_in[(indices[idx] + 1) % 3].gl_Position, factors[idx]), mix(gl_in[indices[next]].gl_Position, gl_in[(indices[next] + 1) % 3].gl_Position, factors[next]), factor);\n");241for (size_t i = 0; i < varyings.size(); i++) {242const VaryingDef &in = varyings[i];243const VaryingDef &out = outVaryings[i];244p.F(" %s = mix(mix(%s[indices[idx]], %s[(indices[idx] + 1) % 3], factors[idx]), mix(%s[indices[next]], %s[(indices[next] + 1) % 3], factors[next]), factor);\n", out.name, in.name, in.name, in.name, in.name);245}246} else {247p.F(" idx = indices[%s];\n", which);248p.F(" factor = factors[%s];\n", which);249p.C(" next = idx == 2 ? 0 : idx + 1;\n");250p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n");251for (size_t i = 0; i < varyings.size(); i++) {252const VaryingDef &in = varyings[i];253const VaryingDef &out = outVaryings[i];254p.F(" %s = mix(%s[idx], %s[next], factor);\n", out.name, in.name, in.name);255}256}257p.C(" EmitVertex();\n");258};259260// Alright, time to actually emit the first triangle.261p.C(" for (int i = 0; i < 3; i++) {\n");262emitIndex("i");263p.C(" }\n");264265// Did we end up with additional triangles? We'll do three points each for the rest.266p.C(" for (int i = 3; i < ind; i++) {\n");267p.C(" EndPrimitive();\n");268269// Point one, always index zero.270emitIndex("0");271272// After that, one less than i (basically a triangle fan.)273emitIndex("(i - 1)");274275// And the new vertex itself.276emitIndex("i");277278p.C(" }\n");279} else {280const char *clipSuffix0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";281const char *clipSuffix1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";282283p.C(" for (int i = 0; i < 3; i++) {\n"); // TODO: 3 or gl_in.length()? which will be faster?284p.C(" vec4 outPos = gl_in[i].gl_Position;\n");285p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");286p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");287if (clipClampedDepth) {288// Copy the clip distance from the vertex shader.289p.F(" gl_ClipDistance%s = gl_in[i].gl_ClipDistance%s;\n", clipSuffix0, clipSuffix0);290p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix1);291} else {292// We shouldn't need to worry about rectangles-as-triangles here, since we don't use geometry shaders for that.293// We add a small amount to prevent error as in #15816 (PSP Z is only 16-bit fixed point, anyway.)294p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w + %f;\n", clipSuffix0, 0.0625 / 65536.0);295}296p.C(" gl_Position = outPos;\n");297if (gstate_c.Use(GPU_USE_CLIP_DISTANCE)) {298}299300for (size_t i = 0; i < varyings.size(); i++) {301const VaryingDef &in = varyings[i];302const VaryingDef &out = outVaryings[i];303p.F(" %s = %s[i];\n", out.name, in.name);304}305// Debug - null the red channel306//p.C(" if (i == 0) v_color0Out.x = 0.0;\n");307p.C(" EmitVertex();\n");308p.C(" }\n");309}310311p.EndGSMain();312313return true;314}315316317