CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/GeometryShaderGenerator.cpp
Views: 1401
// Copyright (c) 2012- PPSSPP Project.

// This program is free software: you can redistribute it and/or modify
// it under the terms of the GNU General Public License as published by
// the Free Software Foundation, version 2.0 or later versions.

// This program is distributed in the hope that it will be useful,
// but WITHOUT ANY WARRANTY; without even the implied warranty of
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
// GNU General Public License 2.0 for more details.

// A copy of the GPL 2.0 should have been included with the program.
// If not, see http://www.gnu.org/licenses/

// Official git repository and contact information can be found at
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.

18
#include <cstdio>
19
#include <cstdlib>
20
#include <locale.h>
21
22
#include "Common/StringUtils.h"
23
#include "Common/GPU/OpenGL/GLFeatures.h"
24
#include "Common/GPU/ShaderWriter.h"
25
#include "Common/GPU/thin3d.h"
26
#include "Core/Config.h"
27
#include "GPU/ge_constants.h"
28
#include "GPU/GPUState.h"
29
#include "GPU/Common/ShaderId.h"
30
#include "GPU/Common/ShaderUniforms.h"
31
#include "GPU/Common/GeometryShaderGenerator.h"
32
33
#undef WRITE
34
35
#define WRITE(p, ...) p.F(__VA_ARGS__)
36
37
// TODO: Could support VK_NV_geometry_shader_passthrough, though the hardware that supports
// it is already pretty fast at geometry shaders.
39
40
41
// Emits the source of a geometry shader for the given shader ID through a
// ShaderWriter constructed over `buffer`.
//
// The generated shader takes triangles in and outputs triangle strips. It does:
//  * Vertex range culling, unless GS_BIT_CURVE is set in the ID.
//  * Depth clipping: either by writing gl_ClipDistance (when
//    GPU_USE_CLIP_DISTANCE is in use), or by manually clipping the triangle
//    against up to two half-spaces and emitting the resulting polygon as a fan.
//
// Parameters:
//   id          - shader ID; GS_BIT_CURVE, GS_BIT_DO_TEXTURE and GS_BIT_LMODE are read.
//   buffer      - destination handed to ShaderWriter.
//   compat      - target shader language description; only shaderLanguage is read here.
//   bugs        - not referenced by this function.
//   errorString - not referenced by this function.
//
// Returns true unconditionally.
bool GenerateGeometryShader(const GShaderID &id, char *buffer, const ShaderLanguageDesc &compat, const Draw::Bugs bugs, std::string *errorString) {
	std::vector<const char*> extensions;
	if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
		if (gl_extensions.EXT_gpu_shader4) {
			extensions.push_back("#extension GL_EXT_gpu_shader4 : enable");
		}
	}
	const bool vertexRangeCulling = !id.Bit(GS_BIT_CURVE);
	const bool clipClampedDepth = gstate_c.Use(GPU_USE_DEPTH_CLAMP);
	// Queried once up front, the same way as clipClampedDepth, instead of at every use.
	const bool useClipDistance = gstate_c.Use(GPU_USE_CLIP_DISTANCE);

	// Small bias added to the near clip value to prevent error as in #15816
	// (PSP Z is only 16-bit fixed point, anyway.)
	const double clipBias = 0.0625 / 65536.0;

	ShaderWriter p(buffer, compat, ShaderStage::Geometry, extensions);

	p.F("// %s\n", GeometryShaderDesc(id).c_str());

	p.C("layout(triangles) in;\n");
	// Manual clipping against two half-spaces can emit more vertices than the pass-through case.
	if (clipClampedDepth && vertexRangeCulling && !useClipDistance) {
		p.C("layout(triangle_strip, max_vertices = 12) out;\n");
	} else {
		p.C("layout(triangle_strip, max_vertices = 6) out;\n");
	}

	if (compat.shaderLanguage == GLSL_VULKAN) {
		p.F("\n");
		p.F("layout (std140, set = 0, binding = 3) uniform baseVars {\n%s};\n", ub_baseStr);
	} else if (compat.shaderLanguage == HLSL_D3D11) {
		p.F("cbuffer base : register(b0) {\n%s};\n", ub_baseStr);
	}

	// Input varyings and their output counterparts, kept index-aligned.
	std::vector<VaryingDef> varyings, outVaryings;

	if (id.Bit(GS_BIT_DO_TEXTURE)) {
		varyings.push_back(VaryingDef{ "vec3", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" });
		outVaryings.push_back(VaryingDef{ "vec3", "v_texcoordOut", Draw::SEM_TEXCOORD0, 0, "highp" });
	}
	varyings.push_back(VaryingDef{ "vec4", "v_color0", Draw::SEM_COLOR0, 1, "lowp" });
	outVaryings.push_back(VaryingDef{ "vec4", "v_color0Out", Draw::SEM_COLOR0, 1, "lowp" });
	if (id.Bit(GS_BIT_LMODE)) {
		varyings.push_back(VaryingDef{ "vec3", "v_color1", Draw::SEM_COLOR1, 2, "lowp" });
		outVaryings.push_back(VaryingDef{ "vec3", "v_color1Out", Draw::SEM_COLOR1, 2, "lowp" });
	}
	varyings.push_back(VaryingDef{ "float", "v_fogdepth", Draw::SEM_TEXCOORD1, 3, "highp" });
	outVaryings.push_back(VaryingDef{ "float", "v_fogdepthOut", Draw::SEM_TEXCOORD1, 3, "highp" });

	p.BeginGSMain(varyings, outVaryings);

	// Apply culling.
	if (vertexRangeCulling) {
		p.C(" bool anyInside = false;\n");
	}
	// And apply manual clipping if necessary.
	if (!useClipDistance) {
		p.C(" float clip0[3];\n");
		if (clipClampedDepth) {
			p.C(" float clip1[3];\n");
		}
	}

	p.C(" for (int i = 0; i < 3; i++) {\n");  // TODO: 3 or gl_in.length()? which will be faster?
	p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
	p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");

	if (vertexRangeCulling) {
		p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
		// Vertex range culling doesn't happen when Z clips, note sign of w is important.
		p.C(" if (u_cullRangeMin.w <= 0.0 || projZ * outPos.w > -outPos.w) {\n");
		const char *outMin = "projPos.x < u_cullRangeMin.x || projPos.y < u_cullRangeMin.y";
		const char *outMax = "projPos.x > u_cullRangeMax.x || projPos.y > u_cullRangeMax.y";
		p.F(" if ((%s) || (%s)) {\n", outMin, outMax);
		p.C(" return;\n");  // Cull!
		p.C(" }\n");
		p.C(" }\n");
		p.C(" if (u_cullRangeMin.w <= 0.0) {\n");
		p.C(" if (projPos.z < u_cullRangeMin.z || projPos.z > u_cullRangeMax.z) {\n");
		// When not clamping depth, cull the triangle if Z is outside the valid range (not based on clip Z.)
		p.C(" return;\n");
		p.C(" }\n");
		p.C(" } else {\n");
		// NOTE(review): anyInside is set when a vertex is >= min OR <= max, which holds
		// for every vertex whenever min <= max. A per-direction test may have been
		// intended ("fully outside in the same direction") - confirm before changing.
		p.C(" if (projPos.z >= u_cullRangeMin.z) { anyInside = true; }\n");
		p.C(" if (projPos.z <= u_cullRangeMax.z) { anyInside = true; }\n");
		p.C(" }\n");
	}

	if (!useClipDistance) {
		// This is basically the same value as gl_ClipDistance would take, z + w.
		if (vertexRangeCulling) {
			p.F(" clip0[i] = projZ * outPos.w + outPos.w + %f;\n", clipBias);
		} else {
			// Let's not complicate the code overly for this case. We'll clipClampedDepth.
			p.C(" clip0[i] = 0.0;\n");
		}

		// This one does happen for rectangles.
		if (clipClampedDepth) {
			if (ShaderLanguageIsOpenGL(compat.shaderLanguage)) {
				// On OpenGL/GLES, these values account for the -1 -> 1 range.
				p.C(" if (u_depthRange.y - u_depthRange.x >= 1.0) {\n");
				p.C(" clip1[i] = outPos.w + outPos.z;\n");
			} else {
				// Everywhere else, it's 0 -> 1, simpler.
				p.C(" if (u_depthRange.y >= 1.0) {\n");
				p.C(" clip1[i] = outPos.z;\n");
			}
			// This is similar, but for maxz when it's below 65535.0. -1/0 don't matter here.
			p.C(" } else if (u_depthRange.x + u_depthRange.y <= 65534.0) {\n");
			p.C(" clip1[i] = outPos.w - outPos.z;\n");
			p.C(" } else {\n");
			p.C(" clip1[i] = 0.0;\n");
			p.C(" }\n");
		}
	}

	p.C(" } // for\n");

	// Cull any triangle fully outside in the same direction when depth clamp enabled.
	// Basically simulate cull distances.
	if (vertexRangeCulling) {
		p.C(" if (u_cullRangeMin.w > 0.0 && !anyInside) {\n");
		p.C(" return;\n");
		p.C(" }\n");
	}

	if (!useClipDistance) {
		// Clipping against one half-space cuts a triangle (17/27), culls (7/27), or creates two triangles (3/27).
		// We clip against two, so we can generate up to 4 triangles, a polygon with 6 points.
		p.C(" int indices[6];\n");
		p.C(" float factors[6];\n");
		p.C(" int ind = 0;\n");

		// Pass 1 - clip against first half-space.
		// Each output entry is (start vertex index, interpolation factor towards the next vertex).
		p.C(" for (int i = 0; i < 3; i++) {\n");
		// First, use this vertex if it doesn't need clipping.
		p.C(" if (clip0[i] >= 0.0) {\n");
		p.C(" indices[ind] = i;\n");
		p.C(" factors[ind] = 0.0;\n");
		p.C(" ind++;\n");
		p.C(" }\n");

		// Next, we generate an interpolated vertex if signs differ.
		p.C(" int inext = i == 2 ? 0 : i + 1;\n");
		p.C(" if (clip0[i] * clip0[inext] < 0.0) {\n");
		p.C(" float t = clip0[i] < 0.0 ? clip0[i] / (clip0[i] - clip0[inext]) : 1.0 - (clip0[inext] / (clip0[inext] - clip0[i]));\n");
		p.C(" indices[ind] = i;\n");
		p.C(" factors[ind] = t;\n");
		p.C(" ind++;\n");
		p.C(" }\n");

		p.C(" }\n");

		// Pass 2 - further clip against clamped Z.
		// Entries produced here index into the pass 1 output rather than the input vertices.
		if (clipClampedDepth) {
			p.C(" int count0 = ind;\n");
			p.C(" int indices1[6];\n");
			p.C(" float factors1[6];\n");
			p.C(" ind = 0;\n");

			// Let's start by interpolating the clip values.
			p.C(" float clip1after[4];\n");
			p.C(" for (int i = 0; i < count0; i++) {\n");
			p.C(" int idx = indices[i];\n");
			p.C(" float factor = factors[i];\n");
			p.C(" int next = idx == 2 ? 0 : idx + 1;\n");
			p.C(" clip1after[i] = mix(clip1[idx], clip1[next], factor);\n");
			p.C(" }\n");

			// Alright, now time to clip, again.
			p.C(" for (int i = 0; i < count0; i++) {\n");
			// First, use this vertex if it doesn't need clipping.
			p.C(" if (clip1after[i] >= 0.0) {\n");
			p.C(" indices1[ind] = i;\n");
			p.C(" factors1[ind] = 0.0;\n");
			p.C(" ind++;\n");
			p.C(" }\n");

			// Next, we generate an interpolated vertex if signs differ.
			p.C(" int inext = i == count0 - 1 ? 0 : i + 1;\n");
			p.C(" if (clip1after[i] * clip1after[inext] < 0.0) {\n");
			p.C(" float t = clip1after[i] < 0.0 ? clip1after[i] / (clip1after[i] - clip1after[inext]) : 1.0 - (clip1after[inext] / (clip1after[inext] - clip1after[i]));\n");
			p.C(" indices1[ind] = i;\n");
			p.C(" factors1[ind] = t;\n");
			p.C(" ind++;\n");
			p.C(" }\n");

			p.C(" }\n");
		}

		// Fewer than three surviving points means there is nothing to draw.
		p.C(" if (ind < 3) {\n");
		p.C(" return;\n");
		p.C(" }\n");

		p.C(" int idx;\n");
		p.C(" int next;\n");
		p.C(" float factor;\n");

		// Emits shader code that outputs the clipped polygon point selected by the
		// GLSL index expression `which`, interpolating position and all varyings.
		auto emitIndex = [&](const char *which) {
			if (clipClampedDepth) {
				// We have to interpolate between four vertices.
				p.F(" idx = indices1[%s];\n", which);
				p.F(" factor = factors1[%s];\n", which);
				p.C(" next = idx == count0 - 1 ? 0 : idx + 1;\n");
				// p.C takes the string verbatim, so the GLSL modulo needs no escaping here.
				p.C(" gl_Position = mix(mix(gl_in[indices[idx]].gl_Position, gl_in[(indices[idx] + 1) % 3].gl_Position, factors[idx]), mix(gl_in[indices[next]].gl_Position, gl_in[(indices[next] + 1) % 3].gl_Position, factors[next]), factor);\n");
				for (size_t i = 0; i < varyings.size(); i++) {
					const VaryingDef &in = varyings[i];
					const VaryingDef &out = outVaryings[i];
					// p.F is printf-style: the GLSL modulo operator must be written as %% so
					// that it is emitted literally instead of being parsed as a conversion.
					p.F(" %s = mix(mix(%s[indices[idx]], %s[(indices[idx] + 1) %% 3], factors[idx]), mix(%s[indices[next]], %s[(indices[next] + 1) %% 3], factors[next]), factor);\n", out.name, in.name, in.name, in.name, in.name);
				}
			} else {
				p.F(" idx = indices[%s];\n", which);
				p.F(" factor = factors[%s];\n", which);
				p.C(" next = idx == 2 ? 0 : idx + 1;\n");
				p.C(" gl_Position = mix(gl_in[idx].gl_Position, gl_in[next].gl_Position, factor);\n");
				for (size_t i = 0; i < varyings.size(); i++) {
					const VaryingDef &in = varyings[i];
					const VaryingDef &out = outVaryings[i];
					p.F(" %s = mix(%s[idx], %s[next], factor);\n", out.name, in.name, in.name);
				}
			}
			p.C(" EmitVertex();\n");
		};

		// Alright, time to actually emit the first triangle.
		p.C(" for (int i = 0; i < 3; i++) {\n");
		emitIndex("i");
		p.C(" }\n");

		// Did we end up with additional triangles? We'll do three points each for the rest.
		p.C(" for (int i = 3; i < ind; i++) {\n");
		p.C(" EndPrimitive();\n");

		// Point one, always index zero.
		emitIndex("0");

		// After that, one less than i (basically a triangle fan.)
		emitIndex("(i - 1)");

		// And the new vertex itself.
		emitIndex("i");

		p.C(" }\n");
	} else {
		// Hardware clip distances are available: pass vertices through and write gl_ClipDistance.
		const char *clipSuffix0 = compat.shaderLanguage == HLSL_D3D11 ? ".x" : "[0]";
		const char *clipSuffix1 = compat.shaderLanguage == HLSL_D3D11 ? ".y" : "[1]";

		p.C(" for (int i = 0; i < 3; i++) {\n");  // TODO: 3 or gl_in.length()? which will be faster?
		p.C(" vec4 outPos = gl_in[i].gl_Position;\n");
		p.C(" vec3 projPos = outPos.xyz / outPos.w;\n");
		p.C(" float projZ = (projPos.z - u_depthRange.z) * u_depthRange.w;\n");
		if (clipClampedDepth) {
			// Copy the clip distance from the vertex shader.
			p.F(" gl_ClipDistance%s = gl_in[i].gl_ClipDistance%s;\n", clipSuffix0, clipSuffix0);
			p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w;\n", clipSuffix1);
		} else {
			// We shouldn't need to worry about rectangles-as-triangles here, since we don't use geometry shaders for that.
			p.F(" gl_ClipDistance%s = projZ * outPos.w + outPos.w + %f;\n", clipSuffix0, clipBias);
		}
		p.C(" gl_Position = outPos;\n");

		for (size_t i = 0; i < varyings.size(); i++) {
			const VaryingDef &in = varyings[i];
			const VaryingDef &out = outVaryings[i];
			p.F(" %s = %s[i];\n", out.name, in.name);
		}
		// Debug - null the red channel
		//p.C(" if (i == 0) v_color0Out.x = 0.0;\n");
		p.C(" EmitVertex();\n");
		p.C(" }\n");
	}

	p.EndGSMain();

	return true;
}
316
317