CoCalc Logo Icon
Store | Features | Docs | Share | Support | News | About | Sign Up | Sign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Common/StencilCommon.cpp
Views: 1401
1
// Copyright (c) 2014- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "Common/GPU/Shader.h"
19
#include "Common/GPU/ShaderWriter.h"
20
#include "Core/Config.h"
21
#include "Core/ConfigValues.h"
22
#include "GPU/Common/StencilCommon.h"
23
#include "GPU/Common/DrawEngineCommon.h"
24
#include "GPU/Common/FramebufferManagerCommon.h"
25
#include "GPU/Common/TextureCacheCommon.h"
26
27
// Scans a buffer of 16-bit pixels and reports whether any pixel has bit 15 set
// (the single bit this file treats as stencil for the 5551 format).
//
// ptr8      - start of the pixel data; it is read through u32/u16 pointers, so it
//             is assumed to be suitably aligned for those accesses.
// numPixels - number of 16-bit pixels to examine.
//
// Returns 1 as soon as a pixel with bit 15 set is found, otherwise 0.
static u8 StencilBits5551(const u8 *ptr8, u32 numPixels) {
	const u32 *ptr = (const u32 *)ptr8;

	// Check two pixels per 32-bit load: bit 15 of each 16-bit half.
	const u32 numPairs = numPixels / 2;
	for (u32 i = 0; i < numPairs; ++i) {
		if (ptr[i] & 0x80008000) {
			return 1;
		}
	}

	// With an odd pixel count the paired scan stops one pixel short, so check the
	// trailing pixel on its own instead of silently ignoring it.
	if (numPixels & 1) {
		const u16 *ptr16 = (const u16 *)ptr8;
		if (ptr16[numPixels - 1] & 0x8000) {
			return 1;
		}
	}
	return 0;
}
37
38
// ORs together the top nibble (bits 12-15) of every 16-bit pixel in the buffer and
// returns the combined 4-bit value, i.e. which of those bits are set in any pixel.
//
// ptr8      - start of the pixel data; it is read through u32/u16 pointers, so it
//             is assumed to be suitably aligned for those accesses.
// numPixels - number of 16-bit pixels to examine.
static u8 StencilBits4444(const u8 *ptr8, u32 numPixels) {
	const u32 *ptr = (const u32 *)ptr8;
	u32 bits = 0;

	// Accumulate two pixels per 32-bit load.
	const u32 numPairs = numPixels / 2;
	for (u32 i = 0; i < numPairs; ++i) {
		bits |= ptr[i];
	}

	// With an odd pixel count the paired scan stops one pixel short; fold the
	// trailing pixel into the low half so its top nibble is counted too.
	if (numPixels & 1) {
		bits |= ((const u16 *)ptr8)[numPixels - 1];
	}

	// Merge the top nibble of the low half (bits 12-15) with that of the high half (bits 28-31).
	return ((bits >> 12) & 0xF) | (bits >> 28);
}
48
49
// ORs together every 32-bit pixel in the buffer and returns the top byte
// (bits 24-31) of the result, i.e. which of those bits are set in any pixel.
//
// ptr8      - start of the pixel data, read as an array of u32.
// numPixels - number of 32-bit pixels to examine.
static u8 StencilBits8888(const u8 *ptr8, u32 numPixels) {
	const u32 *pixel = (const u32 *)ptr8;
	const u32 *const end = pixel + numPixels;

	u32 combined = 0;
	while (pixel != end) {
		combined |= *pixel++;
	}

	return combined >> 24;
}
59
60
static bool CheckStencilBits(const u8 *src, const VirtualFramebuffer *dstBuffer, int &values, u8 &usedBits) {
61
switch (dstBuffer->fb_format) {
62
case GE_FORMAT_565:
63
// Well, this doesn't make much sense.
64
return false;
65
case GE_FORMAT_5551:
66
usedBits = StencilBits5551(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
67
values = 2;
68
break;
69
case GE_FORMAT_4444:
70
usedBits = StencilBits4444(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
71
values = 16;
72
break;
73
case GE_FORMAT_8888:
74
usedBits = StencilBits8888(src, dstBuffer->fb_stride * dstBuffer->bufferHeight);
75
values = 256;
76
break;
77
case GE_FORMAT_INVALID:
78
case GE_FORMAT_DEPTH16:
79
case GE_FORMAT_CLUT8:
80
// Inconceivable.
81
_assert_(false);
82
return false;
83
}
84
85
return true;
86
}
87
88
// CPU-side copy of the uniform data uploaded before each stencil upload draw.
struct StencilUB {
	// Read by the fragment shader as the "stencilValue" uniform.
	float stencilValue;
};
91
92
// Uniform buffer description matching StencilUB: its total size plus a single
// FLOAT1 entry named "stencilValue". The meaning of the remaining numeric fields
// is defined by UniformBufferDesc, which is declared elsewhere.
const UniformBufferDesc stencilUBDesc{
	sizeof(StencilUB),
	{
		{ "stencilValue", -1, 0, UniformType::FLOAT1, 0 },
	},
};
95
96
// TODO: Merge this with UniformBufferDesc
97
static const UniformDef uniforms[1] = {
98
{ "float", "stencilValue", 0 },
99
};
100
101
static const InputDef inputs[1] = {
102
{ "vec2", "a_position", Draw::SEM_POSITION, }
103
};
104
105
static const VaryingDef varyings[1] = {
106
{ "vec2", "v_texcoord", Draw::SEM_TEXCOORD0, 0, "highp" },
107
};
108
109
static const SamplerDef samplers[1] = {
110
{ 0, "tex" },
111
};
112
113
// Writes the source of the stencil upload fragment shader into buffer.
//
// buffer    - destination for the generated shader text (the caller in this file
//             provides 8192 bytes).
// lang      - target shader language description, forwarded to ShaderWriter.
// bugs      - driver bug flags; the two NO_DEPTH_CANNOT_DISCARD_STENCIL_* bugs make
//             the shader declare and write gl_FragDepth.
// useExport - when true, the shader writes the stencil reference directly via
//             gl_FragStencilRefARB; when false, it discards fragments whose alpha
//             doesn't contain the bit selected by the stencilValue uniform.
void GenerateStencilFs(char *buffer, const ShaderLanguageDesc &lang, const Draw::Bugs &bugs, bool useExport) {
	// The same driver-bug test guards both the gl_FragDepth declaration and the write in main().
	const bool writeFragDepth = bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_MALI) || bugs.Has(Draw::Bugs::NO_DEPTH_CANNOT_DISCARD_STENCIL_ADRENO);

	std::vector<const char *> requiredExtensions;
	if (useExport) {
		requiredExtensions.push_back("#extension GL_ARB_shader_stencil_export : require");
	}

	ShaderWriter writer(buffer, lang, ShaderStage::Fragment, requiredExtensions);
	writer.HighPrecisionFloat();
	writer.DeclareSamplers(samplers);

	if (writeFragDepth) {
		writer.C("layout (depth_unchanged) out float gl_FragDepth;\n");
	}

	writer.C("float roundAndScaleTo255f(in float x) { return floor(x * 255.99); }\n");

	writer.BeginFSMain(uniforms, varyings);

	writer.C(" vec4 index = ").SampleTexture2D("tex", "v_texcoord.xy").C(";\n");
	// Only the alpha channel matters.
	writer.C(" vec4 outColor = index.aaaa;\n");

	if (!useExport) {
		writer.C(" float shifted = roundAndScaleTo255f(index.a) / roundAndScaleTo255f(stencilValue);\n");
		// Emulates a bit test using float math: keep the fragment only if the selected bit is set.
		writer.C(" if (mod(floor(shifted), 2.0) < 0.99) DISCARD;\n");
	} else {
		writer.C(" gl_FragStencilRefARB = int(roundAndScaleTo255f(index.a));\n");
	}

	if (writeFragDepth) {
		writer.C(" gl_FragDepth = gl_FragCoord.z;\n");
	}

	writer.EndFSMain("outColor");
}
146
147
// This can probably be shared with some other shaders, like reinterpret or the future depth upload.
148
void GenerateStencilVs(char *buffer, const ShaderLanguageDesc &lang) {
149
ShaderWriter writer(buffer, lang, ShaderStage::Vertex);
150
151
writer.BeginVSMain(lang.vertexIndex ? Slice<InputDef>::empty() : inputs, Slice<UniformDef>::empty(), varyings);
152
153
if (lang.vertexIndex) {
154
writer.C(" float x = float((gl_VertexIndex & 1) << 1);\n");
155
writer.C(" float y = float(gl_VertexIndex & 2);\n");
156
writer.C(" v_texcoord = vec2(x, y);\n");
157
} else {
158
writer.C(" v_texcoord = a_position * 2.0;\n"); // yes, this should be right. Should be 2.0 in the far corners.
159
}
160
writer.C(" gl_Position = vec4(v_texcoord * 2.0 - vec2(1.0, 1.0), 0.0, 1.0);\n");
161
162
writer.EndVSMain(varyings);
163
}
164
165
// Copies stencil data from emulated memory at addr into the stencil buffer of the
// framebuffer located at that address.
//
// addr  - source address; masked with 0x3FFFFFFF before use.
// size  - not used by this function.
// flags - WriteStencil bits: STENCIL_IS_ZERO and IGNORE_ALPHA enable the shortcuts
//         taken when the source has no stencil bits set, and IGNORE_ALPHA is also
//         required for the blit path.
//
// Returns false when nothing was done: no framebuffer at addr, no valid source
// pointer, an unsupported color format, stencil already known to be zero, or the
// source texture could not be created. Returns true once a stencil clear or the
// stencil upload draws have been issued.
bool FramebufferManagerCommon::PerformWriteStencilFromMemory(u32 addr, int size, WriteStencil flags) {
	using namespace Draw;

	addr &= 0x3FFFFFFF;
	if (!MayIntersectFramebufferColor(addr)) {
		return false;
	}

	// Among framebuffers at this address, pick the one with the highest colorBindSeq.
	// TODO: Maybe we should broadcast to all? Most of the time, there's only one.
	VirtualFramebuffer *dstBuffer = nullptr;
	for (size_t n = 0; n < vfbs_.size(); ++n) {
		VirtualFramebuffer *candidate = vfbs_[n];
		if (candidate->fb_address != addr) {
			continue;
		}
		if (!dstBuffer || dstBuffer->colorBindSeq < candidate->colorBindSeq) {
			dstBuffer = candidate;
		}
	}
	if (!dstBuffer) {
		return false;
	}

	int values = 0;
	u8 usedBits = 0;
	const bool useExportShader = draw_->GetDeviceCaps().fragmentShaderStencilWriteSupported;

	const u8 *src = Memory::GetPointer(addr);
	if (!src) {
		return false;
	}

	// Could skip this when doing useExportShader, but then we couldn't optimize usedBits == 0.
	if (!CheckStencilBits(src, dstBuffer, values, usedBits)) {
		return false;
	}

	if (usedBits == 0) {
		// Common when creating buffers: the stencil is already 0, so we're done.
		if (flags & WriteStencil::STENCIL_IS_ZERO) {
			return false;
		}

		// Otherwise, we can skip alpha in many cases, in which case we don't even use a shader:
		// a render pass that clears stencil is enough.
		if (flags & WriteStencil::IGNORE_ALPHA) {
			if (dstBuffer->fbo) {
				draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "WriteStencilFromMemory_Clear");
			}
			return true;
		}
	}

	shaderManager_->DirtyLastShader();
	textureCache_->ForgetLastTexture();

	// Build the pipeline and sampler on first use.
	if (!stencilWritePipeline_) {
		const ShaderLanguageDesc &shaderLanguageDesc = draw_->GetShaderLanguageDesc();

		char *fsCode = new char[8192];
		char *vsCode = new char[8192];
		GenerateStencilFs(fsCode, shaderLanguageDesc, draw_->GetBugs(), useExportShader);
		GenerateStencilVs(vsCode, shaderLanguageDesc);

		_assert_msg_(strlen(fsCode) < 8192, "StenFS length error: %d", (int)strlen(fsCode));
		_assert_msg_(strlen(vsCode) < 8192, "StenVS length error: %d", (int)strlen(vsCode));

		ShaderModule *stencilUploadFs = draw_->CreateShaderModule(ShaderStage::Fragment, shaderLanguageDesc.shaderLanguage, (const uint8_t *)fsCode, strlen(fsCode), "stencil_fs");
		ShaderModule *stencilUploadVs = draw_->CreateShaderModule(ShaderStage::Vertex, shaderLanguageDesc.shaderLanguage, (const uint8_t *)vsCode, strlen(vsCode), "stencil_vs");

		_assert_(stencilUploadFs && stencilUploadVs);

		// One vec2 position per vertex, 8 bytes each.
		InputLayoutDesc layoutDesc = {
			8,
			{
				{ SEM_POSITION, DataFormat::R32G32_FLOAT, 0 },
			},
		};
		InputLayout *inputLayout = draw_->CreateInputLayout(layoutDesc);

		BlendState *blendDisabled = draw_->CreateBlendState({ false, 0x8 });

		// Stencil test always passes and every outcome replaces the stencil value.
		DepthStencilStateDesc depthStencilDesc{};
		depthStencilDesc.stencilEnabled = true;
		depthStencilDesc.stencil.compareOp = Comparison::ALWAYS;
		depthStencilDesc.stencil.passOp = StencilOp::REPLACE;
		depthStencilDesc.stencil.failOp = StencilOp::REPLACE;
		depthStencilDesc.stencil.depthFailOp = StencilOp::REPLACE;
		DepthStencilState *stencilReplace = draw_->CreateDepthStencilState(depthStencilDesc);

		RasterState *rasterDefault = draw_->CreateRasterState({});

		PipelineDesc stencilWriteDesc{
			Primitive::TRIANGLE_LIST,
			{ stencilUploadVs, stencilUploadFs },
			inputLayout, stencilReplace, blendDisabled, rasterDefault, &stencilUBDesc,
		};
		stencilWritePipeline_ = draw_->CreateGraphicsPipeline(stencilWriteDesc, "stencil_upload");
		_assert_(stencilWritePipeline_);

		delete[] fsCode;
		delete[] vsCode;

		// The temporary state objects and shader modules are released once the pipeline exists.
		rasterDefault->Release();
		blendDisabled->Release();
		stencilReplace->Release();
		inputLayout->Release();

		stencilUploadFs->Release();
		stencilUploadVs->Release();

		SamplerStateDesc samplerDesc{};
		stencilWriteSampler_ = draw_->CreateSamplerState(samplerDesc);
	}

	// Fullscreen triangle coordinates.
	static const float positions[6] = {
		0.0f, 0.0f,
		1.0f, 0.0f,
		0.0f, 1.0f,
	};

	// Our fragment shader (and discard) is slow. Since the source is 1x, we can stencil to 1x.
	// Then after we're done, we'll just blit it across and stretch it there. Not worth doing
	// if already at 1x size though, of course. The blit path also doesn't set alpha, so we
	// can't use it if that's needed.
	bool useBlit = draw_->GetDeviceCaps().framebufferStencilBlitSupported;
	if (useBlit) {
		const bool alreadyNativeSize = dstBuffer->width == dstBuffer->renderWidth;
		if (alreadyNativeSize || !dstBuffer->fbo || !(flags & WriteStencil::IGNORE_ALPHA)) {
			useBlit = false;
		}
	}

	u16 targetW = dstBuffer->renderWidth;
	u16 targetH = dstBuffer->renderHeight;
	if (useBlit) {
		targetW = dstBuffer->width;
		targetH = dstBuffer->height;
	}

	Draw::Framebuffer *blitFBO = nullptr;
	if (useBlit) {
		blitFBO = GetTempFBO(TempFBO::STENCIL, targetW, targetH);
		draw_->BindFramebufferAsRenderTarget(blitFBO, { Draw::RPAction::DONT_CARE, Draw::RPAction::DONT_CARE, Draw::RPAction::CLEAR }, "WriteStencilFromMemory_Blit");
	} else if (dstBuffer->fbo) {
		draw_->BindFramebufferAsRenderTarget(dstBuffer->fbo, { Draw::RPAction::KEEP, Draw::RPAction::KEEP, Draw::RPAction::CLEAR }, "WriteStencilFromMemory_NoBlit");
	}

	Draw::Viewport viewport = { 0.0f, 0.0f, (float)targetW, (float)targetH, 0.0f, 1.0f };
	draw_->SetViewport(viewport);

	// TODO: Switch the format to a single channel format?
	Draw::Texture *tex = MakePixelTexture(src, dstBuffer->fb_format, dstBuffer->fb_stride, dstBuffer->width, dstBuffer->height);
	if (!tex) {
		// Bad!
		return false;
	}

	draw_->BindTextures(TEX_SLOT_PSP_TEXTURE, 1, &tex);
	draw_->BindSamplerStates(TEX_SLOT_PSP_TEXTURE, 1, &stencilWriteSampler_);

	// We must bind the program after starting the render pass, and set the color mask after clearing.
	draw_->SetScissorRect(0, 0, targetW, targetH);
	draw_->BindPipeline(stencilWritePipeline_);

	if (useExportShader) {
		// We only need to do one pass if using an export shader.
		StencilUB ub{};
		draw_->SetStencilParams(0xFF, 0xFF, 0xFF);
		draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
		draw_->DrawUP(positions, 3);
	} else {
		// One pass per stencil bit: bit takes the values 1, 2, 4, ... below `values`.
		for (int bit = 1; bit < values; bit <<= 1) {
			if ((usedBits & bit) == 0) {
				// It's already zero, let's skip it.
				continue;
			}

			StencilUB ub{};
			// NOTE(review): the second SetStencilParams argument looks like the write mask - confirm against the Draw API.
			switch (dstBuffer->fb_format) {
			case GE_FORMAT_4444:
				draw_->SetStencilParams(0xFF, (bit << 4) | bit, 0xFF);
				ub.stencilValue = bit * (16.0f / 255.0f);
				break;
			case GE_FORMAT_5551:
				draw_->SetStencilParams(0xFF, 0xFF, 0xFF);
				ub.stencilValue = bit * (128.0f / 255.0f);
				break;
			default:
				draw_->SetStencilParams(0xFF, bit, 0xFF);
				ub.stencilValue = bit * (1.0f / 255.0f);
				break;
			}
			draw_->UpdateDynamicUniformBuffer(&ub, sizeof(ub));
			draw_->DrawUP(positions, 3);
		}
	}

	if (useBlit) {
		// Note that scissors don't affect blits on other APIs than OpenGL, so might want to try to get rid of this.
		draw_->SetScissorRect(0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight);
		draw_->BlitFramebuffer(blitFBO, 0, 0, targetW, targetH, dstBuffer->fbo, 0, 0, dstBuffer->renderWidth, dstBuffer->renderHeight, Draw::FB_STENCIL_BIT, Draw::FB_BLIT_NEAREST, "WriteStencilFromMemory_Blit");
		RebindFramebuffer("RebindFramebuffer - Stencil");
	}

	draw_->Invalidate(InvalidationFlags::CACHED_RENDER_STATE);
	gstate_c.Dirty(DIRTY_ALL_RENDER_STATE);
	return true;
}
360
361