CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutSign UpSign In
hrydgard

CoCalc provides the best real-time collaborative environment for Jupyter Notebooks, LaTeX documents, and SageMath, scalable from individual users to large groups and classes!

GitHub Repository: hrydgard/ppsspp
Path: blob/master/GPU/Vulkan/DrawEngineVulkan.cpp
Views: 1401
1
// Copyright (c) 2012- PPSSPP Project.
2
3
// This program is free software: you can redistribute it and/or modify
4
// it under the terms of the GNU General Public License as published by
5
// the Free Software Foundation, version 2.0 or later versions.
6
7
// This program is distributed in the hope that it will be useful,
8
// but WITHOUT ANY WARRANTY; without even the implied warranty of
9
// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
10
// GNU General Public License 2.0 for more details.
11
12
// A copy of the GPL 2.0 should have been included with the program.
13
// If not, see http://www.gnu.org/licenses/
14
15
// Official git repository and contact information can be found at
16
// https://github.com/hrydgard/ppsspp and http://www.ppsspp.org/.
17
18
#include "ppsspp_config.h"
19
#include <algorithm>
20
#include <functional>
21
22
#include "Common/Data/Convert/SmallDataConvert.h"
23
#include "Common/Profiler/Profiler.h"
24
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
25
26
#include "Common/Log.h"
27
#include "Common/MemoryUtil.h"
28
#include "Common/TimeUtil.h"
29
#include "Core/MemMap.h"
30
#include "Core/System.h"
31
#include "Core/Config.h"
32
#include "Core/CoreTiming.h"
33
34
#include "GPU/Math3D.h"
35
#include "GPU/GPUState.h"
36
#include "GPU/ge_constants.h"
37
38
#include "Common/GPU/Vulkan/VulkanContext.h"
39
#include "Common/GPU/Vulkan/VulkanMemory.h"
40
41
#include "GPU/Common/SplineCommon.h"
42
#include "GPU/Common/TransformCommon.h"
43
#include "GPU/Common/VertexDecoderCommon.h"
44
#include "GPU/Common/SoftwareTransformCommon.h"
45
#include "GPU/Common/DrawEngineCommon.h"
46
#include "GPU/Common/ShaderUniforms.h"
47
#include "GPU/Debugger/Debugger.h"
48
#include "GPU/Vulkan/DrawEngineVulkan.h"
49
#include "GPU/Vulkan/TextureCacheVulkan.h"
50
#include "GPU/Vulkan/ShaderManagerVulkan.h"
51
#include "GPU/Vulkan/PipelineManagerVulkan.h"
52
#include "GPU/Vulkan/FramebufferManagerVulkan.h"
53
#include "GPU/Vulkan/GPU_Vulkan.h"
54
55
using namespace PPSSPP_VK;
56
57
enum {
58
TRANSFORMED_VERTEX_BUFFER_SIZE = VERTEX_BUFFER_MAX * sizeof(TransformedVertex)
59
};
60
61
// Stores the draw context and sets up vertex decoding options and the index generator.
// No device objects are created here; that happens in InitDeviceObjects().
DrawEngineVulkan::DrawEngineVulkan(Draw::DrawContext *draw) : draw_(draw) {
	// Do not ask the vertex decoder to expand 8-bit normals or bone weights to float.
	decOptions_.expand8BitNormalsToFloat = false;
	decOptions_.expandAllWeightsToFloat = false;

	// Hand the decIndex_ buffer to the index generator.
	indexGen.Setup(decIndex_);
}
67
68
void DrawEngineVulkan::InitDeviceObjects() {
69
// All resources we need for PSP drawing. Usually only bindings 0 and 2-4 are populated.
70
71
BindingType bindingTypes[VKRPipelineLayout::MAX_DESC_SET_BINDINGS] = {
72
BindingType::COMBINED_IMAGE_SAMPLER, // main
73
BindingType::COMBINED_IMAGE_SAMPLER, // framebuffer-read
74
BindingType::COMBINED_IMAGE_SAMPLER, // palette
75
BindingType::UNIFORM_BUFFER_DYNAMIC_ALL, // uniforms
76
BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX, // lights
77
BindingType::UNIFORM_BUFFER_DYNAMIC_VERTEX, // bones
78
BindingType::STORAGE_BUFFER_VERTEX, // tess
79
BindingType::STORAGE_BUFFER_VERTEX,
80
BindingType::STORAGE_BUFFER_VERTEX,
81
};
82
83
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
84
VkDevice device = vulkan->GetDevice();
85
86
VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
87
pipelineLayout_ = renderManager->CreatePipelineLayout(bindingTypes, ARRAY_SIZE(bindingTypes), draw_->GetDeviceCaps().geometryShaderSupported, "drawengine_layout");
88
89
pushUBO_ = (VulkanPushPool *)draw_->GetNativeObject(Draw::NativeObject::PUSH_POOL);
90
pushVertex_ = new VulkanPushPool(vulkan, "pushVertex", 4 * 1024 * 1024, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
91
pushIndex_ = new VulkanPushPool(vulkan, "pushIndex", 1 * 512 * 1024, VK_BUFFER_USAGE_INDEX_BUFFER_BIT);
92
93
VkSamplerCreateInfo samp{ VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO };
94
samp.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
95
samp.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
96
samp.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE;
97
samp.magFilter = VK_FILTER_LINEAR;
98
samp.minFilter = VK_FILTER_LINEAR;
99
samp.maxLod = VK_LOD_CLAMP_NONE; // recommended by best practices, has no effect since we don't use mipmaps.
100
VkResult res = vkCreateSampler(device, &samp, nullptr, &samplerSecondaryLinear_);
101
samp.magFilter = VK_FILTER_NEAREST;
102
samp.minFilter = VK_FILTER_NEAREST;
103
res = vkCreateSampler(device, &samp, nullptr, &samplerSecondaryNearest_);
104
_dbg_assert_(VK_SUCCESS == res);
105
res = vkCreateSampler(device, &samp, nullptr, &nullSampler_);
106
_dbg_assert_(VK_SUCCESS == res);
107
108
tessDataTransferVulkan = new TessellationDataTransferVulkan(vulkan);
109
tessDataTransfer = tessDataTransferVulkan;
110
111
draw_->SetInvalidationCallback(std::bind(&DrawEngineVulkan::Invalidate, this, std::placeholders::_1));
112
}
113
114
// Frees any device objects that are still alive. If DeviceLost() already ran, draw_ is
// null and DestroyDeviceObjects() returns without doing anything.
DrawEngineVulkan::~DrawEngineVulkan() {
	DestroyDeviceObjects();
}
117
118
void DrawEngineVulkan::DestroyDeviceObjects() {
119
if (!draw_) {
120
// We've already done this from LostDevice.
121
return;
122
}
123
124
VulkanContext *vulkan = (VulkanContext *)draw_->GetNativeObject(Draw::NativeObject::CONTEXT);
125
VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
126
127
draw_->SetInvalidationCallback(InvalidationCallback());
128
129
delete tessDataTransferVulkan;
130
tessDataTransfer = nullptr;
131
tessDataTransferVulkan = nullptr;
132
133
pushUBO_ = nullptr;
134
135
if (pushVertex_) {
136
pushVertex_->Destroy();
137
delete pushVertex_;
138
pushVertex_ = nullptr;
139
}
140
if (pushIndex_) {
141
pushIndex_->Destroy();
142
delete pushIndex_;
143
pushIndex_ = nullptr;
144
}
145
146
if (samplerSecondaryNearest_ != VK_NULL_HANDLE)
147
vulkan->Delete().QueueDeleteSampler(samplerSecondaryNearest_);
148
if (samplerSecondaryLinear_ != VK_NULL_HANDLE)
149
vulkan->Delete().QueueDeleteSampler(samplerSecondaryLinear_);
150
if (nullSampler_ != VK_NULL_HANDLE)
151
vulkan->Delete().QueueDeleteSampler(nullSampler_);
152
153
renderManager->DestroyPipelineLayout(pipelineLayout_);
154
}
155
156
void DrawEngineVulkan::DeviceLost() {
157
DestroyDeviceObjects();
158
DirtyAllUBOs();
159
draw_ = nullptr;
160
}
161
162
// Adopts the given draw context and recreates the device objects on it.
void DrawEngineVulkan::DeviceRestore(Draw::DrawContext *draw) {
	// InitDeviceObjects() reads draw_, so it must be assigned first.
	draw_ = draw;
	InitDeviceObjects();
}
166
167
void DrawEngineVulkan::BeginFrame() {
168
lastPipeline_ = nullptr;
169
170
// pushUBO is the thin3d push pool, don't need to BeginFrame again.
171
pushVertex_->BeginFrame();
172
pushIndex_->BeginFrame();
173
174
tessDataTransferVulkan->SetPushPool(pushUBO_);
175
176
DirtyAllUBOs();
177
}
178
179
void DrawEngineVulkan::EndFrame() {
180
stats_.pushVertexSpaceUsed = (int)pushVertex_->GetUsedThisFrame();
181
stats_.pushIndexSpaceUsed = (int)pushIndex_->GetUsedThisFrame();
182
}
183
184
void DrawEngineVulkan::DirtyAllUBOs() {
185
baseUBOOffset = 0;
186
lightUBOOffset = 0;
187
boneUBOOffset = 0;
188
baseBuf = VK_NULL_HANDLE;
189
lightBuf = VK_NULL_HANDLE;
190
boneBuf = VK_NULL_HANDLE;
191
dirtyUniforms_ = DIRTY_BASE_UNIFORMS | DIRTY_LIGHT_UNIFORMS | DIRTY_BONE_UNIFORMS;
192
imageView = VK_NULL_HANDLE;
193
sampler = VK_NULL_HANDLE;
194
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
195
}
196
197
// Invalidation callback registered with the draw context in InitDeviceObjects().
// Reacts to the flags by dirtying the state that has to be re-recorded.
void DrawEngineVulkan::Invalidate(InvalidationCallbackFlags flags) {
	if (flags & InvalidationCallbackFlags::COMMAND_BUFFER_STATE) {
		// Intentionally empty: the "frame descriptor set" that used to be handled here was removed.
		// Should "seldomly-changing" descriptors come back, this is where they would be reset.
	}

	if (!(flags & InvalidationCallbackFlags::RENDER_PASS_STATE)) {
		return;
	}

	// A new render pass started: everything backed by dynamic state must be set again,
	// and the pipeline has to be re-bound.
	gstate_c.Dirty(DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_BLEND_STATE | DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
	lastPipeline_ = nullptr;
}
210
211
// The inline wrapper in the header checks for numDrawCalls_ == 0
212
void DrawEngineVulkan::DoFlush() {
213
VulkanRenderManager *renderManager = (VulkanRenderManager *)draw_->GetNativeObject(Draw::NativeObject::RENDER_MANAGER);
214
215
PROFILE_THIS_SCOPE("Flush");
216
217
bool tess = gstate_c.submitType == SubmitType::HW_BEZIER || gstate_c.submitType == SubmitType::HW_SPLINE;
218
219
bool textureNeedsApply = false;
220
if (gstate_c.IsDirty(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS) && !gstate.isModeClear() && gstate.isTextureMapEnabled()) {
221
textureCache_->SetTexture();
222
gstate_c.Clean(DIRTY_TEXTURE_IMAGE | DIRTY_TEXTURE_PARAMS);
223
// NOTE: After this is set, we MUST call ApplyTexture before returning.
224
textureNeedsApply = true;
225
} else if (gstate.getTextureAddress(0) == (gstate.getFrameBufRawAddress() | 0x04000000)) {
226
// This catches the case of clearing a texture.
227
gstate_c.Dirty(DIRTY_TEXTURE_IMAGE);
228
}
229
230
GEPrimitiveType prim = prevPrim_;
231
232
// Always use software for flat shading to fix the provoking index
233
// if the provoking vertex extension is not available.
234
bool provokingVertexOk = (tess || gstate.getShadeMode() != GE_SHADE_FLAT);
235
if (renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) {
236
provokingVertexOk = true;
237
}
238
bool useHWTransform = CanUseHardwareTransform(prim) && provokingVertexOk;
239
240
uint32_t ibOffset;
241
uint32_t vbOffset;
242
243
// The optimization to avoid indexing isn't really worth it on Vulkan since it means creating more pipelines.
244
// This could be avoided with the new dynamic state extensions, but not available enough on mobile.
245
const bool forceIndexed = draw_->GetDeviceCaps().verySlowShaderCompiler;
246
247
if (useHWTransform) {
248
VkBuffer vbuf = VK_NULL_HANDLE;
249
VkBuffer ibuf = VK_NULL_HANDLE;
250
if (decOptions_.applySkinInDecode && (lastVType_ & GE_VTYPE_WEIGHT_MASK)) {
251
// If software skinning, we're predecoding into "decoded". So make sure we're done, then push that content.
252
DecodeVerts(decoded_);
253
VkDeviceSize size = numDecodedVerts_ * dec_->GetDecVtxFmt().stride;
254
u8 *dest = (u8 *)pushVertex_->Allocate(size, 4, &vbuf, &vbOffset);
255
memcpy(dest, decoded_, size);
256
} else {
257
// Figure out how much pushbuffer space we need to allocate.
258
int vertsToDecode = ComputeNumVertsToDecode();
259
// Decode directly into the pushbuffer
260
u8 *dest = pushVertex_->Allocate(vertsToDecode * dec_->GetDecVtxFmt().stride, 4, &vbuf, &vbOffset);
261
DecodeVerts(dest);
262
}
263
264
int vertexCount;
265
int maxIndex;
266
bool useElements;
267
DecodeIndsAndGetData(&prim, &vertexCount, &maxIndex, &useElements, false);
268
269
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
270
if (gstate.isModeThrough()) {
271
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
272
} else {
273
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
274
}
275
276
if (textureNeedsApply) {
277
textureCache_->ApplyTexture();
278
textureCache_->GetVulkanHandles(imageView, sampler);
279
if (imageView == VK_NULL_HANDLE)
280
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
281
if (sampler == VK_NULL_HANDLE)
282
sampler = nullSampler_;
283
}
284
285
if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE) || prim != lastPrim_) {
286
if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
287
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
288
}
289
290
VulkanVertexShader *vshader = nullptr;
291
VulkanFragmentShader *fshader = nullptr;
292
VulkanGeometryShader *gshader = nullptr;
293
294
shaderManager_->GetShaders(prim, dec_, &vshader, &fshader, &gshader, pipelineState_, true, useHWTessellation_, decOptions_.expandAllWeightsToFloat, decOptions_.applySkinInDecode);
295
_dbg_assert_msg_(vshader->UseHWTransform(), "Bad vshader");
296
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, true, 0, framebufferManager_->GetMSAALevel(), false);
297
if (!pipeline || !pipeline->pipeline) {
298
// Already logged, let's bail out.
299
ResetAfterDraw();
300
return;
301
}
302
BindShaderBlendTex(); // This might cause copies so important to do before BindPipeline.
303
304
if (!renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_)) {
305
renderManager->ReportBadStateForDraw();
306
ResetAfterDraw();
307
return;
308
}
309
if (pipeline != lastPipeline_) {
310
if (lastPipeline_ && !(lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant())) {
311
gstate_c.Dirty(DIRTY_BLEND_STATE);
312
}
313
lastPipeline_ = pipeline;
314
}
315
ApplyDrawStateLate(renderManager, false, 0, pipeline->UsesBlendConstant());
316
gstate_c.Clean(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
317
gstate_c.Dirty(dirtyRequiresRecheck_);
318
dirtyRequiresRecheck_ = 0;
319
lastPipeline_ = pipeline;
320
}
321
lastPrim_ = prim;
322
323
dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
324
UpdateUBOs();
325
326
int descCount = 6;
327
if (tess)
328
descCount = 9;
329
int descSetIndex;
330
PackedDescriptor *descriptors = renderManager->PushDescriptorSet(descCount, &descSetIndex);
331
descriptors[0].image.view = imageView;
332
descriptors[0].image.sampler = sampler;
333
334
descriptors[1].image.view = boundSecondary_;
335
descriptors[1].image.sampler = samplerSecondaryNearest_;
336
337
descriptors[2].image.view = boundDepal_;
338
descriptors[2].image.sampler = (boundDepal_ && boundDepalSmoothed_) ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
339
340
descriptors[3].buffer.buffer = baseBuf;
341
descriptors[3].buffer.range = sizeof(UB_VS_FS_Base);
342
descriptors[3].buffer.offset = 0;
343
344
descriptors[4].buffer.buffer = lightBuf;
345
descriptors[4].buffer.range = sizeof(UB_VS_Lights);
346
descriptors[4].buffer.offset = 0;
347
348
descriptors[5].buffer.buffer = boneBuf;
349
descriptors[5].buffer.range = sizeof(UB_VS_Bones);
350
descriptors[5].buffer.offset = 0;
351
if (tess) {
352
const VkDescriptorBufferInfo *bufInfo = tessDataTransferVulkan->GetBufferInfo();
353
for (int j = 0; j < 3; j++) {
354
descriptors[j + 6].buffer.buffer = bufInfo[j].buffer;
355
descriptors[j + 6].buffer.range = bufInfo[j].range;
356
descriptors[j + 6].buffer.offset = bufInfo[j].offset;
357
}
358
}
359
// TODO: Can we avoid binding all three when not needed? Same below for hardware transform.
360
// Think this will require different descriptor set layouts.
361
const uint32_t dynamicUBOOffsets[3] = {
362
baseUBOOffset, lightUBOOffset, boneUBOOffset,
363
};
364
if (useElements) {
365
if (!ibuf) {
366
ibOffset = (uint32_t)pushIndex_->Push(decIndex_, sizeof(uint16_t) * vertexCount, 4, &ibuf);
367
}
368
renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, vertexCount, 1);
369
} else {
370
renderManager->Draw(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, vertexCount);
371
}
372
} else {
373
PROFILE_THIS_SCOPE("soft");
374
if (!decOptions_.applySkinInDecode) {
375
decOptions_.applySkinInDecode = true;
376
lastVType_ |= (1 << 26);
377
dec_ = GetVertexDecoder(lastVType_);
378
}
379
int prevDecodedVerts = numDecodedVerts_;
380
381
DecodeVerts(decoded_);
382
int vertexCount = DecodeInds();
383
384
bool hasColor = (lastVType_ & GE_VTYPE_COL_MASK) != GE_VTYPE_COL_NONE;
385
if (gstate.isModeThrough()) {
386
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && (hasColor || gstate.getMaterialAmbientA() == 255);
387
} else {
388
gstate_c.vertexFullAlpha = gstate_c.vertexFullAlpha && ((hasColor && (gstate.materialupdate & 1)) || gstate.getMaterialAmbientA() == 255) && (!gstate.isLightingEnabled() || gstate.getAmbientA() == 255);
389
}
390
391
gpuStats.numUncachedVertsDrawn += vertexCount;
392
prim = IndexGenerator::GeneralPrim((GEPrimitiveType)drawInds_[0].prim);
393
394
// At this point, the output is always an index triangle/line/point list, no strips/fans.
395
396
u16 *inds = decIndex_;
397
SoftwareTransformResult result{};
398
SoftwareTransformParams params{};
399
params.decoded = decoded_;
400
params.transformed = transformed_;
401
params.transformedExpanded = transformedExpanded_;
402
params.fbman = framebufferManager_;
403
params.texCache = textureCache_;
404
// In Vulkan, we have to force drawing of primitives if !framebufferManager_->UseBufferedRendering() because Vulkan clears
405
// do not respect scissor rects.
406
params.allowClear = framebufferManager_->UseBufferedRendering();
407
params.allowSeparateAlphaClear = false;
408
409
if (gstate.getShadeMode() == GE_SHADE_FLAT) {
410
if (!renderManager->GetVulkanContext()->GetDeviceFeatures().enabled.provokingVertex.provokingVertexLast) {
411
// If we can't have the hardware do it, we need to rotate the index buffer to simulate a different provoking vertex.
412
// We do this before line expansion etc.
413
IndexBufferProvokingLastToFirst(prim, inds, vertexCount);
414
}
415
}
416
params.flippedY = true;
417
params.usesHalfZ = true;
418
419
// We need to update the viewport early because it's checked for flipping in SoftwareTransform.
420
// We don't have a "DrawStateEarly" in vulkan, so...
421
// TODO: Probably should eventually refactor this and feed the vp size into SoftwareTransform directly (Unknown's idea).
422
if (gstate_c.IsDirty(DIRTY_VIEWPORTSCISSOR_STATE)) {
423
ViewportAndScissor vpAndScissor;
424
ConvertViewportAndScissor(framebufferManager_->UseBufferedRendering(),
425
framebufferManager_->GetRenderWidth(), framebufferManager_->GetRenderHeight(),
426
framebufferManager_->GetTargetBufferWidth(), framebufferManager_->GetTargetBufferHeight(),
427
vpAndScissor);
428
UpdateCachedViewportState(vpAndScissor);
429
}
430
431
SoftwareTransform swTransform(params);
432
433
const Lin::Vec3 trans(gstate_c.vpXOffset, gstate_c.vpYOffset, gstate_c.vpZOffset * 0.5f + 0.5f);
434
const Lin::Vec3 scale(gstate_c.vpWidthScale, gstate_c.vpHeightScale, gstate_c.vpDepthScale * 0.5f);
435
swTransform.SetProjMatrix(gstate.projMatrix, gstate_c.vpWidth < 0, gstate_c.vpHeight < 0, trans, scale);
436
437
swTransform.Transform(prim, dec_->VertexType(), dec_->GetDecVtxFmt(), numDecodedVerts_, &result);
438
// Non-zero depth clears are unusual, but some drivers don't match drawn depth values to cleared values.
439
// Games sometimes expect exact matches (see #12626, for example) for equal comparisons.
440
if (result.action == SW_CLEAR && everUsedEqualDepth_ && gstate.isClearModeDepthMask() && result.depth > 0.0f && result.depth < 1.0f)
441
result.action = SW_NOT_READY;
442
443
if (result.action == SW_NOT_READY) {
444
// decIndex_ here is always equal to inds currently, but it may not be in the future.
445
swTransform.BuildDrawingParams(prim, vertexCount, dec_->VertexType(), inds, RemainingIndices(inds), numDecodedVerts_, VERTEX_BUFFER_MAX, &result);
446
}
447
448
if (result.setSafeSize)
449
framebufferManager_->SetSafeSize(result.safeWidth, result.safeHeight);
450
451
// Only here, where we know whether to clear or to draw primitives, should we actually set the current framebuffer! Because that gives use the opportunity
452
// to use a "pre-clear" render pass, for high efficiency on tilers.
453
if (result.action == SW_DRAW_INDEXED) {
454
if (textureNeedsApply) {
455
gstate_c.pixelMapped = result.pixelMapped;
456
textureCache_->ApplyTexture();
457
gstate_c.pixelMapped = false;
458
textureCache_->GetVulkanHandles(imageView, sampler);
459
if (imageView == VK_NULL_HANDLE)
460
imageView = (VkImageView)draw_->GetNativeObject(gstate_c.textureIsArray ? Draw::NativeObject::NULL_IMAGEVIEW_ARRAY : Draw::NativeObject::NULL_IMAGEVIEW);
461
if (sampler == VK_NULL_HANDLE)
462
sampler = nullSampler_;
463
}
464
if (!lastPipeline_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_VERTEXSHADER_STATE | DIRTY_FRAGMENTSHADER_STATE | DIRTY_GEOMETRYSHADER_STATE) || prim != lastPrim_) {
465
if (prim != lastPrim_ || gstate_c.IsDirty(DIRTY_BLEND_STATE | DIRTY_VIEWPORTSCISSOR_STATE | DIRTY_RASTER_STATE | DIRTY_DEPTHSTENCIL_STATE)) {
466
ConvertStateToVulkanKey(*framebufferManager_, shaderManager_, prim, pipelineKey_, dynState_);
467
}
468
469
VulkanVertexShader *vshader = nullptr;
470
VulkanFragmentShader *fshader = nullptr;
471
VulkanGeometryShader *gshader = nullptr;
472
473
shaderManager_->GetShaders(prim, dec_, &vshader, &fshader, &gshader, pipelineState_, false, false, decOptions_.expandAllWeightsToFloat, true);
474
_dbg_assert_msg_(!vshader->UseHWTransform(), "Bad vshader");
475
VulkanPipeline *pipeline = pipelineManager_->GetOrCreatePipeline(renderManager, pipelineLayout_, pipelineKey_, &dec_->decFmt, vshader, fshader, gshader, false, 0, framebufferManager_->GetMSAALevel(), false);
476
if (!pipeline || !pipeline->pipeline) {
477
// Already logged, let's bail out.
478
ResetAfterDraw();
479
return;
480
}
481
BindShaderBlendTex(); // This might cause copies so super important to do before BindPipeline.
482
483
if (!renderManager->BindPipeline(pipeline->pipeline, pipeline->pipelineFlags, pipelineLayout_)) {
484
renderManager->ReportBadStateForDraw();
485
ResetAfterDraw();
486
return;
487
}
488
if (pipeline != lastPipeline_) {
489
if (lastPipeline_ && !lastPipeline_->UsesBlendConstant() && pipeline->UsesBlendConstant()) {
490
gstate_c.Dirty(DIRTY_BLEND_STATE);
491
}
492
lastPipeline_ = pipeline;
493
}
494
ApplyDrawStateLate(renderManager, result.setStencil, result.stencilValue, pipeline->UsesBlendConstant());
495
gstate_c.Clean(DIRTY_BLEND_STATE | DIRTY_DEPTHSTENCIL_STATE | DIRTY_RASTER_STATE | DIRTY_VIEWPORTSCISSOR_STATE);
496
gstate_c.Dirty(dirtyRequiresRecheck_);
497
dirtyRequiresRecheck_ = 0;
498
lastPipeline_ = pipeline;
499
}
500
501
lastPrim_ = prim;
502
503
dirtyUniforms_ |= shaderManager_->UpdateUniforms(framebufferManager_->UseBufferedRendering());
504
505
// Even if the first draw is through-mode, make sure we at least have one copy of these uniforms buffered
506
UpdateUBOs();
507
508
int descCount = 6;
509
int descSetIndex;
510
PackedDescriptor *descriptors = renderManager->PushDescriptorSet(descCount, &descSetIndex);
511
descriptors[0].image.view = imageView;
512
descriptors[0].image.sampler = sampler;
513
descriptors[1].image.view = boundSecondary_;
514
descriptors[1].image.sampler = samplerSecondaryNearest_;
515
descriptors[2].image.view = boundDepal_;
516
descriptors[2].image.sampler = (boundDepal_ && boundDepalSmoothed_) ? samplerSecondaryLinear_ : samplerSecondaryNearest_;
517
descriptors[3].buffer.buffer = baseBuf;
518
descriptors[3].buffer.range = sizeof(UB_VS_FS_Base);
519
descriptors[4].buffer.buffer = lightBuf;
520
descriptors[4].buffer.range = sizeof(UB_VS_Lights);
521
descriptors[5].buffer.buffer = boneBuf;
522
descriptors[5].buffer.range = sizeof(UB_VS_Bones);
523
524
const uint32_t dynamicUBOOffsets[3] = {
525
baseUBOOffset, lightUBOOffset, boneUBOOffset,
526
};
527
528
PROFILE_THIS_SCOPE("renderman_q");
529
530
VkBuffer vbuf, ibuf;
531
vbOffset = (uint32_t)pushVertex_->Push(result.drawBuffer, numDecodedVerts_ * sizeof(TransformedVertex), 4, &vbuf);
532
ibOffset = (uint32_t)pushIndex_->Push(inds, sizeof(short) * result.drawNumTrans, 4, &ibuf);
533
renderManager->DrawIndexed(descSetIndex, ARRAY_SIZE(dynamicUBOOffsets), dynamicUBOOffsets, vbuf, vbOffset, ibuf, ibOffset, result.drawNumTrans, 1);
534
} else if (result.action == SW_CLEAR) {
535
// Note: we won't get here if the clear is alpha but not color, or color but not alpha.
536
bool clearColor = gstate.isClearModeColorMask();
537
bool clearAlpha = gstate.isClearModeAlphaMask(); // and stencil
538
bool clearDepth = gstate.isClearModeDepthMask();
539
int mask = 0;
540
// The Clear detection takes care of doing a regular draw instead if separate masking
541
// of color and alpha is needed, so we can just treat them as the same.
542
if (clearColor || clearAlpha) mask |= Draw::FBChannel::FB_COLOR_BIT;
543
if (clearDepth) mask |= Draw::FBChannel::FB_DEPTH_BIT;
544
if (clearAlpha) mask |= Draw::FBChannel::FB_STENCIL_BIT;
545
// Note that since the alpha channel and the stencil channel are shared on the PSP,
546
// when we clear alpha, we also clear stencil to the same value.
547
draw_->Clear(mask, result.color, result.depth, result.color >> 24);
548
if (clearColor || clearAlpha) {
549
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
550
}
551
if (gstate_c.Use(GPU_USE_CLEAR_RAM_HACK) && gstate.isClearModeColorMask() && (gstate.isClearModeAlphaMask() || gstate.FrameBufFormat() == GE_FORMAT_565)) {
552
int scissorX1 = gstate.getScissorX1();
553
int scissorY1 = gstate.getScissorY1();
554
int scissorX2 = gstate.getScissorX2() + 1;
555
int scissorY2 = gstate.getScissorY2() + 1;
556
framebufferManager_->ApplyClearToMemory(scissorX1, scissorY1, scissorX2, scissorY2, result.color);
557
}
558
}
559
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
560
}
561
562
ResetAfterDrawInline();
563
564
framebufferManager_->SetColorUpdated(gstate_c.skipDrawReason);
565
566
GPUDebug::NotifyDraw();
567
}
568
569
void DrawEngineVulkan::ResetAfterDraw() {
570
indexGen.Reset();
571
numDecodedVerts_ = 0;
572
numDrawVerts_ = 0;
573
numDrawInds_ = 0;
574
vertexCountInDrawCalls_ = 0;
575
decodeIndsCounter_ = 0;
576
decodeVertsCounter_ = 0;
577
decOptions_.applySkinInDecode = g_Config.bSoftwareSkinning;
578
gstate_c.vertexFullAlpha = true;
579
}
580
581
void DrawEngineVulkan::UpdateUBOs() {
582
if ((dirtyUniforms_ & DIRTY_BASE_UNIFORMS) || baseBuf == VK_NULL_HANDLE) {
583
baseUBOOffset = shaderManager_->PushBaseBuffer(pushUBO_, &baseBuf);
584
dirtyUniforms_ &= ~DIRTY_BASE_UNIFORMS;
585
}
586
if ((dirtyUniforms_ & DIRTY_LIGHT_UNIFORMS) || lightBuf == VK_NULL_HANDLE) {
587
lightUBOOffset = shaderManager_->PushLightBuffer(pushUBO_, &lightBuf);
588
dirtyUniforms_ &= ~DIRTY_LIGHT_UNIFORMS;
589
}
590
if ((dirtyUniforms_ & DIRTY_BONE_UNIFORMS) || boneBuf == VK_NULL_HANDLE) {
591
boneUBOOffset = shaderManager_->PushBoneBuffer(pushUBO_, &boneBuf);
592
dirtyUniforms_ &= ~DIRTY_BONE_UNIFORMS;
593
}
594
}
595
596
// Uploads the data needed for hardware tessellation into three storage buffer allocations
// taken from push_, and records buffer/offset/range for each in bufInfo_:
//   bufInfo_[0] - size_u * size_v control points (position, texcoord, color), padded per TessData
//   bufInfo_[1] - the U weights (weights.size_u entries)
//   bufInfo_[2] - the V weights (weights.size_v entries)
//
// points   - control points to copy; size_u * size_v entries are handed to CopyControlPoints.
// vertType - vertex type, passed through to CopyControlPoints.
// weights  - precomputed spline/bezier weights for both directions.
void TessellationDataTransferVulkan::SendDataToShader(const SimpleVertex *const *points, int size_u, int size_v, u32 vertType, const Spline::Weight2D &weights) {
	// SSBOs that are not simply float1 or float2 need to be padded up to a float4 size. vec3 members
	// also need to be 16-byte aligned, hence the padding.
	struct TessData {
		float pos[3]; float pad1;
		float uv[2]; float pad2[2];
		float color[4];
	};

	int size = size_u * size_v;

	int ssboAlignment = vulkan_->GetPhysicalDeviceProperties().properties.limits.minStorageBufferOffsetAlignment;

	// Allocate() reports a 32-bit offset, while VkDescriptorBufferInfo::offset is a 64-bit
	// VkDeviceSize. Receive the offset in a local and assign it, instead of writing through a
	// uint32_t* cast into the wider field (which left the upper half untouched and depended on
	// byte order).
	uint32_t offset = 0;

	// Control points
	uint8_t *data = (uint8_t *)push_->Allocate(size * sizeof(TessData), ssboAlignment, &bufInfo_[0].buffer, &offset);
	bufInfo_[0].offset = offset;
	bufInfo_[0].range = size * sizeof(TessData);

	// The three attribute streams are interleaved in TessData, so they share one stride (in floats).
	float *pos = (float *)(data);
	float *tex = (float *)(data + offsetof(TessData, uv));
	float *col = (float *)(data + offsetof(TessData, color));
	int stride = sizeof(TessData) / sizeof(float);

	CopyControlPoints(pos, tex, col, stride, stride, stride, points, size, vertType);

	using Spline::Weight;

	// Weights U
	data = (uint8_t *)push_->Allocate(weights.size_u * sizeof(Weight), ssboAlignment, &bufInfo_[1].buffer, &offset);
	memcpy(data, weights.u, weights.size_u * sizeof(Weight));
	bufInfo_[1].offset = offset;
	bufInfo_[1].range = weights.size_u * sizeof(Weight);

	// Weights V
	data = (uint8_t *)push_->Allocate(weights.size_v * sizeof(Weight), ssboAlignment, &bufInfo_[2].buffer, &offset);
	memcpy(data, weights.v, weights.size_v * sizeof(Weight));
	bufInfo_[2].offset = offset;
	bufInfo_[2].range = weights.size_v * sizeof(Weight);
}
630
631