GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/GPU/Vulkan/VulkanQueueRunner.cpp
#include <unordered_map>

#include "Common/GPU/DataFormat.h"
#include "Common/GPU/Vulkan/VulkanQueueRunner.h"
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
#include "Common/Log.h"
#include "Common/TimeUtil.h"

using namespace PPSSPP_VK;

// Debug help: adb logcat -s DEBUG AndroidRuntime PPSSPPNativeActivity PPSSPP NativeGLView NativeRenderer NativeSurfaceView PowerSaveModeReceiver InputDeviceState PpssppActivity CameraHelper
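
// Grows *dest to the union (bounding rectangle) of *dest and src.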
static void MergeRenderAreaRectInto(VkRect2D *dest, const VkRect2D &src) {
    if (dest->offset.x > src.offset.x) {
        dest->extent.width += (dest->offset.x - src.offset.x);
        dest->offset.x = src.offset.x;
    }
    if (dest->offset.y > src.offset.y) {
        dest->extent.height += (dest->offset.y - src.offset.y);
        dest->offset.y = src.offset.y;
    }
    if (dest->offset.x + dest->extent.width < src.offset.x + src.extent.width) {
        dest->extent.width = src.offset.x + src.extent.width - dest->offset.x;
    }
    if (dest->offset.y + dest->extent.height < src.offset.y + src.extent.height) {
        dest->extent.height = src.offset.y + src.extent.height - dest->offset.y;
    }
}

// We need to take the "max" of the features used in the two render passes.
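// For example, merging a depth-using pass with a color-only pass yields a pass with the
// depth feature bit set, since the non-backbuffer flags simply OR together.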
RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
    // Either both are backbuffer type, or neither is.
    // Backbuffer render passes can't merge with any other render passes.
    if (a == RenderPassType::BACKBUFFER || b == RenderPassType::BACKBUFFER) {
        _dbg_assert_(a == b);
        return a;
    }

    _dbg_assert_((a & RenderPassType::MULTIVIEW) == (b & RenderPassType::MULTIVIEW));

    // The rest we can just OR together to get the maximum feature set.
    return (RenderPassType)((u32)a | (u32)b);
}

void VulkanQueueRunner::CreateDeviceObjects() {
    INFO_LOG(Log::G3D, "VulkanQueueRunner::CreateDeviceObjects");

    RPKey key{
        VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,
        VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,
    };
    compatibleRenderPass_ = GetRenderPass(key);

#if 0
    // Just to check whether it makes sense to split some of these. drawidx is way bigger than the others...
    // We should probably just move to variable-size data in a raw buffer anyway...
    VkRenderData rd;
    INFO_LOG(Log::G3D, "sizeof(pipeline): %d", (int)sizeof(rd.pipeline));
    INFO_LOG(Log::G3D, "sizeof(draw): %d", (int)sizeof(rd.draw));
    INFO_LOG(Log::G3D, "sizeof(drawidx): %d", (int)sizeof(rd.drawIndexed));
    INFO_LOG(Log::G3D, "sizeof(clear): %d", (int)sizeof(rd.clear));
    INFO_LOG(Log::G3D, "sizeof(viewport): %d", (int)sizeof(rd.viewport));
    INFO_LOG(Log::G3D, "sizeof(scissor): %d", (int)sizeof(rd.scissor));
    INFO_LOG(Log::G3D, "sizeof(blendColor): %d", (int)sizeof(rd.blendColor));
    INFO_LOG(Log::G3D, "sizeof(push): %d", (int)sizeof(rd.push));
#endif
}

void VulkanQueueRunner::DestroyDeviceObjects() {
    INFO_LOG(Log::G3D, "VulkanQueueRunner::DestroyDeviceObjects");

    syncReadback_.Destroy(vulkan_);

    renderPasses_.IterateMut([&](const RPKey &rpkey, VKRRenderPass *rp) {
        _dbg_assert_(rp);
        rp->Destroy(vulkan_);
        delete rp;
    });
    renderPasses_.Clear();
}

bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height, FrameDataShared &frameDataShared) {
    VkResult res;
    // We share the same depth buffer but have multiple color buffers, see the loop below.
    VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };

    VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
    fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RenderPassType::BACKBUFFER, VK_SAMPLE_COUNT_1_BIT);
    fb_info.attachmentCount = 2;
    fb_info.pAttachments = attachments;
    fb_info.width = width;
    fb_info.height = height;
    fb_info.layers = 1;

    framebuffers_.resize(frameDataShared.swapchainImageCount_);

    for (uint32_t i = 0; i < frameDataShared.swapchainImageCount_; i++) {
        attachments[0] = frameDataShared.swapchainImages_[i].view;
        res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
        _dbg_assert_(res == VK_SUCCESS);
        if (res != VK_SUCCESS) {
            framebuffers_.clear();
            return false;
        }
    }

    return true;
}

bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd, VulkanBarrierBatch *barriers) {
    const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
    int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
    VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
    image_info.imageType = VK_IMAGE_TYPE_2D;
    image_info.format = depth_format;
    image_info.extent.width = vulkan_->GetBackbufferWidth();
    image_info.extent.height = vulkan_->GetBackbufferHeight();
    image_info.extent.depth = 1;
    image_info.mipLevels = 1;
    image_info.arrayLayers = 1;
    image_info.samples = VK_SAMPLE_COUNT_1_BIT;
    image_info.queueFamilyIndexCount = 0;
    image_info.pQueueFamilyIndices = nullptr;
    image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
    image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
    image_info.flags = 0;

    depth_.format = depth_format;

    VmaAllocationCreateInfo allocCreateInfo{};
    VmaAllocationInfo allocInfo{};

    allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

    VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
    _dbg_assert_(res == VK_SUCCESS);
    if (res != VK_SUCCESS)
        return false;

    vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");

    VkImageMemoryBarrier *barrier = barriers->Add(depth_.image,
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
        VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, 0);
    barrier->subresourceRange.aspectMask = aspectMask;
    barrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
    barrier->newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
    barrier->srcAccessMask = 0;
    barrier->dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;

    VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
    depth_view_info.image = depth_.image;
    depth_view_info.format = depth_format;
    depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
    depth_view_info.subresourceRange.aspectMask = aspectMask;
    depth_view_info.subresourceRange.baseMipLevel = 0;
    depth_view_info.subresourceRange.levelCount = 1;
    depth_view_info.subresourceRange.baseArrayLayer = 0;
    depth_view_info.subresourceRange.layerCount = 1;
    depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
    depth_view_info.flags = 0;

    VkDevice device = vulkan_->GetDevice();

    res = vkCreateImageView(device, &depth_view_info, nullptr, &depth_.view);
    vulkan_->SetDebugName(depth_.view, VK_OBJECT_TYPE_IMAGE_VIEW, "depth_stencil_backbuffer");
    _dbg_assert_(res == VK_SUCCESS);
    if (res != VK_SUCCESS)
        return false;

    return true;
}

void VulkanQueueRunner::DestroyBackBuffers() {
    if (depth_.view) {
        vulkan_->Delete().QueueDeleteImageView(depth_.view);
    }
    if (depth_.image) {
        _dbg_assert_(depth_.alloc);
        vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
    }
    depth_ = {};
    for (uint32_t i = 0; i < framebuffers_.size(); i++) {
        _dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
        vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
    }
    framebuffers_.clear();

    INFO_LOG(Log::G3D, "Backbuffers destroyed");
}

// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
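// Render passes are created lazily on first use and cached by their load/store-action key (RPKey).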
VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
    VKRRenderPass *foundPass;
    if (renderPasses_.Get(key, &foundPass)) {
        return foundPass;
    }

    VKRRenderPass *pass = new VKRRenderPass(key);
    renderPasses_.Insert(key, pass);
    return pass;
}

void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
    // Optimizes renderpasses, then sequences them.
    // Planned optimizations:
    // * Create copies of render targets that are rendered to multiple times and textured from in sequence, and push those render passes
    //   as early as possible in the frame (Wipeout billboards). This will require taking over more of descriptor management so we can
    //   substitute descriptors, alternatively using texture array layers creatively.

    for (int j = 0; j < (int)steps.size(); j++) {
        if (steps[j]->stepType == VKRStepType::RENDER &&
            steps[j]->render.framebuffer) {
            if (steps[j]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
                steps[j]->render.finalColorLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
            }
            if (steps[j]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
                steps[j]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
            }
        }
    }

    for (int j = 0; j < (int)steps.size() - 1; j++) {
        // Push down empty "Clear/Store" renderpasses, and merge them with the first "Load/Store" to the same framebuffer.
        if (steps.size() > 1 && steps[j]->stepType == VKRStepType::RENDER &&
            steps[j]->render.numDraws == 0 &&
            steps[j]->render.numReads == 0 &&
            steps[j]->render.colorLoad == VKRRenderPassLoadAction::CLEAR &&
            steps[j]->render.stencilLoad == VKRRenderPassLoadAction::CLEAR &&
            steps[j]->render.depthLoad == VKRRenderPassLoadAction::CLEAR) {

            // Drop the clear step, and merge it into the next step that touches the same framebuffer.
            for (int i = j + 1; i < (int)steps.size(); i++) {
                if (steps[i]->stepType == VKRStepType::RENDER &&
                    steps[i]->render.framebuffer == steps[j]->render.framebuffer) {
                    if (steps[i]->render.colorLoad != VKRRenderPassLoadAction::CLEAR) {
                        steps[i]->render.colorLoad = VKRRenderPassLoadAction::CLEAR;
                        steps[i]->render.clearColor = steps[j]->render.clearColor;
                    }
                    if (steps[i]->render.depthLoad != VKRRenderPassLoadAction::CLEAR) {
                        steps[i]->render.depthLoad = VKRRenderPassLoadAction::CLEAR;
                        steps[i]->render.clearDepth = steps[j]->render.clearDepth;
                    }
                    if (steps[i]->render.stencilLoad != VKRRenderPassLoadAction::CLEAR) {
                        steps[i]->render.stencilLoad = VKRRenderPassLoadAction::CLEAR;
                        steps[i]->render.clearStencil = steps[j]->render.clearStencil;
                    }
                    MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
                    steps[i]->render.renderPassType = MergeRPTypes(steps[i]->render.renderPassType, steps[j]->render.renderPassType);
                    steps[i]->render.numDraws += steps[j]->render.numDraws;
                    steps[i]->render.numReads += steps[j]->render.numReads;
                    // Cheaply skip the first step.
                    steps[j]->stepType = VKRStepType::RENDER_SKIP;
                    break;
                } else if (steps[i]->stepType == VKRStepType::COPY &&
                    steps[i]->copy.src == steps[j]->render.framebuffer) {
                    // Can't eliminate the clear if a game copies from it before it's
                    // rendered to. However, this should be rare.
                    // TODO: This should never happen now that we check numReads.
                    break;
                }
            }
        }
    }

    // Queue hacks.
    if (hacksEnabled_) {
        if (hacksEnabled_ & QUEUE_HACK_MGS2_ACID) {
            // Massive speedup due to re-ordering.
            ApplyMGSHack(steps);
        }
        if (hacksEnabled_ & QUEUE_HACK_SONIC) {
            ApplySonicHack(steps);
        }
        if (hacksEnabled_ & QUEUE_HACK_RENDERPASS_MERGE) {
            ApplyRenderPassMerge(steps);
        }
    }
}
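
// Executes the queued steps in order, recording their commands into the frame's command buffers.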
void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps) {
    QueueProfileContext *profile = frameData.profile.enabled ? &frameData.profile : nullptr;

    if (profile)
        profile->cpuStartTime = time_now_d();

    bool emitLabels = vulkan_->Extensions().EXT_debug_utils;

    VkCommandBuffer cmd = frameData.hasPresentCommands ? frameData.presentCmd : frameData.mainCmd;

    for (size_t i = 0; i < steps.size(); i++) {
        const VKRStep &step = *steps[i];
        if (emitLabels) {
            VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
            char temp[128];
            if (step.stepType == VKRStepType::RENDER && step.render.framebuffer) {
                snprintf(temp, sizeof(temp), "%s: %s", step.tag, step.render.framebuffer->Tag());
                labelInfo.pLabelName = temp;
            } else {
                labelInfo.pLabelName = step.tag;
            }
            vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
        }

        switch (step.stepType) {
        case VKRStepType::RENDER:
        {
            bool perform = true;
            if (!step.render.framebuffer) {
                if (emitLabels) {
                    vkCmdEndDebugUtilsLabelEXT(cmd);
                }
                frameData.Submit(vulkan_, FrameSubmitType::Pending, frameDataShared);

                // If the window is minimized and we don't have a swap chain, don't bother.
                if (frameDataShared.swapchainImageCount_ > 0) {
                    // When stepping in the GE debugger, we can end up here multiple times in a "frame".
                    // So only acquire once.
                    if (!frameData.hasAcquired) {
                        frameData.AcquireNextImage(vulkan_);
                        if (frameData.hasAcquired && frameData.curSwapchainImage != (uint32_t)-1) {
                            SetBackbuffer(framebuffers_[frameData.curSwapchainImage], frameDataShared.swapchainImages_[frameData.curSwapchainImage].image);
                        }
                    }

                    if (!frameData.hasPresentCommands) {
                        // A RENDER step rendering to the backbuffer is normally the last step that happens in a frame,
                        // unless taking a screenshot, in which case there might be a READBACK_IMAGE after it.
                        // This is why we have to switch cmd to presentCmd in this case.
                        VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
                        begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
                        vkBeginCommandBuffer(frameData.presentCmd, &begin);
                        frameData.hasPresentCommands = true;
                    }
                    cmd = frameData.presentCmd;
                    if (emitLabels) {
                        VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
                        labelInfo.pLabelName = "present";
                        vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
                    }
                } else {
                    perform = false;
                }
            }
            if (perform) {
                PerformRenderPass(step, cmd, curFrame, frameData.profile);
            } else {
                frameData.skipSwap = true;
            }
            break;
        }
        case VKRStepType::COPY:
            PerformCopy(step, cmd);
            break;
        case VKRStepType::BLIT:
            PerformBlit(step, cmd);
            break;
        case VKRStepType::READBACK:
            PerformReadback(step, cmd, frameData);
            break;
        case VKRStepType::READBACK_IMAGE:
            PerformReadbackImage(step, cmd);
            break;
        case VKRStepType::RENDER_SKIP:
            break;
        default:
            UNREACHABLE();
            break;
        }

        if (profile && profile->timestampsEnabled && profile->timestampDescriptions.size() + 1 < MAX_TIMESTAMP_QUERIES) {
            vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile->queryPool, (uint32_t)profile->timestampDescriptions.size());
            profile->timestampDescriptions.push_back(StepToString(vulkan_, step));
        }

        if (emitLabels) {
            vkCmdEndDebugUtilsLabelEXT(cmd);
        }
    }

    // Deleting all in one go should be easier on the instruction cache than deleting
    // them as we go - and easier to debug because we can look backwards in the frame.
    if (!keepSteps) {
        for (auto step : steps) {
            delete step;
        }
        steps.clear();
    }

    if (profile)
        profile->cpuEndTime = time_now_d();
}

void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
    // Really need a sane way to express transforms of steps.

    // We want to turn a sequence of copy,render(1),copy,render(1),copy,render(1) into copy,copy,copy,render(n).

    // TODO: Where does this first part trigger? The below depal part triggers reliably in Acid2.

    for (int i = 0; i < (int)steps.size() - 3; i++) {
        int last = -1;
        if (!(steps[i]->stepType == VKRStepType::COPY &&
            steps[i + 1]->stepType == VKRStepType::RENDER &&
            steps[i + 2]->stepType == VKRStepType::COPY &&
            steps[i + 1]->render.numDraws == 1 &&
            steps[i]->copy.dst == steps[i + 2]->copy.dst))
            continue;
        // Looks promising! Let's start by finding the last one.
        for (int j = i; j < (int)steps.size(); j++) {
            switch (steps[j]->stepType) {
            case VKRStepType::RENDER:
                if (steps[j]->render.numDraws > 1)
                    last = j - 1;
                // should really also check descriptor sets...
                if (steps[j]->commands.size()) {
                    const VkRenderData &cmd = steps[j]->commands.back();
                    if (cmd.cmd == VKRRenderCommand::DRAW_INDEXED && cmd.draw.count != 6)
                        last = j - 1;
                }
                break;
            case VKRStepType::COPY:
                if (steps[j]->copy.dst != steps[i]->copy.dst)
                    last = j - 1;
                break;
            default:
                break;
            }
            if (last != -1)
                break;
        }

        if (last != -1) {
            // We've got a sequence from i to last that needs reordering.
            // First, let's sort it, keeping the same length.
            std::vector<VKRStep *> copies;
            std::vector<VKRStep *> renders;
            copies.reserve((last - i) / 2);
            renders.reserve((last - i) / 2);
            for (int n = i; n <= last; n++) {
                if (steps[n]->stepType == VKRStepType::COPY)
                    copies.push_back(steps[n]);
                else if (steps[n]->stepType == VKRStepType::RENDER)
                    renders.push_back(steps[n]);
            }
            // Write the copies back. TODO: Combine them too.
            for (int j = 0; j < (int)copies.size(); j++) {
                steps[i + j] = copies[j];
            }

            const int firstRender = i + (int)copies.size();

            // Write the renders back (so they will be deleted properly).
            for (int j = 0; j < (int)renders.size(); j++) {
                steps[firstRender + j] = renders[j];
            }
            _assert_(steps[firstRender]->stepType == VKRStepType::RENDER);
            // Combine the renders.
            for (int j = 1; j < (int)renders.size(); j++) {
                steps[firstRender]->commands.reserve(steps[firstRender]->commands.size() + renders[j]->commands.size());
                for (int k = 0; k < (int)renders[j]->commands.size(); k++) {
                    steps[firstRender]->commands.push_back(renders[j]->commands[k]);
                }
                MergeRenderAreaRectInto(&steps[firstRender]->render.renderArea, renders[j]->render.renderArea);
                // Easier than removing them from the list, though that might be the better option.
                steps[firstRender + j]->stepType = VKRStepType::RENDER_SKIP;
                steps[firstRender + j]->commands.clear();
            }
            // We're done.
            // INFO_LOG(Log::G3D, "MGS HACK part 1: copies: %d renders: %d", (int)copies.size(), (int)renders.size());
            break;
        }
    }

    // There's also a post-processing effect using depals that's just brutal in some parts
    // of the game.
    for (int i = 0; i < (int)steps.size() - 3; i++) {
        int last = -1;
        if (!(steps[i]->stepType == VKRStepType::RENDER &&
            steps[i + 1]->stepType == VKRStepType::RENDER &&
            steps[i + 2]->stepType == VKRStepType::RENDER &&
            steps[i]->render.numDraws == 1 &&
            steps[i + 1]->render.numDraws == 1 &&
            steps[i + 2]->render.numDraws == 1 &&
            steps[i]->render.colorLoad == VKRRenderPassLoadAction::DONT_CARE &&
            steps[i + 1]->render.colorLoad == VKRRenderPassLoadAction::KEEP &&
            steps[i + 2]->render.colorLoad == VKRRenderPassLoadAction::DONT_CARE)) {
            continue;
        }
        VKRFramebuffer *depalFramebuffer = steps[i]->render.framebuffer;
        VKRFramebuffer *targetFramebuffer = steps[i + 1]->render.framebuffer;
        // OK, found the start of a post-process sequence. Let's scan until we find the end.
        for (int j = i; j < (int)steps.size() - 3; j++) {
            if (((j - i) & 1) == 0) {
                // This should be a depal draw.
                if (steps[j]->render.numDraws != 1)
                    break;
                if (steps[j]->commands.size() > 5)  // TODO: Not the greatest heuristic! This may change if we merge commands.
                    break;
                if (steps[j]->render.colorLoad != VKRRenderPassLoadAction::DONT_CARE)
                    break;
                if (steps[j]->render.framebuffer != depalFramebuffer)
                    break;
                last = j;
            } else {
                // This should be a target draw.
                if (steps[j]->render.numDraws != 1)
                    break;
                if (steps[j]->commands.size() > 5)  // TODO: Not the greatest heuristic! This may change if we merge commands.
                    break;
                if (steps[j]->render.colorLoad != VKRRenderPassLoadAction::KEEP)
                    break;
                if (steps[j]->render.framebuffer != targetFramebuffer)
                    break;
                last = j;
            }
        }

        if (last == -1)
            continue;

        if (last > 479) {
            // Avoid some problems with the hack (oil slick crash). Some additional commands get added there that
            // confuse this merging. NOTE: This is not really a solution! See #20306.
            last = 479;
        }

        int minScissorX = 10000;
        int minScissorY = 10000;
        int maxScissorX = 0;
        int maxScissorY = 0;

        // Combine the depal renders. Also record scissor bounds.
        for (int j = i + 2; j <= last + 1; j += 2) {
            for (int k = 0; k < (int)steps[j]->commands.size(); k++) {
                switch (steps[j]->commands[k].cmd) {
                case VKRRenderCommand::DRAW:
                case VKRRenderCommand::DRAW_INDEXED:
                    steps[i]->commands.push_back(steps[j]->commands[k]);
                    break;
                case VKRRenderCommand::SCISSOR:
                {
                    // TODO: Merge scissor rectangles.
                    const auto &rc = steps[j]->commands[k].scissor.scissor;
                    if (rc.offset.x < minScissorX) {
                        minScissorX = rc.offset.x;
                    }
                    if (rc.offset.y < minScissorY) {
                        minScissorY = rc.offset.y;
                    }
                    if (rc.offset.x + (int)rc.extent.width > maxScissorX) {
                        maxScissorX = rc.offset.x + rc.extent.width;
                    }
                    if (rc.offset.y + (int)rc.extent.height > maxScissorY) {
                        maxScissorY = rc.offset.y + rc.extent.height;
                    }
                    break;
                }
                default:
                    break;
                }
            }
            MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
            steps[j]->stepType = VKRStepType::RENDER_SKIP;
        }

        // Update the scissor in the first draw.
        minScissorX = std::max(0, minScissorX);
        minScissorY = std::max(0, minScissorY);
        if (maxScissorX > minScissorX && maxScissorY > minScissorY) {
            for (int k = 0; k < (int)steps[i]->commands.size(); k++) {
                if (steps[i]->commands[k].cmd == VKRRenderCommand::SCISSOR) {
                    auto &rc = steps[i]->commands[k].scissor.scissor;
                    rc.offset.x = minScissorX;
                    rc.offset.y = minScissorY;
                    rc.extent.width = maxScissorX - minScissorX;
                    rc.extent.height = maxScissorY - minScissorY;
                    break;
                }
            }
        }

        // Combine the target renders.
        for (int j = i + 3; j <= last; j += 2) {
            for (int k = 0; k < (int)steps[j]->commands.size(); k++) {
                switch (steps[j]->commands[k].cmd) {
                case VKRRenderCommand::DRAW:
                case VKRRenderCommand::DRAW_INDEXED:
                    steps[i + 1]->commands.push_back(steps[j]->commands[k]);
                    break;
                default:
                    break;
                }
            }
            MergeRenderAreaRectInto(&steps[i + 1]->render.renderArea, steps[j]->render.renderArea);
            steps[j]->stepType = VKRStepType::RENDER_SKIP;
        }

        // INFO_LOG(Log::G3D, "MGS HACK part 2: %d-%d : %d (total steps: %d)", i, last, (last - i), (int)steps.size());

        // We're done - we only expect one of these sequences per frame.
        break;
    }
}
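
// Reorders an alternating sequence of render passes that ping-pong between two
// framebuffers so that each framebuffer's draws run as a single merged pass.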
void VulkanQueueRunner::ApplySonicHack(std::vector<VKRStep *> &steps) {
    // We want to turn a sequence of render(3),render(1),render(6),render(1),render(6),render(1),render(3) into
    // render(1),render(1),render(1),render(6),render(6),render(6).

    for (int i = 0; i < (int)steps.size() - 4; i++) {
        int last = -1;
        if (!(steps[i]->stepType == VKRStepType::RENDER &&
            steps[i + 1]->stepType == VKRStepType::RENDER &&
            steps[i + 2]->stepType == VKRStepType::RENDER &&
            steps[i + 3]->stepType == VKRStepType::RENDER &&
            steps[i]->render.numDraws == 3 &&
            steps[i + 1]->render.numDraws == 1 &&
            steps[i + 2]->render.numDraws == 6 &&
            steps[i + 3]->render.numDraws == 1 &&
            steps[i]->render.framebuffer == steps[i + 2]->render.framebuffer &&
            steps[i + 1]->render.framebuffer == steps[i + 3]->render.framebuffer))
            continue;
        // Looks promising! Let's start by finding the last one.
        for (int j = i; j < (int)steps.size(); j++) {
            switch (steps[j]->stepType) {
            case VKRStepType::RENDER:
                if ((j - i) & 1) {
                    if (steps[j]->render.framebuffer != steps[i + 1]->render.framebuffer)
                        last = j - 1;
                    if (steps[j]->render.numDraws != 1)
                        last = j - 1;
                } else {
                    if (steps[j]->render.framebuffer != steps[i]->render.framebuffer)
                        last = j - 1;
                    if (steps[j]->render.numDraws != 3 && steps[j]->render.numDraws != 6)
                        last = j - 1;
                }
                break;
            default:
                break;
            }
            if (last != -1)
                break;
        }

        if (last != -1) {
            // We've got a sequence from i to last that needs reordering.
            // First, let's sort it, keeping the same length.
            std::vector<VKRStep *> type1;
            std::vector<VKRStep *> type2;
            type1.reserve((last - i) / 2);
            type2.reserve((last - i) / 2);
            for (int n = i; n <= last; n++) {
                if (steps[n]->render.framebuffer == steps[i]->render.framebuffer)
                    type1.push_back(steps[n]);
                else
                    type2.push_back(steps[n]);
            }

            // Write the renders back in order. Same amount, so deletion will work fine.
            for (int j = 0; j < (int)type1.size(); j++) {
                steps[i + j] = type1[j];
            }
            for (int j = 0; j < (int)type2.size(); j++) {
                steps[i + j + type1.size()] = type2[j];
            }

            // Combine the renders.
            for (int j = 1; j < (int)type1.size(); j++) {
                for (int k = 0; k < (int)type1[j]->commands.size(); k++) {
                    steps[i]->commands.push_back(type1[j]->commands[k]);
                }
                steps[i + j]->stepType = VKRStepType::RENDER_SKIP;
            }
            for (int j = 1; j < (int)type2.size(); j++) {
                for (int k = 0; k < (int)type2[j]->commands.size(); k++) {
                    steps[i + type1.size()]->commands.push_back(type2[j]->commands[k]);
                }
                // Technically, we should merge the render areas here, but they're all the same so it's not needed.
                steps[i + type1.size() + j]->stepType = VKRStepType::RENDER_SKIP;
            }
            // We're done.
            break;
        }
    }
}

const char *AspectToString(VkImageAspectFlags aspect) {
    switch (aspect) {
    case VK_IMAGE_ASPECT_COLOR_BIT: return "COLOR";
    case VK_IMAGE_ASPECT_DEPTH_BIT: return "DEPTH";
    case VK_IMAGE_ASPECT_STENCIL_BIT: return "STENCIL";
    case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: return "DEPTHSTENCIL";
    default: return "UNUSUAL";
    }
}
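
// Builds a short human-readable description of a step, used for logging and for GPU profiling labels.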
std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep &step) {
    char buffer[256];
    switch (step.stepType) {
    case VKRStepType::RENDER:
    {
        int w = step.render.framebuffer ? step.render.framebuffer->width : vulkan->GetBackbufferWidth();
        int h = step.render.framebuffer ? step.render.framebuffer->height : vulkan->GetBackbufferHeight();
        int actual_w = step.render.renderArea.extent.width;
        int actual_h = step.render.renderArea.extent.height;
        const char *renderCmd = GetRPTypeName(step.render.renderPassType);
        snprintf(buffer, sizeof(buffer), "%s %s %s (draws: %d, %dx%d/%dx%d)", renderCmd, step.tag, step.render.framebuffer ? step.render.framebuffer->Tag() : "", step.render.numDraws, actual_w, actual_h, w, h);
        break;
    }
    case VKRStepType::COPY:
        snprintf(buffer, sizeof(buffer), "COPY '%s' %s -> %s (%dx%d, %s)", step.tag, step.copy.src->Tag(), step.copy.dst->Tag(), step.copy.srcRect.extent.width, step.copy.srcRect.extent.height, AspectToString(step.copy.aspectMask));
        break;
    case VKRStepType::BLIT:
        snprintf(buffer, sizeof(buffer), "BLIT '%s' %s -> %s (%dx%d->%dx%d, %s)", step.tag, step.copy.src->Tag(), step.copy.dst->Tag(), step.blit.srcRect.extent.width, step.blit.srcRect.extent.height, step.blit.dstRect.extent.width, step.blit.dstRect.extent.height, AspectToString(step.blit.aspectMask));
        break;
    case VKRStepType::READBACK:
        snprintf(buffer, sizeof(buffer), "READBACK '%s' %s (%dx%d, %s)", step.tag, step.readback.src ? step.readback.src->Tag() : "(backbuffer)", step.readback.srcRect.extent.width, step.readback.srcRect.extent.height, AspectToString(step.readback.aspectMask));
        break;
    case VKRStepType::READBACK_IMAGE:
        snprintf(buffer, sizeof(buffer), "READBACK_IMAGE '%s' (%dx%d)", step.tag, step.readback_image.srcRect.extent.width, step.readback_image.srcRect.extent.height);
        break;
    case VKRStepType::RENDER_SKIP:
        snprintf(buffer, sizeof(buffer), "(RENDER_SKIP) %s", step.tag);
        break;
    default:
        buffer[0] = 0;
        break;
    }
    return std::string(buffer);
}

// Ideally, this should be cheap enough to be applied to all games. At least on mobile, it's pretty
// much a guaranteed neutral or win in terms of GPU power. However, the dependency calculation really
// must be perfect!
void VulkanQueueRunner::ApplyRenderPassMerge(std::vector<VKRStep *> &steps) {
    // First, let's count how many times each framebuffer is rendered to.
    // If it's more than one, let's do our best to merge them. This can help God of War quite a bit.
    std::unordered_map<VKRFramebuffer *, int> counts;
    for (int i = 0; i < (int)steps.size(); i++) {
        if (steps[i]->stepType == VKRStepType::RENDER) {
            counts[steps[i]->render.framebuffer]++;
        }
    }

    auto mergeRenderSteps = [](VKRStep *dst, VKRStep *src) {
        // OK. Now, if it's a render, slurp up all the commands and kill the step.
        // Also slurp up any pretransitions.
        dst->preTransitions.append(src->preTransitions);
        dst->commands.insert(dst->commands.end(), src->commands.begin(), src->commands.end());
        MergeRenderAreaRectInto(&dst->render.renderArea, src->render.renderArea);
        // Clear the dependencies so we don't consider the skipped step for other things; maybe it doesn't matter.
        src->dependencies.clear();
        src->stepType = VKRStepType::RENDER_SKIP;
        dst->render.numDraws += src->render.numDraws;
        dst->render.numReads += src->render.numReads;
        dst->render.pipelineFlags |= src->render.pipelineFlags;
        dst->render.renderPassType = MergeRPTypes(dst->render.renderPassType, src->render.renderPassType);
    };
    auto renderHasClear = [](const VKRStep *step) {
        const auto &r = step->render;
        return r.colorLoad == VKRRenderPassLoadAction::CLEAR || r.depthLoad == VKRRenderPassLoadAction::CLEAR || r.stencilLoad == VKRRenderPassLoadAction::CLEAR;
    };

    // Now, let's go through the steps. If we find one that is rendered to more than once,
    // we'll scan forward and slurp up any rendering that can be merged across.
    for (int i = 0; i < (int)steps.size(); i++) {
        if (steps[i]->stepType == VKRStepType::RENDER && counts[steps[i]->render.framebuffer] > 1) {
            auto fb = steps[i]->render.framebuffer;
            TinySet<VKRFramebuffer *, 8> touchedFramebuffers;  // must be the same fast-size as the dependencies TinySet for annoying reasons.
            for (int j = i + 1; j < (int)steps.size(); j++) {
                // If any other passes are reading from this framebuffer as-is, we cancel the scan.
                if (steps[j]->dependencies.contains(fb)) {
                    // Reading from itself means a KEEP, which is okay.
                    if (steps[j]->stepType != VKRStepType::RENDER || steps[j]->render.framebuffer != fb)
                        break;
                }
                switch (steps[j]->stepType) {
                case VKRStepType::RENDER:
                    if (steps[j]->render.framebuffer == fb) {
                        // Prevent Unknown's example case from https://github.com/hrydgard/ppsspp/pull/12242
                        if (renderHasClear(steps[j]) || steps[j]->dependencies.contains(touchedFramebuffers)) {
                            goto done_fb;
                        } else {
                            // Safe to merge, great.
                            mergeRenderSteps(steps[i], steps[j]);
                        }
                    } else {
                        // Remember the framebuffer this wrote to. We can't merge with later passes that depend on these.
                        touchedFramebuffers.insert(steps[j]->render.framebuffer);
                    }
                    break;
                case VKRStepType::COPY:
                    if (steps[j]->copy.dst == fb) {
                        // Without framebuffer "renaming", we can't merge past a clobbered fb.
                        goto done_fb;
                    }
                    touchedFramebuffers.insert(steps[j]->copy.dst);
                    break;
                case VKRStepType::BLIT:
                    if (steps[j]->blit.dst == fb) {
                        // Without framebuffer "renaming", we can't merge past a clobbered fb.
                        goto done_fb;
                    }
                    touchedFramebuffers.insert(steps[j]->blit.dst);
                    break;
                case VKRStepType::READBACK:
                    // Not sure this has much effect; when executed, READBACK is always the last step
                    // since we stall the GPU and wait immediately after.
                    break;
                case VKRStepType::RENDER_SKIP:
                case VKRStepType::READBACK_IMAGE:
                    break;
                default:
                    // We added a new step? Might be unsafe.
                    _dbg_assert_(false);
                    goto done_fb;
                }
            }
        done_fb:
            ;
        }
    }
}
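
// Logs a human-readable summary of every step in the frame. Useful when debugging the
// preprocessor and the queue hacks above.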
void VulkanQueueRunner::LogSteps(const std::vector<VKRStep *> &steps, bool verbose) {
    INFO_LOG(Log::G3D, "=================== FRAME ====================");
    for (size_t i = 0; i < steps.size(); i++) {
        const VKRStep &step = *steps[i];
        switch (step.stepType) {
        case VKRStepType::RENDER:
            LogRenderPass(step, verbose);
            break;
        case VKRStepType::COPY:
            LogCopy(step);
            break;
        case VKRStepType::BLIT:
            LogBlit(step);
            break;
        case VKRStepType::READBACK:
            LogReadback(step);
            break;
        case VKRStepType::READBACK_IMAGE:
            LogReadbackImage(step);
            break;
        case VKRStepType::RENDER_SKIP:
            INFO_LOG(Log::G3D, "(skipped render pass)");
            break;
        }
    }
    INFO_LOG(Log::G3D, "------------------- SUBMIT ------------------");
}

const char *RenderPassActionName(VKRRenderPassLoadAction a) {
    switch (a) {
    case VKRRenderPassLoadAction::CLEAR:
        return "CLEAR";
    case VKRRenderPassLoadAction::DONT_CARE:
        return "DONT_CARE";
    case VKRRenderPassLoadAction::KEEP:
        return "KEEP";
    }
    return "?";
}

const char *ImageLayoutToString(VkImageLayout layout) {
    switch (layout) {
    case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: return "COLOR_ATTACHMENT";
    case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: return "DEPTH_STENCIL_ATTACHMENT";
    case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: return "SHADER_READ_ONLY";
    case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: return "TRANSFER_SRC";
    case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: return "TRANSFER_DST";
    case VK_IMAGE_LAYOUT_GENERAL: return "GENERAL";
    case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: return "PRESENT_SRC_KHR";
    case VK_IMAGE_LAYOUT_UNDEFINED: return "UNDEFINED";
    default: return "(unknown)";
    }
}

void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
    const auto &r = pass.render;
    const char *framebuf = r.framebuffer ? r.framebuffer->Tag() : "backbuffer";
    int w = r.framebuffer ? r.framebuffer->width : vulkan_->GetBackbufferWidth();
    int h = r.framebuffer ? r.framebuffer->height : vulkan_->GetBackbufferHeight();

    INFO_LOG(Log::G3D, "RENDER %s Begin(%s, draws: %d, %dx%d, %s, %s, %s)", pass.tag, framebuf, r.numDraws, w, h, RenderPassActionName(r.colorLoad), RenderPassActionName(r.depthLoad), RenderPassActionName(r.stencilLoad));
    // TODO: Log these in detail.
    for (int i = 0; i < (int)pass.preTransitions.size(); i++) {
        INFO_LOG(Log::G3D, "  PRETRANSITION: %s %s -> %s", pass.preTransitions[i].fb->Tag(), AspectToString(pass.preTransitions[i].aspect), ImageLayoutToString(pass.preTransitions[i].targetLayout));
    }

    if (verbose) {
        for (auto &cmd : pass.commands) {
            switch (cmd.cmd) {
            case VKRRenderCommand::REMOVED:
                INFO_LOG(Log::G3D, "  (Removed)");
                break;
            case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
                INFO_LOG(Log::G3D, "  BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
                break;
            case VKRRenderCommand::BLEND:
                INFO_LOG(Log::G3D, "  BlendColor(%08x)", cmd.blendColor.color);
                break;
            case VKRRenderCommand::CLEAR:
                INFO_LOG(Log::G3D, "  Clear");
                break;
            case VKRRenderCommand::DRAW:
                INFO_LOG(Log::G3D, "  Draw(%d)", cmd.draw.count);
                break;
            case VKRRenderCommand::DRAW_INDEXED:
                INFO_LOG(Log::G3D, "  DrawIndexed(%d)", cmd.drawIndexed.count);
                break;
            case VKRRenderCommand::SCISSOR:
                INFO_LOG(Log::G3D, "  Scissor(%d, %d, %d, %d)", (int)cmd.scissor.scissor.offset.x, (int)cmd.scissor.scissor.offset.y, (int)cmd.scissor.scissor.extent.width, (int)cmd.scissor.scissor.extent.height);
                break;
            case VKRRenderCommand::STENCIL:
                INFO_LOG(Log::G3D, "  Stencil(ref=%d, compare=%d, write=%d)", cmd.stencil.stencilRef, cmd.stencil.stencilCompareMask, cmd.stencil.stencilWriteMask);
                break;
            case VKRRenderCommand::VIEWPORT:
                INFO_LOG(Log::G3D, "  Viewport(%f, %f, %f, %f, %f, %f)", cmd.viewport.vp.x, cmd.viewport.vp.y, cmd.viewport.vp.width, cmd.viewport.vp.height, cmd.viewport.vp.minDepth, cmd.viewport.vp.maxDepth);
                break;
            case VKRRenderCommand::PUSH_CONSTANTS:
                INFO_LOG(Log::G3D, "  PushConstants(%d)", cmd.push.size);
                break;
            case VKRRenderCommand::DEBUG_ANNOTATION:
                INFO_LOG(Log::G3D, "  DebugAnnotation(%s)", cmd.debugAnnotation.annotation);
                break;

            case VKRRenderCommand::NUM_RENDER_COMMANDS:
                break;
            }
        }
    }

    INFO_LOG(Log::G3D, "  Final: %s %s", ImageLayoutToString(pass.render.finalColorLayout), ImageLayoutToString(pass.render.finalDepthStencilLayout));
    INFO_LOG(Log::G3D, "RENDER End(%s) - %d commands executed", framebuf, (int)pass.commands.size());
}

void VulkanQueueRunner::LogCopy(const VKRStep &step) {
    INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}

void VulkanQueueRunner::LogBlit(const VKRStep &step) {
    INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}

void VulkanQueueRunner::LogReadback(const VKRStep &step) {
    INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}

void VulkanQueueRunner::LogReadbackImage(const VKRStep &step) {
    INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}
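
// Records a single RENDER step: applies the requested pre-transitions, begins the
// appropriate render pass, then replays the step's command list into the command buffer.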
void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer cmd, int curFrame, QueueProfileContext &profile) {
    for (size_t i = 0; i < step.preTransitions.size(); i++) {
        const TransitionRequest &iter = step.preTransitions[i];
        if (iter.aspect == VK_IMAGE_ASPECT_COLOR_BIT && iter.fb->color.layout != iter.targetLayout) {
            recordBarrier_.TransitionColorImageAuto(
                &iter.fb->color,
                iter.targetLayout
            );
        } else if (iter.fb->depth.image != VK_NULL_HANDLE && (iter.aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) && iter.fb->depth.layout != iter.targetLayout) {
            recordBarrier_.TransitionDepthStencilImageAuto(
                &iter.fb->depth,
                iter.targetLayout
            );
        }
    }

    // Don't execute empty renderpasses that keep the contents.
    if (step.commands.empty() && step.render.colorLoad == VKRRenderPassLoadAction::KEEP && step.render.depthLoad == VKRRenderPassLoadAction::KEEP && step.render.stencilLoad == VKRRenderPassLoadAction::KEEP) {
        // Flush the pending barrier.
        recordBarrier_.Flush(cmd);
        // Nothing to do.
        // TODO: Though - a later step might have used this step's finalColorLayout etc to get things in a layout it expects.
        // Should we just do a barrier? Or just let the later step deal with not having things in its preferred layout, like now?
        return;
    }

    // Write-after-write hazards. Fixed flicker in God of War on ARM (before we added another fix that removed these).
    // NOTE: These are commented out because the normal barriers no longer check for equality, effectively generating these
    // barriers automatically. This is safe, but I sometimes think it can be improved on.
    /*
    if (step.render.framebuffer) {
        int n = 0;
        int stage = 0;

        if (step.render.framebuffer->color.layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
            recordBarrier_.TransitionImage(
                step.render.framebuffer->color.image,
                0,
                1,
                step.render.framebuffer->numLayers,
                VK_IMAGE_ASPECT_COLOR_BIT,
                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
                VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
            );
        }
        if (step.render.framebuffer->depth.image != VK_NULL_HANDLE && step.render.framebuffer->depth.layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
            recordBarrier_.TransitionImage(
                step.render.framebuffer->depth.image,
                0,
                1,
                step.render.framebuffer->numLayers,
                VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
                VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
                VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,
                VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
                VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
            );
        }
    }
    */

    // This chooses a render pass according to the load/store attachment state. We no longer transition
    // image layouts as part of the passes.
    //
    // NOTE: Unconditionally flushes recordBarrier_.
    VKRRenderPass *renderPass = PerformBindFramebufferAsRenderTarget(step, cmd);

    int curWidth = step.render.framebuffer ? step.render.framebuffer->width : vulkan_->GetBackbufferWidth();
    int curHeight = step.render.framebuffer ? step.render.framebuffer->height : vulkan_->GetBackbufferHeight();

    VKRFramebuffer *fb = step.render.framebuffer;

    VKRGraphicsPipeline *lastGraphicsPipeline = nullptr;
    VKRComputePipeline *lastComputePipeline = nullptr;

    const auto &commands = step.commands;

    // We can do a little bit of state tracking here to eliminate some calls into the driver.
    // The stencil ones are very commonly mostly redundant, so let's eliminate them where possible.
    // Might also want to consider scissor and viewport.
    VkPipeline lastPipeline = VK_NULL_HANDLE;
    FastVec<PendingDescSet> *descSets = nullptr;
    VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;

    bool pipelineOK = false;

    int lastStencilWriteMask = -1;
    int lastStencilCompareMask = -1;
    int lastStencilReference = -1;

    const RenderPassType rpType = step.render.renderPassType;

    for (size_t i = 0; i < commands.size(); i++) {
        const VkRenderData &c = commands[i];
#ifdef _DEBUG
        if (profile.enabled) {
            if ((size_t)c.cmd < ARRAY_SIZE(profile.commandCounts)) {
                profile.commandCounts[(size_t)c.cmd]++;
            }
        }
#endif
        switch (c.cmd) {
        case VKRRenderCommand::REMOVED:
            break;

        case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
        {
            VKRGraphicsPipeline *graphicsPipeline = c.graphics_pipeline.pipeline;
            if (graphicsPipeline != lastGraphicsPipeline) {
                VkSampleCountFlagBits fbSampleCount = fb ? fb->sampleCount : VK_SAMPLE_COUNT_1_BIT;

                if (RenderPassTypeHasMultisample(rpType) && fbSampleCount != graphicsPipeline->SampleCount()) {
                    // Should have been invalidated.
                    _assert_msg_(graphicsPipeline->SampleCount() == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM,
                        "expected %d sample count, got %d", fbSampleCount, graphicsPipeline->SampleCount());
                }

                VkPipeline pipeline;

                {
                    std::lock_guard<std::mutex> lock(graphicsPipeline->mutex_);
                    if (!graphicsPipeline->pipeline[(size_t)rpType]) {
                        // NOTE: If render steps got merged, it can happen that, as they ended during recording,
                        // they didn't know their final render pass type so they created the wrong pipelines in EndCurRenderStep().
                        // Unfortunately I don't know if we can fix it in any more sensible place than here.
                        // Maybe a middle pass. But let's try to just block and compile here for now, this doesn't
                        // happen all that much.
                        graphicsPipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();
                        graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount, time_now_d(), -1);
                    }
                    pipeline = graphicsPipeline->pipeline[(size_t)rpType]->BlockUntilReady();
                }

                if (pipeline != VK_NULL_HANDLE) {
                    vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
                    descSets = &c.graphics_pipeline.pipelineLayout->frameData[curFrame].descSets_;
                    pipelineLayout = c.graphics_pipeline.pipelineLayout->pipelineLayout;
                    _dbg_assert_(pipelineLayout != VK_NULL_HANDLE);
                    lastGraphicsPipeline = graphicsPipeline;
                    pipelineOK = true;
                } else {
                    pipelineOK = false;
                }

                // Reset dynamic state so it gets refreshed with the new pipeline.
                lastStencilWriteMask = -1;
                lastStencilCompareMask = -1;
                lastStencilReference = -1;
            }
            break;
        }

        case VKRRenderCommand::VIEWPORT:
            if (fb != nullptr) {
                vkCmdSetViewport(cmd, 0, 1, &c.viewport.vp);
            } else {
                const VkViewport &vp = c.viewport.vp;
                DisplayRect<float> rc{ vp.x, vp.y, vp.width, vp.height };
                RotateRectToDisplay(rc, (float)vulkan_->GetBackbufferWidth(), (float)vulkan_->GetBackbufferHeight());
                VkViewport final_vp;
                final_vp.x = rc.x;
                final_vp.y = rc.y;
                final_vp.width = rc.w;
                final_vp.height = rc.h;
                final_vp.maxDepth = vp.maxDepth;
                final_vp.minDepth = vp.minDepth;
                vkCmdSetViewport(cmd, 0, 1, &final_vp);
            }
            break;

        case VKRRenderCommand::SCISSOR:
        {
            if (fb != nullptr) {
                vkCmdSetScissor(cmd, 0, 1, &c.scissor.scissor);
            } else {
                // Rendering to backbuffer. Might need to rotate.
                const VkRect2D &rc = c.scissor.scissor;
                DisplayRect<int> rotated_rc{ rc.offset.x, rc.offset.y, (int)rc.extent.width, (int)rc.extent.height };
                RotateRectToDisplay(rotated_rc, vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
                _dbg_assert_(rotated_rc.x >= 0);
                _dbg_assert_(rotated_rc.y >= 0);
                VkRect2D finalRect = VkRect2D{ { rotated_rc.x, rotated_rc.y }, { (uint32_t)rotated_rc.w, (uint32_t)rotated_rc.h } };
                vkCmdSetScissor(cmd, 0, 1, &finalRect);
            }
            break;
        }

        case VKRRenderCommand::BLEND:
        {
            float bc[4];
            Uint8x4ToFloat4(bc, c.blendColor.color);
            vkCmdSetBlendConstants(cmd, bc);
            break;
        }

        case VKRRenderCommand::PUSH_CONSTANTS:
            if (pipelineOK) {
                vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
            }
            break;

        case VKRRenderCommand::STENCIL:
            if (lastStencilWriteMask != c.stencil.stencilWriteMask) {
                lastStencilWriteMask = (int)c.stencil.stencilWriteMask;
                vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, c.stencil.stencilWriteMask);
            }
            if (lastStencilCompareMask != c.stencil.stencilCompareMask) {
                lastStencilCompareMask = c.stencil.stencilCompareMask;
                vkCmdSetStencilCompareMask(cmd, VK_STENCIL_FRONT_AND_BACK, c.stencil.stencilCompareMask);
            }
            if (lastStencilReference != c.stencil.stencilRef) {
                lastStencilReference = c.stencil.stencilRef;
                vkCmdSetStencilReference(cmd, VK_STENCIL_FRONT_AND_BACK, c.stencil.stencilRef);
            }
            break;

        case VKRRenderCommand::DRAW_INDEXED:
            if (pipelineOK) {
                VkDescriptorSet set = (*descSets)[c.drawIndexed.descSetIndex].set;
                _dbg_assert_(set != VK_NULL_HANDLE);
                vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets);
                vkCmdBindIndexBuffer(cmd, c.drawIndexed.ibuffer, c.drawIndexed.ioffset, VK_INDEX_TYPE_UINT16);
                VkDeviceSize voffset = c.drawIndexed.voffset;
                vkCmdBindVertexBuffers(cmd, 0, 1, &c.drawIndexed.vbuffer, &voffset);
                vkCmdDrawIndexed(cmd, c.drawIndexed.count, c.drawIndexed.instances, 0, 0, 0);
            }
            break;

        case VKRRenderCommand::DRAW:
            if (pipelineOK) {
                VkDescriptorSet set = (*descSets)[c.draw.descSetIndex].set;
                _dbg_assert_(set != VK_NULL_HANDLE);
                vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.draw.numUboOffsets, c.draw.uboOffsets);
                if (c.draw.vbuffer) {
                    vkCmdBindVertexBuffers(cmd, 0, 1, &c.draw.vbuffer, &c.draw.voffset);
                }
                vkCmdDraw(cmd, c.draw.count, 1, c.draw.offset, 0);
            }
            break;

        case VKRRenderCommand::CLEAR:
        {
            // If we get here, we failed to merge a clear into a render pass load op. This is bad for perf.
            int numAttachments = 0;
            VkClearRect rc{};
            rc.baseArrayLayer = 0;
            rc.layerCount = 1;  // In multiview mode, 1 means to replicate to all the active layers.
            rc.rect.extent.width = (uint32_t)curWidth;
            rc.rect.extent.height = (uint32_t)curHeight;
            VkClearAttachment attachments[2]{};
            if (c.clear.clearMask & VK_IMAGE_ASPECT_COLOR_BIT) {
                VkClearAttachment &attachment = attachments[numAttachments++];
                attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
                attachment.colorAttachment = 0;
                Uint8x4ToFloat4(attachment.clearValue.color.float32, c.clear.clearColor);
            }
            if (c.clear.clearMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
                VkClearAttachment &attachment = attachments[numAttachments++];
                attachment.aspectMask = 0;
                if (c.clear.clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
                    attachment.clearValue.depthStencil.depth = c.clear.clearZ;
                    attachment.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
                }
                if (c.clear.clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
                    attachment.clearValue.depthStencil.stencil = (uint32_t)c.clear.clearStencil;
                    attachment.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
                }
            }
            if (numAttachments) {
                vkCmdClearAttachments(cmd, numAttachments, attachments, 1, &rc);
            }
            break;
        }

        case VKRRenderCommand::DEBUG_ANNOTATION:
            if (vulkan_->Extensions().EXT_debug_utils) {
                VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
                labelInfo.pLabelName = c.debugAnnotation.annotation;
                vkCmdInsertDebugUtilsLabelEXT(cmd, &labelInfo);
            }
            break;

        default:
            UNREACHABLE();
            break;
        }
    }
    vkCmdEndRenderPass(cmd);

    _dbg_assert_(recordBarrier_.empty());

    if (fb) {
        // If the desired final layout isn't the layout the attachment ends the pass in, transition it here.
        if (step.render.finalColorLayout != fb->color.layout) {
            recordBarrier_.TransitionColorImageAuto(&fb->color, step.render.finalColorLayout);
        }
        if (fb->depth.image && step.render.finalDepthStencilLayout != fb->depth.layout) {
            recordBarrier_.TransitionDepthStencilImageAuto(&fb->depth, step.render.finalDepthStencilLayout);
        }
    }
}
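
// Selects (or creates) the render pass and framebuffer for a RENDER step, records the layout
// transitions the attachments need, then begins the render pass with the right clear values.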
VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step, VkCommandBuffer cmd) {
    VKRRenderPass *renderPass;
    int numClearVals = 0;
    VkClearValue clearVal[4]{};
    VkFramebuffer framebuf;
    int w;
    int h;

    bool hasDepth = RenderPassTypeHasDepth(step.render.renderPassType);

    VkSampleCountFlagBits sampleCount;

    // Can be used to separate the final*Layout barrier from the rest, for debugging in RenderDoc.
    // recordBarrier_.Flush(cmd);

    if (step.render.framebuffer) {
        _dbg_assert_(step.render.finalColorLayout != VK_IMAGE_LAYOUT_UNDEFINED);
        _dbg_assert_(step.render.finalDepthStencilLayout != VK_IMAGE_LAYOUT_UNDEFINED);

        RPKey key{
            step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad,
            step.render.colorStore, step.render.depthStore, step.render.stencilStore,
        };
        renderPass = GetRenderPass(key);

        VKRFramebuffer *fb = step.render.framebuffer;
        framebuf = fb->Get(renderPass, step.render.renderPassType);
        sampleCount = fb->sampleCount;
        _dbg_assert_(framebuf != VK_NULL_HANDLE);
        w = fb->width;
        h = fb->height;

        // The Mali driver on the S8 (Android O) and S9 mishandles renderpasses that do just a clear
        // and then no draw calls. Memory transaction elimination gets mis-flagged or something.
        // To avoid this, we transition to GENERAL and back in this case (ARM-approved workaround).
        // See pull request #10723.
        bool maliBugWorkaround = step.render.numDraws == 0 &&
            step.render.colorLoad == VKRRenderPassLoadAction::CLEAR &&
            vulkan_->GetPhysicalDeviceProperties().properties.driverVersion == 0xaa9c4b29;
        if (maliBugWorkaround) {
            // A little suboptimal but let's go for maximum safety here.
            recordBarrier_.TransitionImage(fb->color.image, 0, 1, fb->numLayers, VK_IMAGE_ASPECT_COLOR_BIT,
                fb->color.layout, VK_IMAGE_LAYOUT_GENERAL,
                VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
                VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
            fb->color.layout = VK_IMAGE_LAYOUT_GENERAL;
        }

        recordBarrier_.TransitionColorImageAuto(&fb->color, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);

        // If the render pass doesn't touch depth, we can avoid a layout transition of the depth buffer.
        if (fb->depth.image && RenderPassTypeHasDepth(step.render.renderPassType)) {
            recordBarrier_.TransitionDepthStencilImageAuto(&fb->depth, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
        }

        // The transition from the optimal format happens after EndRenderPass, now that we don't
        // do it as part of the renderpass itself anymore.

        if (sampleCount != VK_SAMPLE_COUNT_1_BIT) {
            // We don't initialize values for these.
            numClearVals = hasDepth ? 2 : 1;  // Skip the resolve buffers, don't need to clear those.
        }
        if (step.render.colorLoad == VKRRenderPassLoadAction::CLEAR) {
            Uint8x4ToFloat4(clearVal[numClearVals].color.float32, step.render.clearColor);
        }
        numClearVals++;
        if (hasDepth) {
            if (step.render.depthLoad == VKRRenderPassLoadAction::CLEAR || step.render.stencilLoad == VKRRenderPassLoadAction::CLEAR) {
                clearVal[numClearVals].depthStencil.depth = step.render.clearDepth;
                clearVal[numClearVals].depthStencil.stencil = step.render.clearStencil;
            }
            numClearVals++;
        }
        _dbg_assert_(numClearVals != 3);
    } else {
        RPKey key{
            VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,
            VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,
        };
        renderPass = GetRenderPass(key);
        framebuf = backbuffer_;

        // Raw, rotated backbuffer size.
        w = vulkan_->GetBackbufferWidth();
        h = vulkan_->GetBackbufferHeight();

        Uint8x4ToFloat4(clearVal[0].color.float32, step.render.clearColor);
        numClearVals = hasDepth ? 2 : 1;  // We might do a depth-less backbuffer in the future, though doubtful of the value.
        clearVal[1].depthStencil.depth = 0.0f;
        clearVal[1].depthStencil.stencil = 0;
        sampleCount = VK_SAMPLE_COUNT_1_BIT;
    }

    VkRenderPassBeginInfo rp_begin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO };
    rp_begin.renderPass = renderPass->Get(vulkan_, step.render.renderPassType, sampleCount);
    rp_begin.framebuffer = framebuf;

    VkRect2D rc = step.render.renderArea;
    if (!step.render.framebuffer) {
        // Rendering to backbuffer, must rotate, just like scissors.
        DisplayRect<int> rotated_rc{ rc.offset.x, rc.offset.y, (int)rc.extent.width, (int)rc.extent.height };
        RotateRectToDisplay(rotated_rc, vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());

        rc.offset.x = rotated_rc.x;
        rc.offset.y = rotated_rc.y;
        rc.extent.width = rotated_rc.w;
        rc.extent.height = rotated_rc.h;
    }

    recordBarrier_.Flush(cmd);

    rp_begin.renderArea = rc;
    rp_begin.clearValueCount = numClearVals;
    rp_begin.pClearValues = numClearVals ? clearVal : nullptr;
    vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);

    return renderPass;
}
void VulkanQueueRunner::PerformCopy(const VKRStep &step, VkCommandBuffer cmd) {
1387
VKRFramebuffer *src = step.copy.src;
1388
VKRFramebuffer *dst = step.copy.dst;
1389
1390
int layerCount = std::min(step.copy.src->numLayers, step.copy.dst->numLayers);
1391
_dbg_assert_(step.copy.src->numLayers >= step.copy.dst->numLayers);
1392
1393
// TODO: If dst covers exactly the whole destination, we can set up a UNDEFINED->TRANSFER_DST_OPTIMAL transition,
1394
// which can potentially be more efficient.
1395
1396
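	// When src == dst, the copy happens within a single image, and that image must then be in a layout
	// that is valid as both transfer source and transfer destination - GENERAL is the only one that qualifies.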
	const VkImageLayout srcTransferLayout = src != dst ? VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL : VK_IMAGE_LAYOUT_GENERAL;

	if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		recordBarrier_.TransitionColorImageAuto(&src->color, srcTransferLayout);
		if (src != dst) {
			recordBarrier_.TransitionColorImageAuto(&dst->color, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
		}
	}

	// We can't copy only depth or only stencil, unfortunately - or can we?
	if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		_dbg_assert_(src->depth.image != VK_NULL_HANDLE);
		recordBarrier_.TransitionDepthStencilImageAuto(&src->depth, srcTransferLayout);
		if (src != dst) {
			if (dst->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
				recordBarrier_.TransitionDepthStencilImageAuto(&dst->depth, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
			} else {
				// Kingdom Hearts copies twice in a row to the same depth buffer without any other use in between.
				// Not super sure how that happens, but we need a barrier to pass sync validation.
				SetupTransferDstWriteAfterWrite(dst->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
			}
		}
	}

	bool multisampled = src->sampleCount != VK_SAMPLE_COUNT_1_BIT && dst->sampleCount != VK_SAMPLE_COUNT_1_BIT;
	if (multisampled) {
		// If both targets are multisampled, copy the MSAA targets too.
		// For that, we need to transition them from their normally permanent VK_*_ATTACHMENT_OPTIMAL layouts, and then back.
		if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
			recordBarrier_.TransitionColorImageAuto(&src->msaaColor, srcTransferLayout);
			if (src != dst) {
				recordBarrier_.TransitionColorImageAuto(&dst->msaaColor, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
			}
		}
		if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			recordBarrier_.TransitionDepthStencilImageAuto(&src->msaaDepth, srcTransferLayout);
			if (src != dst) {
				recordBarrier_.TransitionDepthStencilImageAuto(&dst->msaaDepth, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
			}
		}
	}

	recordBarrier_.Flush(cmd);

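	// A single VkImageCopy region covers every layer at once via layerCount. Note that unlike a blit,
	// vkCmdCopyImage can't scale - the source and destination extents are necessarily identical.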
	VkImageCopy copy{};
	copy.srcOffset.x = step.copy.srcRect.offset.x;
	copy.srcOffset.y = step.copy.srcRect.offset.y;
	copy.srcOffset.z = 0;
	copy.srcSubresource.mipLevel = 0;
	copy.srcSubresource.layerCount = layerCount;
	copy.dstOffset.x = step.copy.dstPos.x;
	copy.dstOffset.y = step.copy.dstPos.y;
	copy.dstOffset.z = 0;
	copy.dstSubresource.mipLevel = 0;
	copy.dstSubresource.layerCount = layerCount;
	copy.extent.width = step.copy.srcRect.extent.width;
	copy.extent.height = step.copy.srcRect.extent.height;
	copy.extent.depth = 1;

	if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		vkCmdCopyImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &copy);

		if (multisampled) {
			vkCmdCopyImage(cmd, src->msaaColor.image, src->msaaColor.layout, dst->msaaColor.image, dst->msaaColor.layout, 1, &copy);
		}
	}
	if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		_dbg_assert_(src->depth.image != VK_NULL_HANDLE);
		_dbg_assert_(dst->depth.image != VK_NULL_HANDLE);
		copy.srcSubresource.aspectMask = step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
		copy.dstSubresource.aspectMask = step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
		vkCmdCopyImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &copy);

		if (multisampled) {
			vkCmdCopyImage(cmd, src->msaaDepth.image, src->msaaDepth.layout, dst->msaaDepth.image, dst->msaaDepth.layout, 1, &copy);
		}
	}

	if (multisampled) {
		// Transition the MSAA surfaces back to optimal.
		if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
			recordBarrier_.TransitionImage(
				src->msaaColor.image,
				0,
				1,
				src->msaaColor.numLayers,
				VK_IMAGE_ASPECT_COLOR_BIT,
				srcTransferLayout,
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				VK_ACCESS_TRANSFER_READ_BIT,
				VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
			);
			src->msaaColor.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
			if (src != dst) {
				recordBarrier_.TransitionImage(
					dst->msaaColor.image,
					0,
					1,
					dst->msaaColor.numLayers,
					VK_IMAGE_ASPECT_COLOR_BIT,
					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
					VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
					VK_ACCESS_TRANSFER_WRITE_BIT,
					VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
					VK_PIPELINE_STAGE_TRANSFER_BIT,
					VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
				);
				dst->msaaColor.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
			}
		}
		if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			recordBarrier_.TransitionImage(
				src->msaaDepth.image,
				0,
				1,
				src->msaaDepth.numLayers,
				VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
				srcTransferLayout,
				VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
				VK_ACCESS_TRANSFER_READ_BIT,
				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT,
				VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
			);
			src->msaaDepth.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
			if (src != dst) {
				recordBarrier_.TransitionImage(
					dst->msaaDepth.image,
					0,
					1,
					dst->msaaDepth.numLayers,
					VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
					VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
					VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
					VK_ACCESS_TRANSFER_WRITE_BIT,
					VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
					VK_PIPELINE_STAGE_TRANSFER_BIT,
					VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT
				);
				dst->msaaDepth.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
			}
		}
		// Probably not necessary.
		recordBarrier_.Flush(cmd);
	}
}

void VulkanQueueRunner::PerformBlit(const VKRStep &step, VkCommandBuffer cmd) {
	// The barrier code doesn't handle this case. We'd need to transition to GENERAL to do an intra-image blit.
	_dbg_assert_(step.blit.src != step.blit.dst);

	int layerCount = std::min(step.blit.src->numLayers, step.blit.dst->numLayers);
	_dbg_assert_(step.blit.src->numLayers >= step.blit.dst->numLayers);

	// Blitting is not allowed for multisample images. You're supposed to use vkCmdResolveImage, but that only goes in one direction (multisample to single-sample).
	_dbg_assert_(step.blit.src->sampleCount == VkSampleCountFlagBits::VK_SAMPLE_COUNT_1_BIT);
	_dbg_assert_(step.blit.dst->sampleCount == VkSampleCountFlagBits::VK_SAMPLE_COUNT_1_BIT);

	VKRFramebuffer *src = step.blit.src;
	VKRFramebuffer *dst = step.blit.dst;

	// First, the source and destination barriers.
	if (step.blit.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		recordBarrier_.TransitionColorImageAuto(&src->color, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		recordBarrier_.TransitionColorImageAuto(&dst->color, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
	}

	// We can't blit only depth or only stencil, unfortunately.
	if (step.blit.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		_assert_(src->depth.image != VK_NULL_HANDLE);
		_assert_(dst->depth.image != VK_NULL_HANDLE);
		recordBarrier_.TransitionDepthStencilImageAuto(&src->depth, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		recordBarrier_.TransitionDepthStencilImageAuto(&dst->depth, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
	}

	recordBarrier_.Flush(cmd);

	// If any validation needs to be performed here, it should probably have been done
	// already when the blit was queued. So don't validate here.
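	// VkImageBlit takes corner pairs rather than offset+extent: offsets[1] is the exclusive far
	// corner, and for 2D images the z coordinates must be exactly 0 and 1.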
	VkImageBlit blit{};
	blit.srcOffsets[0].x = step.blit.srcRect.offset.x;
	blit.srcOffsets[0].y = step.blit.srcRect.offset.y;
	blit.srcOffsets[0].z = 0;
	blit.srcOffsets[1].x = step.blit.srcRect.offset.x + step.blit.srcRect.extent.width;
	blit.srcOffsets[1].y = step.blit.srcRect.offset.y + step.blit.srcRect.extent.height;
	blit.srcOffsets[1].z = 1;
	blit.srcSubresource.mipLevel = 0;
	blit.srcSubresource.layerCount = layerCount;
	blit.dstOffsets[0].x = step.blit.dstRect.offset.x;
	blit.dstOffsets[0].y = step.blit.dstRect.offset.y;
	blit.dstOffsets[0].z = 0;
	blit.dstOffsets[1].x = step.blit.dstRect.offset.x + step.blit.dstRect.extent.width;
	blit.dstOffsets[1].y = step.blit.dstRect.offset.y + step.blit.dstRect.extent.height;
	blit.dstOffsets[1].z = 1;
	blit.dstSubresource.mipLevel = 0;
	blit.dstSubresource.layerCount = layerCount;

	if (step.blit.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		vkCmdBlitImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &blit, step.blit.filter);
	}

	// TODO: Need to check if the depth format is blittable.
	// Actually, we should probably almost always use copies rather than blits for depth buffers.
	if (step.blit.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		blit.srcSubresource.aspectMask = 0;
		blit.dstSubresource.aspectMask = 0;
		if (step.blit.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
			blit.srcSubresource.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
			blit.dstSubresource.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
		}
		if (step.blit.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
			blit.srcSubresource.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
			blit.dstSubresource.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
		}
		vkCmdBlitImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &blit, step.blit.filter);
	}
}

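// Emits a write-after-write barrier for back-to-back transfer writes to the same image: the layout
// stays TRANSFER_DST_OPTIMAL, but the TRANSFER_WRITE -> TRANSFER_WRITE dependency orders the two
// copies so the second can't start writing before the first one's writes have completed.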
void VulkanQueueRunner::SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrierBatch *recordBarrier) {
	VkImageAspectFlags imageAspect = aspect;
	if (img.format == VK_FORMAT_D16_UNORM_S8_UINT || img.format == VK_FORMAT_D24_UNORM_S8_UINT || img.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
		// The barrier must specify both aspects for combined depth/stencil buffers.
		imageAspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	}
	_dbg_assert_(img.layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
	recordBarrier->TransitionImage(
		img.image,
		0,
		1,
		img.numLayers,
		imageAspect,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		VK_ACCESS_TRANSFER_WRITE_BIT,
		VK_ACCESS_TRANSFER_WRITE_BIT,
		VK_PIPELINE_STAGE_TRANSFER_BIT,
		VK_PIPELINE_STAGE_TRANSFER_BIT
	);
}

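// Grow-only allocation: the buffer is only recreated when the required size exceeds the current
// one, so steady-state readbacks of the same size reuse the same allocation frame after frame.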
void VulkanQueueRunner::ResizeReadbackBuffer(CachedReadback *readback, VkDeviceSize requiredSize) {
	if (readback->buffer && requiredSize <= readback->bufferSize) {
		return;
	}

	if (readback->buffer) {
		vulkan_->Delete().QueueDeleteBufferAllocation(readback->buffer, readback->allocation);
	}

	readback->bufferSize = requiredSize;

	VkBufferCreateInfo buf{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
	buf.size = readback->bufferSize;
	buf.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;

	VmaAllocationCreateInfo allocCreateInfo{};
	allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
	VmaAllocationInfo allocInfo{};

	VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &buf, &allocCreateInfo, &readback->buffer, &readback->allocation, &allocInfo);
	_assert_(res == VK_SUCCESS);

	const VkMemoryType &memoryType = vulkan_->GetMemoryProperties().memoryTypes[allocInfo.memoryType];
	readback->isCoherent = (memoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
}

void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd, FrameData &frameData) {
	VkImage image;
	VkImageLayout copyLayout;
	// Special case for backbuffer readbacks.
	if (step.readback.src == nullptr) {
		// We only take screenshots after the main render pass (anything else would be stupid), so we need
		// to transition out of PRESENT, and then back into it afterwards.
		// Regarding layers, the backbuffer currently only has one.
		recordBarrier_.TransitionImage(backbufferImage_, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT,
			VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
			0, VK_ACCESS_TRANSFER_READ_BIT,
			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
		copyLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
		image = backbufferImage_;
	} else {
		VKRImage *srcImage;
		if (step.readback.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
			srcImage = &step.readback.src->color;
			recordBarrier_.TransitionColorImageAuto(srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		} else if (step.readback.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			srcImage = &step.readback.src->depth;
			recordBarrier_.TransitionDepthStencilImageAuto(srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			_dbg_assert_(srcImage->image != VK_NULL_HANDLE);
		} else {
			_dbg_assert_msg_(false, "No image aspect to read back?");
			return;
		}
		image = srcImage->image;
		copyLayout = srcImage->layout;
	}

	recordBarrier_.Flush(cmd);

	// TODO: Handle different readback formats! For now, four bytes per pixel are assumed.
	u32 readbackSizeInBytes = sizeof(uint32_t) * step.readback.srcRect.extent.width * step.readback.srcRect.extent.height;

	CachedReadback *cached = nullptr;

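	// Delayed readbacks get a buffer cached per frame, keyed on (framebuffer, width, height), so the
	// result can be fetched on a later frame without stalling. Synchronous readbacks share syncReadback_.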
	if (step.readback.delayed) {
		ReadbackKey key;
		key.framebuf = step.readback.src;
		key.width = step.readback.srcRect.extent.width;
		key.height = step.readback.srcRect.extent.height;

		// See if there's already a buffer we can reuse.
		if (!frameData.readbacks_.Get(key, &cached)) {
			cached = new CachedReadback();
			cached->bufferSize = 0;
			frameData.readbacks_.Insert(key, cached);
		}
	} else {
		cached = &syncReadback_;
	}

	ResizeReadbackBuffer(cached, readbackSizeInBytes);

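	// bufferRowLength and bufferImageHeight are measured in texels, not bytes; matching them to the
	// copy extent makes the destination buffer tightly packed.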
	VkBufferImageCopy region{};
	region.imageOffset = { step.readback.srcRect.offset.x, step.readback.srcRect.offset.y, 0 };
	region.imageExtent = { step.readback.srcRect.extent.width, step.readback.srcRect.extent.height, 1 };
	region.imageSubresource.aspectMask = step.readback.aspectMask;
	region.imageSubresource.layerCount = 1;
	region.bufferOffset = 0;
	region.bufferRowLength = step.readback.srcRect.extent.width;
	region.bufferImageHeight = step.readback.srcRect.extent.height;

	vkCmdCopyImageToBuffer(cmd, image, copyLayout, cached->buffer, 1, &region);

	// NOTE: Can't read the buffer using the CPU here - need to sync first.

	// If we copied from the backbuffer, transition it back into PRESENT layout.
	if (step.readback.src == nullptr) {
		// Regarding layers, the backbuffer currently only has one.
		recordBarrier_.TransitionImage(backbufferImage_, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT,
			VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
			VK_ACCESS_TRANSFER_READ_BIT, 0,
			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
		recordBarrier_.Flush(cmd); // probably not needed
	}
}

void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffer cmd) {
	// TODO: Clean this up - just reuse `SetupTransitionToTransferSrc`.
	VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	recordBarrier_.TransitionColorImageAuto(step.readback_image.image, &layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 1, 1);
	recordBarrier_.Flush(cmd);

	ResizeReadbackBuffer(&syncReadback_, sizeof(uint32_t) * step.readback_image.srcRect.extent.width * step.readback_image.srcRect.extent.height);

	VkBufferImageCopy region{};
	region.imageOffset = { step.readback_image.srcRect.offset.x, step.readback_image.srcRect.offset.y, 0 };
	region.imageExtent = { step.readback_image.srcRect.extent.width, step.readback_image.srcRect.extent.height, 1 };
	region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	region.imageSubresource.layerCount = 1;
	region.imageSubresource.mipLevel = step.readback_image.mipLevel;
	region.bufferOffset = 0;
	region.bufferRowLength = step.readback_image.srcRect.extent.width;
	region.bufferImageHeight = step.readback_image.srcRect.extent.height;
	vkCmdCopyImageToBuffer(cmd, step.readback_image.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, syncReadback_.buffer, 1, &region);

	// Now transition it back to being a shader-readable texture.
	recordBarrier_.TransitionImage(step.readback_image.image, 0, 1, 1, // I don't think we have any multilayer cases for regular textures. Above in PerformReadback, though...
		VK_IMAGE_ASPECT_COLOR_BIT,
		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT,
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
	recordBarrier_.Flush(cmd); // probably not needed

	// NOTE: Can't read the buffer using the CPU here - need to sync first.
	// Doing that will also act like a heavyweight barrier, ensuring that device writes are visible on the host.
}

bool VulkanQueueRunner::CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels) {
	CachedReadback *readback = &syncReadback_;

	// Look up in the readback cache.
	if (src) {
		ReadbackKey key;
		key.framebuf = src;
		key.width = width;
		key.height = height;
		CachedReadback *cached;
		if (frameData.readbacks_.Get(key, &cached)) {
			readback = cached;
		} else {
			// Didn't have a cached buffer ready yet.
			return false;
		}
	}

	if (!readback->buffer)
		return false; // Didn't find anything in the cache, or something has gone really wrong.

	// Read back from the buffer to the requested address in RAM.
	void *mappedData;
	const size_t srcPixelSize = DataFormatSizeInBytes(srcFormat);
	VkResult res = vmaMapMemory(vulkan_->Allocator(), readback->allocation, &mappedData);

	if (res != VK_SUCCESS) {
		ERROR_LOG(Log::G3D, "CopyReadbackBuffer: vmaMapMemory failed! result=%d", (int)res);
		return false;
	}

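	// For non-coherent memory, the mapped range must be invalidated explicitly, or the CPU
	// may read stale cache contents instead of what the GPU wrote.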
	if (!readback->isCoherent) {
		vmaInvalidateAllocation(vulkan_->Allocator(), readback->allocation, 0, width * height * srcPixelSize);
	}

	// TODO: Perform these conversions in a compute shader on the GPU.
	if (srcFormat == Draw::DataFormat::R8G8B8A8_UNORM) {
		ConvertFromRGBA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat);
	} else if (srcFormat == Draw::DataFormat::B8G8R8A8_UNORM) {
		ConvertFromBGRA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat);
	} else if (srcFormat == destFormat) {
		// When the source and destination formats match, we can just copy row by row, whatever the format is!
		uint8_t *dst = pixels;
		const uint8_t *src = (const uint8_t *)mappedData;
		for (int y = 0; y < height; ++y) {
			memcpy(dst, src, width * srcPixelSize);
			src += width * srcPixelSize;
			dst += pixelStride * srcPixelSize;
		}
	} else if (destFormat == Draw::DataFormat::D32F) {
		ConvertToD32F(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, srcFormat);
	} else if (destFormat == Draw::DataFormat::D16) {
		ConvertToD16(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, srcFormat);
	} else {
		// TODO: Maybe a depth conversion or something?
		ERROR_LOG(Log::G3D, "CopyReadbackBuffer: Unknown format");
		_assert_msg_(false, "CopyReadbackBuffer: Unknown src format %d", (int)srcFormat);
	}

	vmaUnmapMemory(vulkan_->Allocator(), readback->allocation);
	return true;
}

const char *VKRRenderCommandToString(VKRRenderCommand cmd) {
	const char * const str[] = {
		"REMOVED",
		"BIND_GRAPHICS_PIPELINE", // async
		"STENCIL",
		"BLEND",
		"VIEWPORT",
		"SCISSOR",
		"CLEAR",
		"DRAW",
		"DRAW_INDEXED",
		"PUSH_CONSTANTS",
		"DEBUG_ANNOTATION",
	};
	if ((size_t)cmd < ARRAY_SIZE(str)) {
		return str[(size_t)cmd];
	} else {
		return "N/A";
	}
}