GitHub Repository: hrydgard/ppsspp
Path: blob/master/Common/GPU/Vulkan/VulkanQueueRunner.cpp

#include <unordered_map>

#include "Common/GPU/DataFormat.h"
#include "Common/GPU/Vulkan/VulkanQueueRunner.h"
#include "Common/GPU/Vulkan/VulkanRenderManager.h"
#include "Common/VR/PPSSPPVR.h"
#include "Common/Log.h"
#include "Common/TimeUtil.h"

using namespace PPSSPP_VK;

// Debug help: adb logcat -s DEBUG AndroidRuntime PPSSPPNativeActivity PPSSPP NativeGLView NativeRenderer NativeSurfaceView PowerSaveModeReceiver InputDeviceState PpssppActivity CameraHelper

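// Expands *dest in place so that it covers the union of the two rectangles.
// For example, merging src {0,0, 64x64} into dest {32,32, 64x64} yields {0,0, 96x96}.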
static void MergeRenderAreaRectInto(VkRect2D *dest, const VkRect2D &src) {
	if (dest->offset.x > src.offset.x) {
		dest->extent.width += (dest->offset.x - src.offset.x);
		dest->offset.x = src.offset.x;
	}
	if (dest->offset.y > src.offset.y) {
		dest->extent.height += (dest->offset.y - src.offset.y);
		dest->offset.y = src.offset.y;
	}
	if (dest->offset.x + dest->extent.width < src.offset.x + src.extent.width) {
		dest->extent.width = src.offset.x + src.extent.width - dest->offset.x;
	}
	if (dest->offset.y + dest->extent.height < src.offset.y + src.extent.height) {
		dest->extent.height = src.offset.y + src.extent.height - dest->offset.y;
	}
}

// We need to take the "max" of the features used in the two render passes.
RenderPassType MergeRPTypes(RenderPassType a, RenderPassType b) {
	// Either both are backbuffer type, or neither is.
	// Backbuffer passes can't merge with other render passes.
	if (a == RenderPassType::BACKBUFFER || b == RenderPassType::BACKBUFFER) {
		_dbg_assert_(a == b);
		return a;
	}

	_dbg_assert_((a & RenderPassType::MULTIVIEW) == (b & RenderPassType::MULTIVIEW));

	// The rest we can just OR together to get the maximum feature set.
	return (RenderPassType)((u32)a | (u32)b);
}

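// Creates the default CLEAR/STORE render pass up front; InitBackbufferFramebuffers()
// below uses it (via GetCompatibleRenderPass()) when creating the backbuffer framebuffers.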
void VulkanQueueRunner::CreateDeviceObjects() {
	INFO_LOG(Log::G3D, "VulkanQueueRunner::CreateDeviceObjects");

	RPKey key{
		VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,
		VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,
	};
	compatibleRenderPass_ = GetRenderPass(key);

#if 0
	// Just to check whether it makes sense to split some of these. drawidx is way bigger than the others...
	// We should probably just move to variable-size data in a raw buffer anyway...
	VkRenderData rd;
	INFO_LOG(Log::G3D, "sizeof(pipeline): %d", (int)sizeof(rd.pipeline));
	INFO_LOG(Log::G3D, "sizeof(draw): %d", (int)sizeof(rd.draw));
	INFO_LOG(Log::G3D, "sizeof(drawidx): %d", (int)sizeof(rd.drawIndexed));
	INFO_LOG(Log::G3D, "sizeof(clear): %d", (int)sizeof(rd.clear));
	INFO_LOG(Log::G3D, "sizeof(viewport): %d", (int)sizeof(rd.viewport));
	INFO_LOG(Log::G3D, "sizeof(scissor): %d", (int)sizeof(rd.scissor));
	INFO_LOG(Log::G3D, "sizeof(blendColor): %d", (int)sizeof(rd.blendColor));
	INFO_LOG(Log::G3D, "sizeof(push): %d", (int)sizeof(rd.push));
#endif
}

void VulkanQueueRunner::DestroyDeviceObjects() {
	INFO_LOG(Log::G3D, "VulkanQueueRunner::DestroyDeviceObjects");

	syncReadback_.Destroy(vulkan_);

	renderPasses_.IterateMut([&](const RPKey &rpkey, VKRRenderPass *rp) {
		_assert_(rp);
		rp->Destroy(vulkan_);
		delete rp;
	});
	renderPasses_.Clear();
}

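// Queries the swapchain images, creates a color image view for each, then sets up the
// shared depth/stencil buffer and one backbuffer framebuffer per swapchain image.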
bool VulkanQueueRunner::CreateSwapchain(VkCommandBuffer cmdInit, VulkanBarrierBatch *barriers) {
	VkResult res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, nullptr);
	_dbg_assert_(res == VK_SUCCESS);

	VkImage *swapchainImages = new VkImage[swapchainImageCount_];
	res = vkGetSwapchainImagesKHR(vulkan_->GetDevice(), vulkan_->GetSwapchain(), &swapchainImageCount_, swapchainImages);
	if (res != VK_SUCCESS) {
		ERROR_LOG(Log::G3D, "vkGetSwapchainImagesKHR failed");
		delete[] swapchainImages;
		return false;
	}

	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
		SwapchainImageData sc_buffer{};
		sc_buffer.image = swapchainImages[i];

		VkImageViewCreateInfo color_image_view = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
		color_image_view.format = vulkan_->GetSwapchainFormat();
		color_image_view.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
		color_image_view.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		color_image_view.subresourceRange.baseMipLevel = 0;
		color_image_view.subresourceRange.levelCount = 1;
		color_image_view.subresourceRange.baseArrayLayer = 0;
		color_image_view.subresourceRange.layerCount = 1;  // TODO: Investigate hw-assisted stereo.
		color_image_view.viewType = VK_IMAGE_VIEW_TYPE_2D;
		color_image_view.flags = 0;
		color_image_view.image = sc_buffer.image;

		// We leave the images as UNDEFINED; there's no need to pre-transition them since
		// the backbuffer renderpass starts out with them being auto-transitioned from UNDEFINED anyway.
		// Also, it turns out it's illegal to transition un-acquired images, thanks Hans-Kristian. See #11417.

		res = vkCreateImageView(vulkan_->GetDevice(), &color_image_view, nullptr, &sc_buffer.view);
		vulkan_->SetDebugName(sc_buffer.view, VK_OBJECT_TYPE_IMAGE_VIEW, "swapchain_view");
		swapchainImages_.push_back(sc_buffer);
		_dbg_assert_(res == VK_SUCCESS);
	}
	delete[] swapchainImages;

	// Must be before InitBackbufferRenderPass.
	if (InitDepthStencilBuffer(cmdInit, barriers)) {
		InitBackbufferFramebuffers(vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
	}
	return true;
}

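// One framebuffer per swapchain image; they all share the single depth buffer
// created in InitDepthStencilBuffer().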
bool VulkanQueueRunner::InitBackbufferFramebuffers(int width, int height) {
	VkResult res;
	// We share the same depth buffer but have multiple color buffers, see the loop below.
	VkImageView attachments[2] = { VK_NULL_HANDLE, depth_.view };

	VkFramebufferCreateInfo fb_info = { VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO };
	fb_info.renderPass = GetCompatibleRenderPass()->Get(vulkan_, RenderPassType::BACKBUFFER, VK_SAMPLE_COUNT_1_BIT);
	fb_info.attachmentCount = 2;
	fb_info.pAttachments = attachments;
	fb_info.width = width;
	fb_info.height = height;
	fb_info.layers = 1;

	framebuffers_.resize(swapchainImageCount_);

	for (uint32_t i = 0; i < swapchainImageCount_; i++) {
		attachments[0] = swapchainImages_[i].view;
		res = vkCreateFramebuffer(vulkan_->GetDevice(), &fb_info, nullptr, &framebuffers_[i]);
		_dbg_assert_(res == VK_SUCCESS);
		if (res != VK_SUCCESS) {
			framebuffers_.clear();
			return false;
		}
	}

	return true;
}

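// Creates the transient depth/stencil image shared by all backbuffer framebuffers,
// using the device's preferred depth/stencil format, and queues its initial
// UNDEFINED -> DEPTH_STENCIL_ATTACHMENT_OPTIMAL transition in the barrier batch.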
bool VulkanQueueRunner::InitDepthStencilBuffer(VkCommandBuffer cmd, VulkanBarrierBatch *barriers) {
	const VkFormat depth_format = vulkan_->GetDeviceInfo().preferredDepthStencilFormat;
	int aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	VkImageCreateInfo image_info = { VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO };
	image_info.imageType = VK_IMAGE_TYPE_2D;
	image_info.format = depth_format;
	image_info.extent.width = vulkan_->GetBackbufferWidth();
	image_info.extent.height = vulkan_->GetBackbufferHeight();
	image_info.extent.depth = 1;
	image_info.mipLevels = 1;
	image_info.arrayLayers = 1;
	image_info.samples = VK_SAMPLE_COUNT_1_BIT;
	image_info.queueFamilyIndexCount = 0;
	image_info.pQueueFamilyIndices = nullptr;
	image_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	image_info.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
	image_info.flags = 0;

	depth_.format = depth_format;

	VmaAllocationCreateInfo allocCreateInfo{};
	VmaAllocationInfo allocInfo{};

	allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_ONLY;

	VkResult res = vmaCreateImage(vulkan_->Allocator(), &image_info, &allocCreateInfo, &depth_.image, &depth_.alloc, &allocInfo);
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	vulkan_->SetDebugName(depth_.image, VK_OBJECT_TYPE_IMAGE, "BackbufferDepth");

	VkImageMemoryBarrier *barrier = barriers->Add(depth_.image,
		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
		VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, 0);
	barrier->subresourceRange.aspectMask = aspectMask;
	barrier->oldLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	barrier->newLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
	barrier->srcAccessMask = 0;
	barrier->dstAccessMask = VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;

	VkImageViewCreateInfo depth_view_info = { VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO };
	depth_view_info.image = depth_.image;
	depth_view_info.format = depth_format;
	depth_view_info.components.r = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.g = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.b = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.components.a = VK_COMPONENT_SWIZZLE_IDENTITY;
	depth_view_info.subresourceRange.aspectMask = aspectMask;
	depth_view_info.subresourceRange.baseMipLevel = 0;
	depth_view_info.subresourceRange.levelCount = 1;
	depth_view_info.subresourceRange.baseArrayLayer = 0;
	depth_view_info.subresourceRange.layerCount = 1;
	depth_view_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
	depth_view_info.flags = 0;

	VkDevice device = vulkan_->GetDevice();

	res = vkCreateImageView(device, &depth_view_info, NULL, &depth_.view);
	vulkan_->SetDebugName(depth_.view, VK_OBJECT_TYPE_IMAGE_VIEW, "depth_stencil_backbuffer");
	_dbg_assert_(res == VK_SUCCESS);
	if (res != VK_SUCCESS)
		return false;

	return true;
}

void VulkanQueueRunner::DestroyBackBuffers() {
	for (auto &image : swapchainImages_) {
		vulkan_->Delete().QueueDeleteImageView(image.view);
	}
	swapchainImages_.clear();

	if (depth_.view) {
		vulkan_->Delete().QueueDeleteImageView(depth_.view);
	}
	if (depth_.image) {
		_dbg_assert_(depth_.alloc);
		vulkan_->Delete().QueueDeleteImageAllocation(depth_.image, depth_.alloc);
	}
	depth_ = {};
	for (uint32_t i = 0; i < framebuffers_.size(); i++) {
		_dbg_assert_(framebuffers_[i] != VK_NULL_HANDLE);
		vulkan_->Delete().QueueDeleteFramebuffer(framebuffers_[i]);
	}
	framebuffers_.clear();

	INFO_LOG(Log::G3D, "Backbuffers destroyed");
}

// Self-dependency: https://github.com/gpuweb/gpuweb/issues/442#issuecomment-547604827
// Also see https://www.khronos.org/registry/vulkan/specs/1.3-extensions/html/vkspec.html#synchronization-pipeline-barriers-subpass-self-dependencies
VKRRenderPass *VulkanQueueRunner::GetRenderPass(const RPKey &key) {
	VKRRenderPass *foundPass;
	if (renderPasses_.Get(key, &foundPass)) {
		return foundPass;
	}

	VKRRenderPass *pass = new VKRRenderPass(key);
	renderPasses_.Insert(key, pass);
	return pass;
}

void VulkanQueueRunner::PreprocessSteps(std::vector<VKRStep *> &steps) {
	// Optimizes renderpasses, then sequences them.
	// Planned optimizations:
	// * Create copies of render targets that are rendered to multiple times and textured from in sequence, and push those render passes
	//   as early as possible in the frame (Wipeout billboards). This will require taking over more of descriptor management so we can
	//   substitute descriptors, alternatively using texture array layers creatively.

	for (int j = 0; j < (int)steps.size(); j++) {
		if (steps[j]->stepType == VKRStepType::RENDER &&
			steps[j]->render.framebuffer) {
			if (steps[j]->render.finalColorLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
				steps[j]->render.finalColorLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
			}
			if (steps[j]->render.finalDepthStencilLayout == VK_IMAGE_LAYOUT_UNDEFINED) {
				steps[j]->render.finalDepthStencilLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
			}
		}
	}

	for (int j = 0; j < (int)steps.size() - 1; j++) {
		// Push down empty "Clear/Store" renderpasses, and merge them with the first "Load/Store" to the same framebuffer.
		if (steps.size() > 1 && steps[j]->stepType == VKRStepType::RENDER &&
			steps[j]->render.numDraws == 0 &&
			steps[j]->render.numReads == 0 &&
			steps[j]->render.colorLoad == VKRRenderPassLoadAction::CLEAR &&
			steps[j]->render.stencilLoad == VKRRenderPassLoadAction::CLEAR &&
			steps[j]->render.depthLoad == VKRRenderPassLoadAction::CLEAR) {

			// Drop the clear step, and merge it into the next step that touches the same framebuffer.
			for (int i = j + 1; i < (int)steps.size(); i++) {
				if (steps[i]->stepType == VKRStepType::RENDER &&
					steps[i]->render.framebuffer == steps[j]->render.framebuffer) {
					if (steps[i]->render.colorLoad != VKRRenderPassLoadAction::CLEAR) {
						steps[i]->render.colorLoad = VKRRenderPassLoadAction::CLEAR;
						steps[i]->render.clearColor = steps[j]->render.clearColor;
					}
					if (steps[i]->render.depthLoad != VKRRenderPassLoadAction::CLEAR) {
						steps[i]->render.depthLoad = VKRRenderPassLoadAction::CLEAR;
						steps[i]->render.clearDepth = steps[j]->render.clearDepth;
					}
					if (steps[i]->render.stencilLoad != VKRRenderPassLoadAction::CLEAR) {
						steps[i]->render.stencilLoad = VKRRenderPassLoadAction::CLEAR;
						steps[i]->render.clearStencil = steps[j]->render.clearStencil;
					}
					MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
					steps[i]->render.renderPassType = MergeRPTypes(steps[i]->render.renderPassType, steps[j]->render.renderPassType);
					steps[i]->render.numDraws += steps[j]->render.numDraws;
					steps[i]->render.numReads += steps[j]->render.numReads;
					// Cheaply skip the first step.
					steps[j]->stepType = VKRStepType::RENDER_SKIP;
					break;
				} else if (steps[i]->stepType == VKRStepType::COPY &&
					steps[i]->copy.src == steps[j]->render.framebuffer) {
					// Can't eliminate the clear if a game copies from it before it's
					// rendered to. However, this should be rare.
					// TODO: This should never happen now that we check numReads.
					break;
				}
			}
		}
	}

	// Queue hacks.
	if (hacksEnabled_) {
		if (hacksEnabled_ & QUEUE_HACK_MGS2_ACID) {
			// Massive speedup.
			ApplyMGSHack(steps);
		}
		if (hacksEnabled_ & QUEUE_HACK_SONIC) {
			ApplySonicHack(steps);
		}
		if (hacksEnabled_ & QUEUE_HACK_RENDERPASS_MERGE) {
			ApplyRenderPassMerge(steps);
		}
	}
}

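// Executes the preprocessed steps into the frame's main (or present) command buffer,
// optionally emitting debug labels and GPU timestamps, then (unless keepSteps is set)
// deletes the steps.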
void VulkanQueueRunner::RunSteps(std::vector<VKRStep *> &steps, int curFrame, FrameData &frameData, FrameDataShared &frameDataShared, bool keepSteps) {
	QueueProfileContext *profile = frameData.profile.enabled ? &frameData.profile : nullptr;

	if (profile)
		profile->cpuStartTime = time_now_d();

	bool emitLabels = vulkan_->Extensions().EXT_debug_utils;

	VkCommandBuffer cmd = frameData.hasPresentCommands ? frameData.presentCmd : frameData.mainCmd;

	for (size_t i = 0; i < steps.size(); i++) {
		const VKRStep &step = *steps[i];
		if (emitLabels) {
			VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
			char temp[128];
			if (step.stepType == VKRStepType::RENDER && step.render.framebuffer) {
				snprintf(temp, sizeof(temp), "%s: %s", step.tag, step.render.framebuffer->Tag());
				labelInfo.pLabelName = temp;
			} else {
				labelInfo.pLabelName = step.tag;
			}
			vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
		}

		switch (step.stepType) {
		case VKRStepType::RENDER:
			if (!step.render.framebuffer) {
				if (emitLabels) {
					vkCmdEndDebugUtilsLabelEXT(cmd);
				}
				frameData.Submit(vulkan_, FrameSubmitType::Pending, frameDataShared);

				// When stepping in the GE debugger, we can end up here multiple times in a "frame".
				// So only acquire once.
				if (!frameData.hasAcquired) {
					frameData.AcquireNextImage(vulkan_);
					SetBackbuffer(framebuffers_[frameData.curSwapchainImage], swapchainImages_[frameData.curSwapchainImage].image);
				}

				if (!frameData.hasPresentCommands) {
					// A RENDER step rendering to the backbuffer is normally the last step that happens in a frame,
					// unless taking a screenshot, in which case there might be a READBACK_IMAGE after it.
					// This is why we have to switch cmd to presentCmd, in this case.
					VkCommandBufferBeginInfo begin{ VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO };
					begin.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;
					vkBeginCommandBuffer(frameData.presentCmd, &begin);
					frameData.hasPresentCommands = true;
				}
				cmd = frameData.presentCmd;
				if (emitLabels) {
					VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
					labelInfo.pLabelName = "present";
					vkCmdBeginDebugUtilsLabelEXT(cmd, &labelInfo);
				}
			}
			PerformRenderPass(step, cmd, curFrame, frameData.profile);
			break;
		case VKRStepType::COPY:
			PerformCopy(step, cmd);
			break;
		case VKRStepType::BLIT:
			PerformBlit(step, cmd);
			break;
		case VKRStepType::READBACK:
			PerformReadback(step, cmd, frameData);
			break;
		case VKRStepType::READBACK_IMAGE:
			PerformReadbackImage(step, cmd);
			break;
		case VKRStepType::RENDER_SKIP:
			break;
		}

		if (profile && profile->timestampsEnabled && profile->timestampDescriptions.size() + 1 < MAX_TIMESTAMP_QUERIES) {
			vkCmdWriteTimestamp(cmd, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, profile->queryPool, (uint32_t)profile->timestampDescriptions.size());
			profile->timestampDescriptions.push_back(StepToString(vulkan_, step));
		}

		if (emitLabels) {
			vkCmdEndDebugUtilsLabelEXT(cmd);
		}
	}

	// Deleting all in one go should be easier on the instruction cache than deleting
	// them as we go - and easier to debug because we can look backwards in the frame.
	if (!keepSteps) {
		for (auto step : steps) {
			delete step;
		}
		steps.clear();
	}

	if (profile)
		profile->cpuEndTime = time_now_d();
}

void VulkanQueueRunner::ApplyMGSHack(std::vector<VKRStep *> &steps) {
	// Really need a sane way to express transforms of steps.

	// We want to turn a sequence of copy,render(1),copy,render(1),copy,render(1) into copy,copy,copy,render(n).

	for (int i = 0; i < (int)steps.size() - 3; i++) {
		int last = -1;
		if (!(steps[i]->stepType == VKRStepType::COPY &&
			steps[i + 1]->stepType == VKRStepType::RENDER &&
			steps[i + 2]->stepType == VKRStepType::COPY &&
			steps[i + 1]->render.numDraws == 1 &&
			steps[i]->copy.dst == steps[i + 2]->copy.dst))
			continue;
		// Looks promising! Let's start by finding the last one.
		for (int j = i; j < (int)steps.size(); j++) {
			switch (steps[j]->stepType) {
			case VKRStepType::RENDER:
				if (steps[j]->render.numDraws > 1)
					last = j - 1;
				// Should really also check descriptor sets...
				if (steps[j]->commands.size()) {
					const VkRenderData &cmd = steps[j]->commands.back();
					if (cmd.cmd == VKRRenderCommand::DRAW_INDEXED && cmd.draw.count != 6)
						last = j - 1;
				}
				break;
			case VKRStepType::COPY:
				if (steps[j]->copy.dst != steps[i]->copy.dst)
					last = j - 1;
				break;
			default:
				break;
			}
			if (last != -1)
				break;
		}

		if (last != -1) {
			// We've got a sequence from i to last that needs reordering.
			// First, let's sort it, keeping the same length.
			std::vector<VKRStep *> copies;
			std::vector<VKRStep *> renders;
			copies.reserve((last - i) / 2);
			renders.reserve((last - i) / 2);
			for (int n = i; n <= last; n++) {
				if (steps[n]->stepType == VKRStepType::COPY)
					copies.push_back(steps[n]);
				else if (steps[n]->stepType == VKRStepType::RENDER)
					renders.push_back(steps[n]);
			}
			// Write the copies back. TODO: Combine them too.
			for (int j = 0; j < (int)copies.size(); j++) {
				steps[i + j] = copies[j];
			}

			const int firstRender = i + (int)copies.size();

			// Write the renders back (so they will be deleted properly).
			for (int j = 0; j < (int)renders.size(); j++) {
				steps[firstRender + j] = renders[j];
			}
			_assert_(steps[firstRender]->stepType == VKRStepType::RENDER);
			// Combine the renders.
			for (int j = 1; j < (int)renders.size(); j++) {
				steps[firstRender]->commands.reserve(renders[j]->commands.size());
				for (int k = 0; k < (int)renders[j]->commands.size(); k++) {
					steps[firstRender]->commands.push_back(renders[j]->commands[k]);
				}
				MergeRenderAreaRectInto(&steps[firstRender]->render.renderArea, renders[j]->render.renderArea);
				// Easier than removing them from the list, though that might be the better option.
				steps[firstRender + j]->stepType = VKRStepType::RENDER_SKIP;
				steps[firstRender + j]->commands.clear();
			}
			// We're done.
			break;
		}
	}

	// There's also a post-processing effect using depals that's just brutal in some parts
	// of the game.
	for (int i = 0; i < (int)steps.size() - 3; i++) {
		int last = -1;
		if (!(steps[i]->stepType == VKRStepType::RENDER &&
			steps[i + 1]->stepType == VKRStepType::RENDER &&
			steps[i + 2]->stepType == VKRStepType::RENDER &&
			steps[i]->render.numDraws == 1 &&
			steps[i + 1]->render.numDraws == 1 &&
			steps[i + 2]->render.numDraws == 1 &&
			steps[i]->render.colorLoad == VKRRenderPassLoadAction::DONT_CARE &&
			steps[i + 1]->render.colorLoad == VKRRenderPassLoadAction::KEEP &&
			steps[i + 2]->render.colorLoad == VKRRenderPassLoadAction::DONT_CARE))
			continue;
		VKRFramebuffer *depalFramebuffer = steps[i]->render.framebuffer;
		VKRFramebuffer *targetFramebuffer = steps[i + 1]->render.framebuffer;
		// OK, found the start of a post-process sequence. Let's scan until we find the end.
		for (int j = i; j < (int)steps.size() - 3; j++) {
			if (((j - i) & 1) == 0) {
				// This should be a depal draw.
				if (steps[j]->render.numDraws != 1)
					break;
				if (steps[j]->render.colorLoad != VKRRenderPassLoadAction::DONT_CARE)
					break;
				if (steps[j]->render.framebuffer != depalFramebuffer)
					break;
				last = j;
			} else {
				// This should be a target draw.
				if (steps[j]->render.numDraws != 1)
					break;
				if (steps[j]->render.colorLoad != VKRRenderPassLoadAction::KEEP)
					break;
				if (steps[j]->render.framebuffer != targetFramebuffer)
					break;
				last = j;
			}
		}

		if (last == -1)
			continue;

		// Combine the depal renders.
		for (int j = i + 2; j <= last + 1; j += 2) {
			for (int k = 0; k < (int)steps[j]->commands.size(); k++) {
				switch (steps[j]->commands[k].cmd) {
				case VKRRenderCommand::DRAW:
				case VKRRenderCommand::DRAW_INDEXED:
					steps[i]->commands.push_back(steps[j]->commands[k]);
					break;
				default:
					break;
				}
			}
			MergeRenderAreaRectInto(&steps[i]->render.renderArea, steps[j]->render.renderArea);
			steps[j]->stepType = VKRStepType::RENDER_SKIP;
		}

		// Combine the target renders.
		for (int j = i + 3; j <= last; j += 2) {
			for (int k = 0; k < (int)steps[j]->commands.size(); k++) {
				switch (steps[j]->commands[k].cmd) {
				case VKRRenderCommand::DRAW:
				case VKRRenderCommand::DRAW_INDEXED:
					steps[i + 1]->commands.push_back(steps[j]->commands[k]);
					break;
				default:
					break;
				}
			}
			MergeRenderAreaRectInto(&steps[i + 1]->render.renderArea, steps[j]->render.renderArea);
			steps[j]->stepType = VKRStepType::RENDER_SKIP;
		}

		// We're done - we only expect one of these sequences per frame.
		break;
	}
}

void VulkanQueueRunner::ApplySonicHack(std::vector<VKRStep *> &steps) {
	// We want to turn a sequence of render(3),render(1),render(6),render(1),render(6),render(1),render(3) into
	// render(1),render(1),render(1),render(6),render(6),render(6).

	for (int i = 0; i < (int)steps.size() - 4; i++) {
		int last = -1;
		if (!(steps[i]->stepType == VKRStepType::RENDER &&
			steps[i + 1]->stepType == VKRStepType::RENDER &&
			steps[i + 2]->stepType == VKRStepType::RENDER &&
			steps[i + 3]->stepType == VKRStepType::RENDER &&
			steps[i]->render.numDraws == 3 &&
			steps[i + 1]->render.numDraws == 1 &&
			steps[i + 2]->render.numDraws == 6 &&
			steps[i + 3]->render.numDraws == 1 &&
			steps[i]->render.framebuffer == steps[i + 2]->render.framebuffer &&
			steps[i + 1]->render.framebuffer == steps[i + 3]->render.framebuffer))
			continue;
		// Looks promising! Let's start by finding the last one.
		for (int j = i; j < (int)steps.size(); j++) {
			switch (steps[j]->stepType) {
			case VKRStepType::RENDER:
				if ((j - i) & 1) {
					if (steps[j]->render.framebuffer != steps[i + 1]->render.framebuffer)
						last = j - 1;
					if (steps[j]->render.numDraws != 1)
						last = j - 1;
				} else {
					if (steps[j]->render.framebuffer != steps[i]->render.framebuffer)
						last = j - 1;
					if (steps[j]->render.numDraws != 3 && steps[j]->render.numDraws != 6)
						last = j - 1;
				}
				break;
			default:
				break;
			}
			if (last != -1)
				break;
		}

		if (last != -1) {
			// We've got a sequence from i to last that needs reordering.
			// First, let's sort it, keeping the same length.
			std::vector<VKRStep *> type1;
			std::vector<VKRStep *> type2;
			type1.reserve((last - i) / 2);
			type2.reserve((last - i) / 2);
			for (int n = i; n <= last; n++) {
				if (steps[n]->render.framebuffer == steps[i]->render.framebuffer)
					type1.push_back(steps[n]);
				else
					type2.push_back(steps[n]);
			}

			// Write the renders back in order. Same amount, so deletion will work fine.
			for (int j = 0; j < (int)type1.size(); j++) {
				steps[i + j] = type1[j];
			}
			for (int j = 0; j < (int)type2.size(); j++) {
				steps[i + j + type1.size()] = type2[j];
			}

			// Combine the renders.
			for (int j = 1; j < (int)type1.size(); j++) {
				for (int k = 0; k < (int)type1[j]->commands.size(); k++) {
					steps[i]->commands.push_back(type1[j]->commands[k]);
				}
				steps[i + j]->stepType = VKRStepType::RENDER_SKIP;
			}
			for (int j = 1; j < (int)type2.size(); j++) {
				for (int k = 0; k < (int)type2[j]->commands.size(); k++) {
					steps[i + type1.size()]->commands.push_back(type2[j]->commands[k]);
				}
				// Technically, we should merge the render areas here, but they're all the same so it's not needed.
				steps[i + type1.size() + j]->stepType = VKRStepType::RENDER_SKIP;
			}
			// We're done.
			break;
		}
	}
}

const char *AspectToString(VkImageAspectFlags aspect) {
	switch (aspect) {
	case VK_IMAGE_ASPECT_COLOR_BIT: return "COLOR";
	case VK_IMAGE_ASPECT_DEPTH_BIT: return "DEPTH";
	case VK_IMAGE_ASPECT_STENCIL_BIT: return "STENCIL";
	case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT: return "DEPTHSTENCIL";
	default: return "UNUSUAL";
	}
}

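// Builds a one-line, human-readable description of a step. Used both for the GPU
// timestamp descriptions in RunSteps() and by the Log* helpers below.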
std::string VulkanQueueRunner::StepToString(VulkanContext *vulkan, const VKRStep &step) {
	char buffer[256];
	switch (step.stepType) {
	case VKRStepType::RENDER:
	{
		int w = step.render.framebuffer ? step.render.framebuffer->width : vulkan->GetBackbufferWidth();
		int h = step.render.framebuffer ? step.render.framebuffer->height : vulkan->GetBackbufferHeight();
		int actual_w = step.render.renderArea.extent.width;
		int actual_h = step.render.renderArea.extent.height;
		const char *renderCmd = GetRPTypeName(step.render.renderPassType);
		snprintf(buffer, sizeof(buffer), "%s %s %s (draws: %d, %dx%d/%dx%d)", renderCmd, step.tag, step.render.framebuffer ? step.render.framebuffer->Tag() : "", step.render.numDraws, actual_w, actual_h, w, h);
		break;
	}
	case VKRStepType::COPY:
		snprintf(buffer, sizeof(buffer), "COPY '%s' %s -> %s (%dx%d, %s)", step.tag, step.copy.src->Tag(), step.copy.dst->Tag(), step.copy.srcRect.extent.width, step.copy.srcRect.extent.height, AspectToString(step.copy.aspectMask));
		break;
	case VKRStepType::BLIT:
		snprintf(buffer, sizeof(buffer), "BLIT '%s' %s -> %s (%dx%d->%dx%d, %s)", step.tag, step.copy.src->Tag(), step.copy.dst->Tag(), step.blit.srcRect.extent.width, step.blit.srcRect.extent.height, step.blit.dstRect.extent.width, step.blit.dstRect.extent.height, AspectToString(step.blit.aspectMask));
		break;
	case VKRStepType::READBACK:
		snprintf(buffer, sizeof(buffer), "READBACK '%s' %s (%dx%d, %s)", step.tag, step.readback.src ? step.readback.src->Tag() : "(backbuffer)", step.readback.srcRect.extent.width, step.readback.srcRect.extent.height, AspectToString(step.readback.aspectMask));
		break;
	case VKRStepType::READBACK_IMAGE:
		snprintf(buffer, sizeof(buffer), "READBACK_IMAGE '%s' (%dx%d)", step.tag, step.readback_image.srcRect.extent.width, step.readback_image.srcRect.extent.height);
		break;
	case VKRStepType::RENDER_SKIP:
		snprintf(buffer, sizeof(buffer), "(RENDER_SKIP) %s", step.tag);
		break;
	default:
		buffer[0] = 0;
		break;
	}
	return std::string(buffer);
}

// Ideally, this should be cheap enough to be applied to all games. At least on mobile, it's pretty
// much a guaranteed neutral or win in terms of GPU power. However, the dependency calculation really
// must be perfect!
void VulkanQueueRunner::ApplyRenderPassMerge(std::vector<VKRStep *> &steps) {
	// First, let's count how many times each framebuffer is rendered to.
	// If it's more than once, let's do our best to merge them. This can help God of War quite a bit.
	std::unordered_map<VKRFramebuffer *, int> counts;
	for (int i = 0; i < (int)steps.size(); i++) {
		if (steps[i]->stepType == VKRStepType::RENDER) {
			counts[steps[i]->render.framebuffer]++;
		}
	}

	auto mergeRenderSteps = [](VKRStep *dst, VKRStep *src) {
		// OK. Now, if it's a render, slurp up all the commands and kill the step.
		// Also slurp up any pretransitions.
		dst->preTransitions.append(src->preTransitions);
		dst->commands.insert(dst->commands.end(), src->commands.begin(), src->commands.end());
		MergeRenderAreaRectInto(&dst->render.renderArea, src->render.renderArea);
		// So we don't consider it for other things; maybe it doesn't matter.
		src->dependencies.clear();
		src->stepType = VKRStepType::RENDER_SKIP;
		dst->render.numDraws += src->render.numDraws;
		dst->render.numReads += src->render.numReads;
		dst->render.pipelineFlags |= src->render.pipelineFlags;
		dst->render.renderPassType = MergeRPTypes(dst->render.renderPassType, src->render.renderPassType);
	};
	auto renderHasClear = [](const VKRStep *step) {
		const auto &r = step->render;
		return r.colorLoad == VKRRenderPassLoadAction::CLEAR || r.depthLoad == VKRRenderPassLoadAction::CLEAR || r.stencilLoad == VKRRenderPassLoadAction::CLEAR;
	};

	// Now, let's go through the steps. If we find one that is rendered to more than once,
	// we'll scan forward and slurp up any rendering that can be merged across.
	for (int i = 0; i < (int)steps.size(); i++) {
		if (steps[i]->stepType == VKRStepType::RENDER && counts[steps[i]->render.framebuffer] > 1) {
			auto fb = steps[i]->render.framebuffer;
			TinySet<VKRFramebuffer *, 8> touchedFramebuffers;  // Must be the same fast-size as the dependencies TinySet, for annoying reasons.
			for (int j = i + 1; j < (int)steps.size(); j++) {
				// If any other passes are reading from this framebuffer as-is, we cancel the scan.
				if (steps[j]->dependencies.contains(fb)) {
					// Reading from itself means a KEEP, which is okay.
					if (steps[j]->stepType != VKRStepType::RENDER || steps[j]->render.framebuffer != fb)
						break;
				}
				switch (steps[j]->stepType) {
				case VKRStepType::RENDER:
					if (steps[j]->render.framebuffer == fb) {
						// Prevent Unknown's example case from https://github.com/hrydgard/ppsspp/pull/12242
						if (renderHasClear(steps[j]) || steps[j]->dependencies.contains(touchedFramebuffers)) {
							goto done_fb;
						} else {
							// Safe to merge, great.
							mergeRenderSteps(steps[i], steps[j]);
						}
					} else {
						// Remember the framebuffer this wrote to. We can't merge with later passes that depend on these.
						touchedFramebuffers.insert(steps[j]->render.framebuffer);
					}
					break;
				case VKRStepType::COPY:
					if (steps[j]->copy.dst == fb) {
						// Without framebuffer "renaming", we can't merge past a clobbered fb.
						goto done_fb;
					}
					touchedFramebuffers.insert(steps[j]->copy.dst);
					break;
				case VKRStepType::BLIT:
					if (steps[j]->blit.dst == fb) {
						// Without framebuffer "renaming", we can't merge past a clobbered fb.
						goto done_fb;
					}
					touchedFramebuffers.insert(steps[j]->blit.dst);
					break;
				case VKRStepType::READBACK:
					// Not sure this has much effect; when executed, READBACK is always the last step,
					// since we stall the GPU and wait immediately after.
					break;
				case VKRStepType::RENDER_SKIP:
				case VKRStepType::READBACK_IMAGE:
					break;
				default:
					// We added a new step? Might be unsafe.
					goto done_fb;
				}
			}
			done_fb:
			;
		}
	}
}

void VulkanQueueRunner::LogSteps(const std::vector<VKRStep *> &steps, bool verbose) {
	INFO_LOG(Log::G3D, "=================== FRAME ====================");
	for (size_t i = 0; i < steps.size(); i++) {
		const VKRStep &step = *steps[i];
		switch (step.stepType) {
		case VKRStepType::RENDER:
			LogRenderPass(step, verbose);
			break;
		case VKRStepType::COPY:
			LogCopy(step);
			break;
		case VKRStepType::BLIT:
			LogBlit(step);
			break;
		case VKRStepType::READBACK:
			LogReadback(step);
			break;
		case VKRStepType::READBACK_IMAGE:
			LogReadbackImage(step);
			break;
		case VKRStepType::RENDER_SKIP:
			INFO_LOG(Log::G3D, "(skipped render pass)");
			break;
		}
	}
	INFO_LOG(Log::G3D, "------------------- SUBMIT ------------------");
}

const char *RenderPassActionName(VKRRenderPassLoadAction a) {
	switch (a) {
	case VKRRenderPassLoadAction::CLEAR:
		return "CLEAR";
	case VKRRenderPassLoadAction::DONT_CARE:
		return "DONT_CARE";
	case VKRRenderPassLoadAction::KEEP:
		return "KEEP";
	}
	return "?";
}

const char *ImageLayoutToString(VkImageLayout layout) {
	switch (layout) {
	case VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL: return "COLOR_ATTACHMENT";
	case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL: return "DEPTH_STENCIL_ATTACHMENT";
	case VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL: return "SHADER_READ_ONLY";
	case VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL: return "TRANSFER_SRC";
	case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL: return "TRANSFER_DST";
	case VK_IMAGE_LAYOUT_GENERAL: return "GENERAL";
	case VK_IMAGE_LAYOUT_PRESENT_SRC_KHR: return "PRESENT_SRC_KHR";
	case VK_IMAGE_LAYOUT_UNDEFINED: return "UNDEFINED";
	default: return "(unknown)";
	}
}

void VulkanQueueRunner::LogRenderPass(const VKRStep &pass, bool verbose) {
	const auto &r = pass.render;
	const char *framebuf = r.framebuffer ? r.framebuffer->Tag() : "backbuffer";
	int w = r.framebuffer ? r.framebuffer->width : vulkan_->GetBackbufferWidth();
	int h = r.framebuffer ? r.framebuffer->height : vulkan_->GetBackbufferHeight();

	INFO_LOG(Log::G3D, "RENDER %s Begin(%s, draws: %d, %dx%d, %s, %s, %s)", pass.tag, framebuf, r.numDraws, w, h, RenderPassActionName(r.colorLoad), RenderPassActionName(r.depthLoad), RenderPassActionName(r.stencilLoad));
	// TODO: Log these in detail.
	for (int i = 0; i < (int)pass.preTransitions.size(); i++) {
		INFO_LOG(Log::G3D, "  PRETRANSITION: %s %s -> %s", pass.preTransitions[i].fb->Tag(), AspectToString(pass.preTransitions[i].aspect), ImageLayoutToString(pass.preTransitions[i].targetLayout));
	}

	if (verbose) {
		for (auto &cmd : pass.commands) {
			switch (cmd.cmd) {
			case VKRRenderCommand::REMOVED:
				INFO_LOG(Log::G3D, "  (Removed)");
				break;
			case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
				INFO_LOG(Log::G3D, "  BindGraphicsPipeline(%x)", (int)(intptr_t)cmd.graphics_pipeline.pipeline);
				break;
			case VKRRenderCommand::BLEND:
				INFO_LOG(Log::G3D, "  BlendColor(%08x)", cmd.blendColor.color);
				break;
			case VKRRenderCommand::CLEAR:
				INFO_LOG(Log::G3D, "  Clear");
				break;
			case VKRRenderCommand::DRAW:
				INFO_LOG(Log::G3D, "  Draw(%d)", cmd.draw.count);
				break;
			case VKRRenderCommand::DRAW_INDEXED:
				INFO_LOG(Log::G3D, "  DrawIndexed(%d)", cmd.drawIndexed.count);
				break;
			case VKRRenderCommand::SCISSOR:
				INFO_LOG(Log::G3D, "  Scissor(%d, %d, %d, %d)", (int)cmd.scissor.scissor.offset.x, (int)cmd.scissor.scissor.offset.y, (int)cmd.scissor.scissor.extent.width, (int)cmd.scissor.scissor.extent.height);
				break;
			case VKRRenderCommand::STENCIL:
				INFO_LOG(Log::G3D, "  Stencil(ref=%d, compare=%d, write=%d)", cmd.stencil.stencilRef, cmd.stencil.stencilCompareMask, cmd.stencil.stencilWriteMask);
				break;
			case VKRRenderCommand::VIEWPORT:
				INFO_LOG(Log::G3D, "  Viewport(%f, %f, %f, %f, %f, %f)", cmd.viewport.vp.x, cmd.viewport.vp.y, cmd.viewport.vp.width, cmd.viewport.vp.height, cmd.viewport.vp.minDepth, cmd.viewport.vp.maxDepth);
				break;
			case VKRRenderCommand::PUSH_CONSTANTS:
				INFO_LOG(Log::G3D, "  PushConstants(%d)", cmd.push.size);
				break;
			case VKRRenderCommand::DEBUG_ANNOTATION:
				INFO_LOG(Log::G3D, "  DebugAnnotation(%s)", cmd.debugAnnotation.annotation);
				break;

			case VKRRenderCommand::NUM_RENDER_COMMANDS:
				break;
			}
		}
	}

	INFO_LOG(Log::G3D, "  Final: %s %s", ImageLayoutToString(pass.render.finalColorLayout), ImageLayoutToString(pass.render.finalDepthStencilLayout));
	INFO_LOG(Log::G3D, "RENDER End(%s) - %d commands executed", framebuf, (int)pass.commands.size());
}

void VulkanQueueRunner::LogCopy(const VKRStep &step) {
	INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}

void VulkanQueueRunner::LogBlit(const VKRStep &step) {
	INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}

void VulkanQueueRunner::LogReadback(const VKRStep &step) {
	INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}

void VulkanQueueRunner::LogReadbackImage(const VKRStep &step) {
	INFO_LOG(Log::G3D, "%s", StepToString(vulkan_, step).c_str());
}

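// Records one RENDER step: applies any pre-transitions, begins the render pass (via
// PerformBindFramebufferAsRenderTarget), replays the recorded command list, then ends
// the pass and queues the final layout transitions the step requested.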
void VulkanQueueRunner::PerformRenderPass(const VKRStep &step, VkCommandBuffer cmd, int curFrame, QueueProfileContext &profile) {
	for (size_t i = 0; i < step.preTransitions.size(); i++) {
		const TransitionRequest &iter = step.preTransitions[i];
		if (iter.aspect == VK_IMAGE_ASPECT_COLOR_BIT && iter.fb->color.layout != iter.targetLayout) {
			recordBarrier_.TransitionColorImageAuto(
				&iter.fb->color,
				iter.targetLayout
			);
		} else if (iter.fb->depth.image != VK_NULL_HANDLE && (iter.aspect & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) && iter.fb->depth.layout != iter.targetLayout) {
			recordBarrier_.TransitionDepthStencilImageAuto(
				&iter.fb->depth,
				iter.targetLayout
			);
		}
	}

	// Don't execute empty renderpasses that keep the contents.
	if (step.commands.empty() && step.render.colorLoad == VKRRenderPassLoadAction::KEEP && step.render.depthLoad == VKRRenderPassLoadAction::KEEP && step.render.stencilLoad == VKRRenderPassLoadAction::KEEP) {
		// Flush the pending barrier.
		recordBarrier_.Flush(cmd);
		// Nothing to do.
		// TODO: Though - a later step might have used this step's finalColorLayout etc. to get things in a layout it expects.
		// Should we just do a barrier? Or just let the later step deal with not having things in its preferred layout, like now?
		return;
	}

	// Write-after-write hazards. Fixed flicker in God of War on ARM (before we added another fix that removed these).
	// NOTE: These are commented out because the normal barriers no longer check for equality, effectively generating these
	// barriers automatically. This is safe, but I sometimes think it could be improved on.
	/*
	if (step.render.framebuffer) {
		int n = 0;
		int stage = 0;

		if (step.render.framebuffer->color.layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL) {
			recordBarrier_.TransitionImage(
				step.render.framebuffer->color.image,
				0,
				1,
				step.render.framebuffer->numLayers,
				VK_IMAGE_ASPECT_COLOR_BIT,
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
				VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT | VK_ACCESS_COLOR_ATTACHMENT_READ_BIT,
				VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
				VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT
			);
		}
		if (step.render.framebuffer->depth.image != VK_NULL_HANDLE && step.render.framebuffer->depth.layout == VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL) {
			recordBarrier_.TransitionImage(
				step.render.framebuffer->depth.image,
				0,
				1,
				step.render.framebuffer->numLayers,
				VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
				VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
				VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT,
				VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT,
				VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT
			);
		}
	}
	*/

	// This chooses a render pass according to the load/store attachment state. We no longer transition
	// image layouts as part of the passes.
	//
	// NOTE: Unconditionally flushes recordBarrier_.
	VKRRenderPass *renderPass = PerformBindFramebufferAsRenderTarget(step, cmd);

	int curWidth = step.render.framebuffer ? step.render.framebuffer->width : vulkan_->GetBackbufferWidth();
	int curHeight = step.render.framebuffer ? step.render.framebuffer->height : vulkan_->GetBackbufferHeight();

	VKRFramebuffer *fb = step.render.framebuffer;

	VKRGraphicsPipeline *lastGraphicsPipeline = nullptr;
	VKRComputePipeline *lastComputePipeline = nullptr;

	const auto &commands = step.commands;

	// We can do a little bit of state tracking here to eliminate some calls into the driver.
	// The stencil ones are very commonly mostly redundant, so let's eliminate them where possible.
	// Might also want to consider scissor and viewport.
	VkPipeline lastPipeline = VK_NULL_HANDLE;
	FastVec<PendingDescSet> *descSets = nullptr;
	VkPipelineLayout pipelineLayout = VK_NULL_HANDLE;

	bool pipelineOK = false;

	int lastStencilWriteMask = -1;
	int lastStencilCompareMask = -1;
	int lastStencilReference = -1;

	const RenderPassType rpType = step.render.renderPassType;

	for (size_t i = 0; i < commands.size(); i++) {
		const VkRenderData &c = commands[i];
#ifdef _DEBUG
		if (profile.enabled) {
			// Bounds-check the command index we're about to count.
			if ((size_t)c.cmd < ARRAY_SIZE(profile.commandCounts)) {
				profile.commandCounts[(size_t)c.cmd]++;
			}
		}
#endif
		switch (c.cmd) {
		case VKRRenderCommand::REMOVED:
			break;

		case VKRRenderCommand::BIND_GRAPHICS_PIPELINE:
		{
			VKRGraphicsPipeline *graphicsPipeline = c.graphics_pipeline.pipeline;
			if (graphicsPipeline != lastGraphicsPipeline) {
				VkSampleCountFlagBits fbSampleCount = fb ? fb->sampleCount : VK_SAMPLE_COUNT_1_BIT;

				if (RenderPassTypeHasMultisample(rpType) && fbSampleCount != graphicsPipeline->SampleCount()) {
					// Should have been invalidated.
					_assert_msg_(graphicsPipeline->SampleCount() == VK_SAMPLE_COUNT_FLAG_BITS_MAX_ENUM,
						"expected %d sample count, got %d", fbSampleCount, graphicsPipeline->SampleCount());
				}

				if (!graphicsPipeline->pipeline[(size_t)rpType]) {
					// NOTE: If render steps got merged, it can happen that, as they ended during recording,
					// they didn't know their final render pass type, so they created the wrong pipelines in EndCurRenderStep().
					// Unfortunately I don't know if we can fix it in any more sensible place than here.
					// Maybe a middle pass. But let's try to just block and compile here for now; this doesn't
					// happen all that much.
					graphicsPipeline->pipeline[(size_t)rpType] = Promise<VkPipeline>::CreateEmpty();
					graphicsPipeline->Create(vulkan_, renderPass->Get(vulkan_, rpType, fbSampleCount), rpType, fbSampleCount, time_now_d(), -1);
				}

				VkPipeline pipeline = graphicsPipeline->pipeline[(size_t)rpType]->BlockUntilReady();

				if (pipeline != VK_NULL_HANDLE) {
					vkCmdBindPipeline(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);
					descSets = &c.graphics_pipeline.pipelineLayout->frameData[curFrame].descSets_;
					pipelineLayout = c.graphics_pipeline.pipelineLayout->pipelineLayout;
					_dbg_assert_(pipelineLayout != VK_NULL_HANDLE);
					lastGraphicsPipeline = graphicsPipeline;
					pipelineOK = true;
				} else {
					pipelineOK = false;
				}

				// Reset dynamic state so it gets refreshed with the new pipeline.
				lastStencilWriteMask = -1;
				lastStencilCompareMask = -1;
				lastStencilReference = -1;
			}
			break;
		}

		case VKRRenderCommand::VIEWPORT:
			if (fb != nullptr) {
				vkCmdSetViewport(cmd, 0, 1, &c.viewport.vp);
			} else {
				const VkViewport &vp = c.viewport.vp;
				DisplayRect<float> rc{ vp.x, vp.y, vp.width, vp.height };
				RotateRectToDisplay(rc, (float)vulkan_->GetBackbufferWidth(), (float)vulkan_->GetBackbufferHeight());
				VkViewport final_vp;
				final_vp.x = rc.x;
				final_vp.y = rc.y;
				final_vp.width = rc.w;
				final_vp.height = rc.h;
				final_vp.maxDepth = vp.maxDepth;
				final_vp.minDepth = vp.minDepth;
				vkCmdSetViewport(cmd, 0, 1, &final_vp);
			}
			break;

		case VKRRenderCommand::SCISSOR:
		{
			if (fb != nullptr) {
				vkCmdSetScissor(cmd, 0, 1, &c.scissor.scissor);
			} else {
				// Rendering to backbuffer. Might need to rotate.
				const VkRect2D &rc = c.scissor.scissor;
				DisplayRect<int> rotated_rc{ rc.offset.x, rc.offset.y, (int)rc.extent.width, (int)rc.extent.height };
				RotateRectToDisplay(rotated_rc, vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());
				_dbg_assert_(rotated_rc.x >= 0);
				_dbg_assert_(rotated_rc.y >= 0);
				VkRect2D finalRect = VkRect2D{ { rotated_rc.x, rotated_rc.y }, { (uint32_t)rotated_rc.w, (uint32_t)rotated_rc.h } };
				vkCmdSetScissor(cmd, 0, 1, &finalRect);
			}
			break;
		}

		case VKRRenderCommand::BLEND:
		{
			float bc[4];
			Uint8x4ToFloat4(bc, c.blendColor.color);
			vkCmdSetBlendConstants(cmd, bc);
			break;
		}

		case VKRRenderCommand::PUSH_CONSTANTS:
			if (pipelineOK) {
				vkCmdPushConstants(cmd, pipelineLayout, c.push.stages, c.push.offset, c.push.size, c.push.data);
			}
			break;

		case VKRRenderCommand::STENCIL:
			if (lastStencilWriteMask != c.stencil.stencilWriteMask) {
				lastStencilWriteMask = (int)c.stencil.stencilWriteMask;
				vkCmdSetStencilWriteMask(cmd, VK_STENCIL_FRONT_AND_BACK, c.stencil.stencilWriteMask);
			}
			if (lastStencilCompareMask != c.stencil.stencilCompareMask) {
				lastStencilCompareMask = c.stencil.stencilCompareMask;
				vkCmdSetStencilCompareMask(cmd, VK_STENCIL_FRONT_AND_BACK, c.stencil.stencilCompareMask);
			}
			if (lastStencilReference != c.stencil.stencilRef) {
				lastStencilReference = c.stencil.stencilRef;
				vkCmdSetStencilReference(cmd, VK_STENCIL_FRONT_AND_BACK, c.stencil.stencilRef);
			}
			break;

		case VKRRenderCommand::DRAW_INDEXED:
			if (pipelineOK) {
				VkDescriptorSet set = (*descSets)[c.drawIndexed.descSetIndex].set;
				_dbg_assert_(set != VK_NULL_HANDLE);
				vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.drawIndexed.numUboOffsets, c.drawIndexed.uboOffsets);
				vkCmdBindIndexBuffer(cmd, c.drawIndexed.ibuffer, c.drawIndexed.ioffset, VK_INDEX_TYPE_UINT16);
				VkDeviceSize voffset = c.drawIndexed.voffset;
				vkCmdBindVertexBuffers(cmd, 0, 1, &c.drawIndexed.vbuffer, &voffset);
				vkCmdDrawIndexed(cmd, c.drawIndexed.count, c.drawIndexed.instances, 0, 0, 0);
			}
			break;

		case VKRRenderCommand::DRAW:
			if (pipelineOK) {
				VkDescriptorSet set = (*descSets)[c.draw.descSetIndex].set;
				_dbg_assert_(set != VK_NULL_HANDLE);
				vkCmdBindDescriptorSets(cmd, VK_PIPELINE_BIND_POINT_GRAPHICS, pipelineLayout, 0, 1, &set, c.draw.numUboOffsets, c.draw.uboOffsets);
				if (c.draw.vbuffer) {
					vkCmdBindVertexBuffers(cmd, 0, 1, &c.draw.vbuffer, &c.draw.voffset);
				}
				vkCmdDraw(cmd, c.draw.count, 1, c.draw.offset, 0);
			}
			break;

		case VKRRenderCommand::CLEAR:
		{
			// If we get here, we failed to merge a clear into a render pass load op. This is bad for perf.
			int numAttachments = 0;
			VkClearRect rc{};
			rc.baseArrayLayer = 0;
			rc.layerCount = 1;  // In multiview mode, 1 means to replicate to all the active layers.
			rc.rect.extent.width = (uint32_t)curWidth;
			rc.rect.extent.height = (uint32_t)curHeight;
			VkClearAttachment attachments[2]{};
			if (c.clear.clearMask & VK_IMAGE_ASPECT_COLOR_BIT) {
				VkClearAttachment &attachment = attachments[numAttachments++];
				attachment.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
				attachment.colorAttachment = 0;
				Uint8x4ToFloat4(attachment.clearValue.color.float32, c.clear.clearColor);
			}
			if (c.clear.clearMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
				VkClearAttachment &attachment = attachments[numAttachments++];
				attachment.aspectMask = 0;
				if (c.clear.clearMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
					attachment.clearValue.depthStencil.depth = c.clear.clearZ;
					attachment.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
				}
				if (c.clear.clearMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
					attachment.clearValue.depthStencil.stencil = (uint32_t)c.clear.clearStencil;
					attachment.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
				}
			}
			if (numAttachments) {
				vkCmdClearAttachments(cmd, numAttachments, attachments, 1, &rc);
			}
			break;
		}

		case VKRRenderCommand::DEBUG_ANNOTATION:
			if (vulkan_->Extensions().EXT_debug_utils) {
				VkDebugUtilsLabelEXT labelInfo{ VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT };
				labelInfo.pLabelName = c.debugAnnotation.annotation;
				vkCmdInsertDebugUtilsLabelEXT(cmd, &labelInfo);
			}
			break;

		default:
			ERROR_LOG(Log::G3D, "Unimpl queue command");
			break;
		}
	}
	vkCmdEndRenderPass(cmd);

	_dbg_assert_(recordBarrier_.empty());

	if (fb) {
		// If the desired final layout isn't the layout needed next, early-transition the image.
		if (step.render.finalColorLayout != fb->color.layout) {
			recordBarrier_.TransitionColorImageAuto(&fb->color, step.render.finalColorLayout);
		}
		if (fb->depth.image && step.render.finalDepthStencilLayout != fb->depth.layout) {
			recordBarrier_.TransitionDepthStencilImageAuto(&fb->depth, step.render.finalDepthStencilLayout);
		}
	}
}

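// Picks (or creates) the VkRenderPass matching the step's load/store actions, records
// the layout transitions the attachments need, then begins the render pass on cmd.
// Returns the render pass so the caller can compile pipelines against it if needed.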
VKRRenderPass *VulkanQueueRunner::PerformBindFramebufferAsRenderTarget(const VKRStep &step, VkCommandBuffer cmd) {
	VKRRenderPass *renderPass;
	int numClearVals = 0;
	VkClearValue clearVal[4]{};
	VkFramebuffer framebuf;
	int w;
	int h;

	bool hasDepth = RenderPassTypeHasDepth(step.render.renderPassType);

	VkSampleCountFlagBits sampleCount;

	// Can be used to separate the final*Layout barrier from the rest, for debugging in RenderDoc.
	// recordBarrier_.Flush(cmd);

	if (step.render.framebuffer) {
		_dbg_assert_(step.render.finalColorLayout != VK_IMAGE_LAYOUT_UNDEFINED);
		_dbg_assert_(step.render.finalDepthStencilLayout != VK_IMAGE_LAYOUT_UNDEFINED);

		RPKey key{
			step.render.colorLoad, step.render.depthLoad, step.render.stencilLoad,
			step.render.colorStore, step.render.depthStore, step.render.stencilStore,
		};
		renderPass = GetRenderPass(key);

		VKRFramebuffer *fb = step.render.framebuffer;
		framebuf = fb->Get(renderPass, step.render.renderPassType);
		sampleCount = fb->sampleCount;
		_dbg_assert_(framebuf != VK_NULL_HANDLE);
		w = fb->width;
		h = fb->height;

		// The Mali driver on S8 (Android O) and S9 mishandles renderpasses that do just a clear
		// and then no draw calls. Memory transaction elimination gets mis-flagged or something.
		// To avoid this, we transition to GENERAL and back in this case (ARM-approved workaround).
		// See pull request #10723.
		bool maliBugWorkaround = step.render.numDraws == 0 &&
			step.render.colorLoad == VKRRenderPassLoadAction::CLEAR &&
			vulkan_->GetPhysicalDeviceProperties().properties.driverVersion == 0xaa9c4b29;
		if (maliBugWorkaround) {
			// A little suboptimal, but let's go for maximum safety here.
			recordBarrier_.TransitionImage(fb->color.image, 0, 1, fb->numLayers, VK_IMAGE_ASPECT_COLOR_BIT,
				fb->color.layout, VK_IMAGE_LAYOUT_GENERAL,
				VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
				VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
				VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
			fb->color.layout = VK_IMAGE_LAYOUT_GENERAL;
		}

		recordBarrier_.TransitionColorImageAuto(&fb->color, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL);

		// If the render pass doesn't touch depth, we can avoid a layout transition of the depth buffer.
		if (fb->depth.image && RenderPassTypeHasDepth(step.render.renderPassType)) {
			recordBarrier_.TransitionDepthStencilImageAuto(&fb->depth, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL);
		}

		// The transition from the optimal format happens after EndRenderPass, now that we don't
		// do it as part of the renderpass itself anymore.

		if (sampleCount != VK_SAMPLE_COUNT_1_BIT) {
			// We don't initialize values for these.
			numClearVals = hasDepth ? 2 : 1;  // Skip the resolve buffers, don't need to clear those.
		}
		if (step.render.colorLoad == VKRRenderPassLoadAction::CLEAR) {
			Uint8x4ToFloat4(clearVal[numClearVals].color.float32, step.render.clearColor);
		}
		numClearVals++;
		if (hasDepth) {
			if (step.render.depthLoad == VKRRenderPassLoadAction::CLEAR || step.render.stencilLoad == VKRRenderPassLoadAction::CLEAR) {
				clearVal[numClearVals].depthStencil.depth = step.render.clearDepth;
				clearVal[numClearVals].depthStencil.stencil = step.render.clearStencil;
			}
			numClearVals++;
		}
		_dbg_assert_(numClearVals != 3);
	} else {
		RPKey key{
			VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR, VKRRenderPassLoadAction::CLEAR,
			VKRRenderPassStoreAction::STORE, VKRRenderPassStoreAction::DONT_CARE, VKRRenderPassStoreAction::DONT_CARE,
		};
		renderPass = GetRenderPass(key);

		if (IsVREnabled()) {
			framebuf = (VkFramebuffer)BindVRFramebuffer();
		} else {
			framebuf = backbuffer_;
		}

		// Raw, rotated backbuffer size.
		w = vulkan_->GetBackbufferWidth();
		h = vulkan_->GetBackbufferHeight();

		Uint8x4ToFloat4(clearVal[0].color.float32, step.render.clearColor);
		numClearVals = hasDepth ? 2 : 1;  // We might do a depth-less backbuffer in the future, though I'm doubtful of the value.
		clearVal[1].depthStencil.depth = 0.0f;
		clearVal[1].depthStencil.stencil = 0;
		sampleCount = VK_SAMPLE_COUNT_1_BIT;
	}

	VkRenderPassBeginInfo rp_begin = { VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO };
	rp_begin.renderPass = renderPass->Get(vulkan_, step.render.renderPassType, sampleCount);
	rp_begin.framebuffer = framebuf;

	VkRect2D rc = step.render.renderArea;
	if (!step.render.framebuffer) {
		// Rendering to the backbuffer; must rotate, just like scissors.
		DisplayRect<int> rotated_rc{ rc.offset.x, rc.offset.y, (int)rc.extent.width, (int)rc.extent.height };
		RotateRectToDisplay(rotated_rc, vulkan_->GetBackbufferWidth(), vulkan_->GetBackbufferHeight());

		rc.offset.x = rotated_rc.x;
		rc.offset.y = rotated_rc.y;
		rc.extent.width = rotated_rc.w;
		rc.extent.height = rotated_rc.h;
	}

	recordBarrier_.Flush(cmd);

	rp_begin.renderArea = rc;
	rp_begin.clearValueCount = numClearVals;
	rp_begin.pClearValues = numClearVals ? clearVal : nullptr;
	vkCmdBeginRenderPass(cmd, &rp_begin, VK_SUBPASS_CONTENTS_INLINE);

	return renderPass;
}

1369
1370
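// Note on the clear value bookkeeping above: VkRenderPassBeginInfo::pClearValues is indexed by
// attachment number, and clearValueCount must reach past the last attachment that uses a CLEAR
// load op - entries for non-cleared attachments are simply ignored. Illustrative sketch only
// (not used by the runner); the [color, depth/stencil] attachment order is an assumption here:
#if 0
VkClearValue clearVals[2]{};
clearVals[0].color.float32[3] = 1.0f;     // Attachment 0: color.
clearVals[1].depthStencil = { 0.0f, 0 };  // Attachment 1: depth/stencil.
VkRenderPassBeginInfo info{ VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO };
info.clearValueCount = 2;  // Must be >= highest cleared attachment index + 1.
info.pClearValues = clearVals;
#endif
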
void VulkanQueueRunner::PerformCopy(const VKRStep &step, VkCommandBuffer cmd) {
	// The barrier code doesn't handle this case. We'd need to transition to GENERAL to do an intra-image copy.
	_dbg_assert_(step.copy.src != step.copy.dst);

	VKRFramebuffer *src = step.copy.src;
	VKRFramebuffer *dst = step.copy.dst;

	int layerCount = std::min(step.copy.src->numLayers, step.copy.dst->numLayers);
	_dbg_assert_(step.copy.src->numLayers >= step.copy.dst->numLayers);

	// TODO: If the copy covers exactly the whole destination, we can set up an UNDEFINED->TRANSFER_DST_OPTIMAL transition,
	// which can potentially be more efficient.

	if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		recordBarrier_.TransitionColorImageAuto(&src->color, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		recordBarrier_.TransitionColorImageAuto(&dst->color, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
	}

	// We can't copy only depth or only stencil, unfortunately - or can we?
	if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		_dbg_assert_(src->depth.image != VK_NULL_HANDLE);

		recordBarrier_.TransitionDepthStencilImageAuto(&src->depth, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		if (dst->depth.layout != VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL) {
			recordBarrier_.TransitionDepthStencilImageAuto(&dst->depth, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
		} else {
			// Kingdom Hearts copies twice in a row to the same depth buffer without any other use in between.
			// Not super sure how that happens, but we need a write-after-write barrier to pass sync validation.
			SetupTransferDstWriteAfterWrite(dst->depth, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, &recordBarrier_);
		}
	}

	bool multisampled = src->sampleCount != VK_SAMPLE_COUNT_1_BIT && dst->sampleCount != VK_SAMPLE_COUNT_1_BIT;
	if (multisampled) {
		// If both targets are multisampled, copy the MSAA targets too.
		// For that, we need to transition them from their normally permanent VK_*_ATTACHMENT_OPTIMAL layouts, and then back.
		if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
			recordBarrier_.TransitionColorImageAuto(&src->msaaColor, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			recordBarrier_.TransitionColorImageAuto(&dst->msaaColor, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
		}
		if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			recordBarrier_.TransitionDepthStencilImageAuto(&src->msaaDepth, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			recordBarrier_.TransitionDepthStencilImageAuto(&dst->msaaDepth, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
		}
	}

	recordBarrier_.Flush(cmd);

	VkImageCopy copy{};
	copy.srcOffset.x = step.copy.srcRect.offset.x;
	copy.srcOffset.y = step.copy.srcRect.offset.y;
	copy.srcOffset.z = 0;
	copy.srcSubresource.mipLevel = 0;
	copy.srcSubresource.layerCount = layerCount;
	copy.dstOffset.x = step.copy.dstPos.x;
	copy.dstOffset.y = step.copy.dstPos.y;
	copy.dstOffset.z = 0;
	copy.dstSubresource.mipLevel = 0;
	copy.dstSubresource.layerCount = layerCount;
	copy.extent.width = step.copy.srcRect.extent.width;
	copy.extent.height = step.copy.srcRect.extent.height;
	copy.extent.depth = 1;

	if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		copy.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		copy.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		vkCmdCopyImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &copy);

		if (multisampled) {
			vkCmdCopyImage(cmd, src->msaaColor.image, src->msaaColor.layout, dst->msaaColor.image, dst->msaaColor.layout, 1, &copy);
		}
	}
	if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		_dbg_assert_(src->depth.image != VK_NULL_HANDLE);
		_dbg_assert_(dst->depth.image != VK_NULL_HANDLE);
		copy.srcSubresource.aspectMask = step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
		copy.dstSubresource.aspectMask = step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT);
		vkCmdCopyImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &copy);

		if (multisampled) {
			vkCmdCopyImage(cmd, src->msaaDepth.image, src->msaaDepth.layout, dst->msaaDepth.image, dst->msaaDepth.layout, 1, &copy);
		}
	}

	if (multisampled) {
		// Transition the MSAA surfaces back to their permanent attachment-optimal layouts.
		if (step.copy.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
			recordBarrier_.TransitionImage(src->msaaColor.image, 0, 1, src->msaaColor.numLayers, VK_IMAGE_ASPECT_COLOR_BIT,
				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
			src->msaaColor.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
			recordBarrier_.TransitionImage(dst->msaaColor.image, 0, 1, dst->msaaColor.numLayers, VK_IMAGE_ASPECT_COLOR_BIT,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT | VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
			dst->msaaColor.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
		}
		if (step.copy.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			recordBarrier_.TransitionImage(src->msaaDepth.image, 0, 1, src->msaaDepth.numLayers,
				VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
				VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
			src->msaaDepth.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
			recordBarrier_.TransitionImage(dst->msaaDepth.image, 0, 1, dst->msaaDepth.numLayers,
				VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT,
				VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,
				VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT | VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
				VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT | VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT);
			dst->msaaDepth.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;
		}
		// Probably not necessary.
		recordBarrier_.Flush(cmd);
	}
}

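// Regarding the assert at the top of PerformCopy: an intra-image copy would be possible by first
// moving the image to GENERAL, which is valid as both a transfer source and destination, as long
// as the two regions don't overlap. Untested sketch only - `fb` is a placeholder, and the call
// mirrors the TransitionImage signature used elsewhere in this file:
#if 0
recordBarrier_.TransitionImage(fb->color.image, 0, 1, fb->numLayers, VK_IMAGE_ASPECT_COLOR_BIT,
	fb->color.layout, VK_IMAGE_LAYOUT_GENERAL,
	VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
	VK_ACCESS_TRANSFER_READ_BIT | VK_ACCESS_TRANSFER_WRITE_BIT,
	VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
fb->color.layout = VK_IMAGE_LAYOUT_GENERAL;
// vkCmdCopyImage may then use the same image as both src and dst (both in GENERAL).
#endif
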
void VulkanQueueRunner::PerformBlit(const VKRStep &step, VkCommandBuffer cmd) {
	// The barrier code doesn't handle this case. We'd need to transition to GENERAL to do an intra-image blit.
	_dbg_assert_(step.blit.src != step.blit.dst);

	int layerCount = std::min(step.blit.src->numLayers, step.blit.dst->numLayers);
	_dbg_assert_(step.blit.src->numLayers >= step.blit.dst->numLayers);

	VKRFramebuffer *src = step.blit.src;
	VKRFramebuffer *dst = step.blit.dst;

	// Source and destination barriers first.
	if (step.blit.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		recordBarrier_.TransitionColorImageAuto(&src->color, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		recordBarrier_.TransitionColorImageAuto(&dst->color, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
	}

	// We can't blit only depth or only stencil, unfortunately.
	if (step.blit.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		_assert_(src->depth.image != VK_NULL_HANDLE);
		_assert_(dst->depth.image != VK_NULL_HANDLE);
		recordBarrier_.TransitionDepthStencilImageAuto(&src->depth, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		recordBarrier_.TransitionDepthStencilImageAuto(&dst->depth, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
	}

	recordBarrier_.Flush(cmd);

	// If any validation needs to be performed here, it should already have been done
	// when the blit was queued. So don't validate here.
	VkImageBlit blit{};
	blit.srcOffsets[0].x = step.blit.srcRect.offset.x;
	blit.srcOffsets[0].y = step.blit.srcRect.offset.y;
	blit.srcOffsets[0].z = 0;
	blit.srcOffsets[1].x = step.blit.srcRect.offset.x + step.blit.srcRect.extent.width;
	blit.srcOffsets[1].y = step.blit.srcRect.offset.y + step.blit.srcRect.extent.height;
	blit.srcOffsets[1].z = 1;
	blit.srcSubresource.mipLevel = 0;
	blit.srcSubresource.layerCount = layerCount;
	blit.dstOffsets[0].x = step.blit.dstRect.offset.x;
	blit.dstOffsets[0].y = step.blit.dstRect.offset.y;
	blit.dstOffsets[0].z = 0;
	blit.dstOffsets[1].x = step.blit.dstRect.offset.x + step.blit.dstRect.extent.width;
	blit.dstOffsets[1].y = step.blit.dstRect.offset.y + step.blit.dstRect.extent.height;
	blit.dstOffsets[1].z = 1;
	blit.dstSubresource.mipLevel = 0;
	blit.dstSubresource.layerCount = layerCount;

	if (step.blit.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
		blit.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		blit.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
		vkCmdBlitImage(cmd, src->color.image, src->color.layout, dst->color.image, dst->color.layout, 1, &blit, step.blit.filter);
	}

	// TODO: Need to check whether the depth format is blittable.
	// Actually, we should probably almost always use copies rather than blits for depth buffers.
	if (step.blit.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
		blit.srcSubresource.aspectMask = 0;
		blit.dstSubresource.aspectMask = 0;
		if (step.blit.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
			blit.srcSubresource.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
			blit.dstSubresource.aspectMask |= VK_IMAGE_ASPECT_DEPTH_BIT;
		}
		if (step.blit.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
			blit.srcSubresource.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
			blit.dstSubresource.aspectMask |= VK_IMAGE_ASPECT_STENCIL_BIT;
		}
		vkCmdBlitImage(cmd, src->depth.image, src->depth.layout, dst->depth.image, dst->depth.layout, 1, &blit, step.blit.filter);
	}
}

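// Sketch of the depth-blittability check that the TODO in PerformBlit refers to. Assumes a
// VkPhysicalDevice handle and the framebuffer's depth format are available here; this check is
// not currently wired up anywhere:
#if 0
VkFormatProperties props;
vkGetPhysicalDeviceFormatProperties(physicalDevice, depthFormat, &props);
const VkFormatFeatureFlags needed = VK_FORMAT_FEATURE_BLIT_SRC_BIT | VK_FORMAT_FEATURE_BLIT_DST_BIT;
bool depthBlitSupported = (props.optimalTilingFeatures & needed) == needed;
#endif
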
void VulkanQueueRunner::SetupTransferDstWriteAfterWrite(VKRImage &img, VkImageAspectFlags aspect, VulkanBarrierBatch *recordBarrier) {
	VkImageAspectFlags imageAspect = aspect;
	if (img.format == VK_FORMAT_D16_UNORM_S8_UINT || img.format == VK_FORMAT_D24_UNORM_S8_UINT || img.format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
		// The barrier must specify both aspects for combined depth/stencil buffers.
		imageAspect = VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT;
	}
	_dbg_assert_(img.layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL);
	recordBarrier->TransitionImage(img.image, 0, 1, img.numLayers, imageAspect,
		VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
		VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_TRANSFER_WRITE_BIT,
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
}

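// For reference, the batched write-after-write barrier above is equivalent to this raw call
// (sketch only - the real code goes through VulkanBarrierBatch so that neighboring transitions
// can be merged into a single vkCmdPipelineBarrier):
#if 0
VkImageMemoryBarrier barrier{ VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER };
barrier.oldLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;  // No layout change, only a hazard.
barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL;
barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.dstAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;
barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
barrier.image = img.image;
barrier.subresourceRange = { imageAspect, 0, 1, 0, (uint32_t)img.numLayers };
vkCmdPipelineBarrier(cmd, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0,
	0, nullptr, 0, nullptr, 1, &barrier);
#endif
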
void VulkanQueueRunner::ResizeReadbackBuffer(CachedReadback *readback, VkDeviceSize requiredSize) {
	if (readback->buffer && requiredSize <= readback->bufferSize) {
		return;
	}

	if (readback->buffer) {
		vulkan_->Delete().QueueDeleteBufferAllocation(readback->buffer, readback->allocation);
	}

	readback->bufferSize = requiredSize;

	VkBufferCreateInfo buf{ VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO };
	buf.size = readback->bufferSize;
	buf.usage = VK_BUFFER_USAGE_TRANSFER_DST_BIT;

	VmaAllocationCreateInfo allocCreateInfo{};
	allocCreateInfo.usage = VMA_MEMORY_USAGE_GPU_TO_CPU;
	VmaAllocationInfo allocInfo{};

	VkResult res = vmaCreateBuffer(vulkan_->Allocator(), &buf, &allocCreateInfo, &readback->buffer, &readback->allocation, &allocInfo);
	_assert_(res == VK_SUCCESS);

	// Check whether the allocation landed in host-coherent memory, so CopyReadbackBuffer knows
	// whether it needs to invalidate the mapped range before reading.
	const VkMemoryType &memoryType = vulkan_->GetMemoryProperties().memoryTypes[allocInfo.memoryType];
	readback->isCoherent = (memoryType.propertyFlags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT) != 0;
}

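// Note: VMA_MEMORY_USAGE_GPU_TO_CPU is a legacy hint; VMA 3.x prefers VMA_MEMORY_USAGE_AUTO plus
// an explicit host-access flag. Equivalent setup under the newer API (sketch only, assuming a
// VMA version that has these flags - this is not what the code above uses):
#if 0
VmaAllocationCreateInfo allocCreateInfo{};
allocCreateInfo.usage = VMA_MEMORY_USAGE_AUTO;
allocCreateInfo.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
#endif
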
void VulkanQueueRunner::PerformReadback(const VKRStep &step, VkCommandBuffer cmd, FrameData &frameData) {
	VkImage image;
	VkImageLayout copyLayout;
	// Special case for backbuffer readbacks.
	if (step.readback.src == nullptr) {
		// We only take screenshots after the main render pass (anything else would be stupid), so we need to
		// transition out of PRESENT, and then back into it afterwards.
		// Regarding layers: the backbuffer currently only has one layer.
		recordBarrier_.TransitionImage(backbufferImage_, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT,
			VK_IMAGE_LAYOUT_PRESENT_SRC_KHR, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
			0, VK_ACCESS_TRANSFER_READ_BIT,
			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT);
		copyLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;
		image = backbufferImage_;
	} else {
		VKRImage *srcImage;
		if (step.readback.aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
			srcImage = &step.readback.src->color;
			recordBarrier_.TransitionColorImageAuto(srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
		} else if (step.readback.aspectMask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			srcImage = &step.readback.src->depth;
			recordBarrier_.TransitionDepthStencilImageAuto(srcImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			_dbg_assert_(srcImage->image != VK_NULL_HANDLE);
		} else {
			_dbg_assert_msg_(false, "No image aspect to read back?");
			return;
		}
		image = srcImage->image;
		copyLayout = srcImage->layout;
	}

	recordBarrier_.Flush(cmd);

	// TODO: Handle different readback formats! Currently this assumes 4 bytes per pixel.
	u32 readbackSizeInBytes = sizeof(uint32_t) * step.readback.srcRect.extent.width * step.readback.srcRect.extent.height;

	CachedReadback *cached = nullptr;

	if (step.readback.delayed) {
		ReadbackKey key;
		key.framebuf = step.readback.src;
		key.width = step.readback.srcRect.extent.width;
		key.height = step.readback.srcRect.extent.height;

		// See if there's already a buffer we can reuse.
		if (!frameData.readbacks_.Get(key, &cached)) {
			cached = new CachedReadback();
			cached->bufferSize = 0;
			frameData.readbacks_.Insert(key, cached);
		}
	} else {
		cached = &syncReadback_;
	}

	ResizeReadbackBuffer(cached, readbackSizeInBytes);

	VkBufferImageCopy region{};
	region.imageOffset = { step.readback.srcRect.offset.x, step.readback.srcRect.offset.y, 0 };
	region.imageExtent = { step.readback.srcRect.extent.width, step.readback.srcRect.extent.height, 1 };
	region.imageSubresource.aspectMask = step.readback.aspectMask;
	region.imageSubresource.layerCount = 1;
	region.bufferOffset = 0;
	region.bufferRowLength = step.readback.srcRect.extent.width;
	region.bufferImageHeight = step.readback.srcRect.extent.height;

	vkCmdCopyImageToBuffer(cmd, image, copyLayout, cached->buffer, 1, &region);

	// NOTE: The buffer can't be read by the CPU here - we need to sync first.

	// If we copied from the backbuffer, transition it back to PRESENT.
	if (step.readback.src == nullptr) {
		// Regarding layers: the backbuffer currently only has one layer.
		recordBarrier_.TransitionImage(backbufferImage_, 0, 1, 1, VK_IMAGE_ASPECT_COLOR_BIT,
			VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_PRESENT_SRC_KHR,
			VK_ACCESS_TRANSFER_READ_BIT, 0,
			VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
		recordBarrier_.Flush(cmd);  // Probably not needed.
	}
}

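// The readback size in PerformReadback hardcodes 4 bytes per pixel (see the TODO there). A more
// general version would derive the size from the source format, roughly like this - sketch only,
// since the format isn't currently carried in the readback step:
#if 0
u32 bpp = (u32)DataFormatSizeInBytes(srcFormat);  // srcFormat would have to be added to step.readback.
u32 readbackSizeInBytes = bpp * step.readback.srcRect.extent.width * step.readback.srcRect.extent.height;
#endif
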
void VulkanQueueRunner::PerformReadbackImage(const VKRStep &step, VkCommandBuffer cmd) {
	// TODO: Clean this up - just reusing SetupTransitionToTransferSrc.
	VkImageLayout layout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
	recordBarrier_.TransitionColorImageAuto(step.readback_image.image, &layout, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, 0, 1, 1);
	recordBarrier_.Flush(cmd);

	ResizeReadbackBuffer(&syncReadback_, sizeof(uint32_t) * step.readback_image.srcRect.extent.width * step.readback_image.srcRect.extent.height);

	VkBufferImageCopy region{};
	region.imageOffset = { step.readback_image.srcRect.offset.x, step.readback_image.srcRect.offset.y, 0 };
	region.imageExtent = { step.readback_image.srcRect.extent.width, step.readback_image.srcRect.extent.height, 1 };
	region.imageSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	region.imageSubresource.layerCount = 1;
	region.imageSubresource.mipLevel = step.readback_image.mipLevel;
	region.bufferOffset = 0;
	region.bufferRowLength = step.readback_image.srcRect.extent.width;
	region.bufferImageHeight = step.readback_image.srcRect.extent.height;
	vkCmdCopyImageToBuffer(cmd, step.readback_image.image, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, syncReadback_.buffer, 1, &region);

	// Now transition it back so it can be sampled as a texture again.
	recordBarrier_.TransitionImage(step.readback_image.image, 0, 1, 1,  // I don't think we have any multilayer cases for regular textures, unlike in PerformReadback above.
		VK_IMAGE_ASPECT_COLOR_BIT,
		VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL,
		VK_ACCESS_TRANSFER_READ_BIT, VK_ACCESS_SHADER_READ_BIT,
		VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
	recordBarrier_.Flush(cmd);  // Probably not needed.

	// NOTE: The buffer can't be read by the CPU here - we need to sync first.
	// Doing that will also act like a heavyweight barrier, ensuring that device writes are visible on the host.
}

bool VulkanQueueRunner::CopyReadbackBuffer(FrameData &frameData, VKRFramebuffer *src, int width, int height, Draw::DataFormat srcFormat, Draw::DataFormat destFormat, int pixelStride, uint8_t *pixels) {
	CachedReadback *readback = &syncReadback_;

	// Look up in the readback cache.
	if (src) {
		ReadbackKey key;
		key.framebuf = src;
		key.width = width;
		key.height = height;
		CachedReadback *cached;
		if (frameData.readbacks_.Get(key, &cached)) {
			readback = cached;
		} else {
			// Didn't have a cached image ready yet.
			return false;
		}
	}

	if (!readback->buffer)
		return false;  // Didn't find anything in the cache, or something has gone really wrong.

	// Copy from the buffer to the requested address in RAM.
	void *mappedData;
	const size_t srcPixelSize = DataFormatSizeInBytes(srcFormat);
	VkResult res = vmaMapMemory(vulkan_->Allocator(), readback->allocation, &mappedData);

	if (res != VK_SUCCESS) {
		ERROR_LOG(Log::G3D, "CopyReadbackBuffer: vmaMapMemory failed! result=%d", (int)res);
		return false;
	}

	if (!readback->isCoherent) {
		// vmaInvalidateAllocation rounds the range to nonCoherentAtomSize internally, so no manual alignment needed here.
		vmaInvalidateAllocation(vulkan_->Allocator(), readback->allocation, 0, width * height * srcPixelSize);
	}

	// TODO: Perform these conversions in a compute shader on the GPU.
	if (srcFormat == Draw::DataFormat::R8G8B8A8_UNORM) {
		ConvertFromRGBA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat);
	} else if (srcFormat == Draw::DataFormat::B8G8R8A8_UNORM) {
		ConvertFromBGRA8888(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, destFormat);
	} else if (srcFormat == destFormat) {
		// When the formats match, we can simply copy row by row, whatever the format is.
		uint8_t *dstRow = pixels;
		const uint8_t *srcRow = (const uint8_t *)mappedData;
		for (int y = 0; y < height; ++y) {
			memcpy(dstRow, srcRow, width * srcPixelSize);
			srcRow += width * srcPixelSize;
			dstRow += pixelStride * srcPixelSize;
		}
	} else if (destFormat == Draw::DataFormat::D32F) {
		ConvertToD32F(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, srcFormat);
	} else if (destFormat == Draw::DataFormat::D16) {
		ConvertToD16(pixels, (const uint8_t *)mappedData, pixelStride, width, width, height, srcFormat);
	} else {
		// TODO: Maybe a depth conversion or something?
		ERROR_LOG(Log::G3D, "CopyReadbackBuffer: Unknown format");
		_assert_msg_(false, "CopyReadbackBuffer: Unknown src format %d", (int)srcFormat);
	}

	vmaUnmapMemory(vulkan_->Allocator(), readback->allocation);
	return true;
}

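// Typical call shape for CopyReadbackBuffer (hypothetical sketch - `runner`, `frameData`, `fb`
// and the 512x272 dimensions are made-up values, not taken from a real call site):
#if 0
std::vector<uint8_t> out(512 * 272 * 4);
runner->CopyReadbackBuffer(frameData, fb, 512, 272,
	Draw::DataFormat::R8G8B8A8_UNORM, Draw::DataFormat::R8G8B8A8_UNORM,
	512, out.data());  // pixelStride is measured in pixels, matching the width here.
#endif
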
const char *VKRRenderCommandToString(VKRRenderCommand cmd) {
	const char * const str[] = {
		"REMOVED",
		"BIND_GRAPHICS_PIPELINE",  // async
		"STENCIL",
		"BLEND",
		"VIEWPORT",
		"SCISSOR",
		"CLEAR",
		"DRAW",
		"DRAW_INDEXED",
		"PUSH_CONSTANTS",
		"DEBUG_ANNOTATION",
	};
	if ((int)cmd < ARRAY_SIZE(str)) {
		return str[(int)cmd];
	} else {
		return "N/A";
	}
}