GitHub Repository: godotengine/godot
Path: blob/master/drivers/vulkan/rendering_device_driver_vulkan.cpp

/**************************************************************************/
/*  rendering_device_driver_vulkan.cpp                                    */
/**************************************************************************/
/*                         This file is part of:                          */
/*                             GODOT ENGINE                               */
/*                        https://godotengine.org                         */
/**************************************************************************/
/* Copyright (c) 2014-present Godot Engine contributors (see AUTHORS.md). */
/* Copyright (c) 2007-2014 Juan Linietsky, Ariel Manzur.                  */
/*                                                                        */
/* Permission is hereby granted, free of charge, to any person obtaining  */
/* a copy of this software and associated documentation files (the        */
/* "Software"), to deal in the Software without restriction, including    */
/* without limitation the rights to use, copy, modify, merge, publish,    */
/* distribute, sublicense, and/or sell copies of the Software, and to     */
/* permit persons to whom the Software is furnished to do so, subject to  */
/* the following conditions:                                              */
/*                                                                        */
/* The above copyright notice and this permission notice shall be         */
/* included in all copies or substantial portions of the Software.        */
/*                                                                        */
/* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,        */
/* EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF     */
/* MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. */
/* IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY   */
/* CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,   */
/* TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE      */
/* SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.                 */
/**************************************************************************/

#include "rendering_device_driver_vulkan.h"

#include "core/config/project_settings.h"
#include "core/io/marshalls.h"
#include "vulkan_hooks.h"

#include "thirdparty/misc/smolv.h"

#if defined(ANDROID_ENABLED)
#include "platform/android/java_godot_wrapper.h"
#include "platform/android/os_android.h"
#include "platform/android/thread_jandroid.h"
#endif

#if defined(SWAPPY_FRAME_PACING_ENABLED)
#include "thirdparty/swappy-frame-pacing/swappyVk.h"
#endif

#define ARRAY_SIZE(a) std::size(a)

// Disable raytracing support on macOS and iOS due to MoltenVK limitations.
#if !(defined(MACOS_ENABLED) || defined(IOS_ENABLED))
#define VULKAN_RAYTRACING_ENABLED 1
#else
#define VULKAN_RAYTRACING_ENABLED 0
#endif

#define PRINT_NATIVE_COMMANDS 0

// Enable the use of re-spirv for optimizing shaders after applying specialization constants.
#define RESPV_ENABLED 1

// Only enable function inlining for re-spirv when dealing with a shader that uses specialization constants.
#define RESPV_ONLY_INLINE_SHADERS_WITH_SPEC_CONSTANTS 1

// Print additional information about every shader optimized with re-spirv.
#define RESPV_VERBOSE 0

// Disable dead code elimination when using re-spirv.
#define RESPV_DONT_REMOVE_DEAD_CODE 0

// Record numerous statistics about pipeline creation such as time and shader sizes. When combined with enabling
// and disabling re-spirv, this can be used to measure its effects.
#define RECORD_PIPELINE_STATISTICS 0

#if RECORD_PIPELINE_STATISTICS
#include "core/io/file_access.h"
#define RECORD_PIPELINE_STATISTICS_PATH "./pipelines.csv"
#endif

/*****************/
/**** GENERIC ****/
/*****************/

#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
static const uint32_t BREADCRUMB_BUFFER_ENTRIES = 512u;
#endif

static const uint32_t MAX_DYNAMIC_BUFFERS = 8u; // Minimum guaranteed by Vulkan.
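// Note: the value of 8 matches the Vulkan spec's required minimum for
// maxDescriptorSetUniformBuffersDynamic, so every conformant implementation
// guarantees at least this many dynamic buffers per descriptor set.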

static const VkFormat RD_TO_VK_FORMAT[RDD::DATA_FORMAT_MAX] = {
	VK_FORMAT_R4G4_UNORM_PACK8,
	VK_FORMAT_R4G4B4A4_UNORM_PACK16,
	VK_FORMAT_B4G4R4A4_UNORM_PACK16,
	VK_FORMAT_R5G6B5_UNORM_PACK16,
	VK_FORMAT_B5G6R5_UNORM_PACK16,
	VK_FORMAT_R5G5B5A1_UNORM_PACK16,
	VK_FORMAT_B5G5R5A1_UNORM_PACK16,
	VK_FORMAT_A1R5G5B5_UNORM_PACK16,
	VK_FORMAT_R8_UNORM,
	VK_FORMAT_R8_SNORM,
	VK_FORMAT_R8_USCALED,
	VK_FORMAT_R8_SSCALED,
	VK_FORMAT_R8_UINT,
	VK_FORMAT_R8_SINT,
	VK_FORMAT_R8_SRGB,
	VK_FORMAT_R8G8_UNORM,
	VK_FORMAT_R8G8_SNORM,
	VK_FORMAT_R8G8_USCALED,
	VK_FORMAT_R8G8_SSCALED,
	VK_FORMAT_R8G8_UINT,
	VK_FORMAT_R8G8_SINT,
	VK_FORMAT_R8G8_SRGB,
	VK_FORMAT_R8G8B8_UNORM,
	VK_FORMAT_R8G8B8_SNORM,
	VK_FORMAT_R8G8B8_USCALED,
	VK_FORMAT_R8G8B8_SSCALED,
	VK_FORMAT_R8G8B8_UINT,
	VK_FORMAT_R8G8B8_SINT,
	VK_FORMAT_R8G8B8_SRGB,
	VK_FORMAT_B8G8R8_UNORM,
	VK_FORMAT_B8G8R8_SNORM,
	VK_FORMAT_B8G8R8_USCALED,
	VK_FORMAT_B8G8R8_SSCALED,
	VK_FORMAT_B8G8R8_UINT,
	VK_FORMAT_B8G8R8_SINT,
	VK_FORMAT_B8G8R8_SRGB,
	VK_FORMAT_R8G8B8A8_UNORM,
	VK_FORMAT_R8G8B8A8_SNORM,
	VK_FORMAT_R8G8B8A8_USCALED,
	VK_FORMAT_R8G8B8A8_SSCALED,
	VK_FORMAT_R8G8B8A8_UINT,
	VK_FORMAT_R8G8B8A8_SINT,
	VK_FORMAT_R8G8B8A8_SRGB,
	VK_FORMAT_B8G8R8A8_UNORM,
	VK_FORMAT_B8G8R8A8_SNORM,
	VK_FORMAT_B8G8R8A8_USCALED,
	VK_FORMAT_B8G8R8A8_SSCALED,
	VK_FORMAT_B8G8R8A8_UINT,
	VK_FORMAT_B8G8R8A8_SINT,
	VK_FORMAT_B8G8R8A8_SRGB,
	VK_FORMAT_A8B8G8R8_UNORM_PACK32,
	VK_FORMAT_A8B8G8R8_SNORM_PACK32,
	VK_FORMAT_A8B8G8R8_USCALED_PACK32,
	VK_FORMAT_A8B8G8R8_SSCALED_PACK32,
	VK_FORMAT_A8B8G8R8_UINT_PACK32,
	VK_FORMAT_A8B8G8R8_SINT_PACK32,
	VK_FORMAT_A8B8G8R8_SRGB_PACK32,
	VK_FORMAT_A2R10G10B10_UNORM_PACK32,
	VK_FORMAT_A2R10G10B10_SNORM_PACK32,
	VK_FORMAT_A2R10G10B10_USCALED_PACK32,
	VK_FORMAT_A2R10G10B10_SSCALED_PACK32,
	VK_FORMAT_A2R10G10B10_UINT_PACK32,
	VK_FORMAT_A2R10G10B10_SINT_PACK32,
	VK_FORMAT_A2B10G10R10_UNORM_PACK32,
	VK_FORMAT_A2B10G10R10_SNORM_PACK32,
	VK_FORMAT_A2B10G10R10_USCALED_PACK32,
	VK_FORMAT_A2B10G10R10_SSCALED_PACK32,
	VK_FORMAT_A2B10G10R10_UINT_PACK32,
	VK_FORMAT_A2B10G10R10_SINT_PACK32,
	VK_FORMAT_R16_UNORM,
	VK_FORMAT_R16_SNORM,
	VK_FORMAT_R16_USCALED,
	VK_FORMAT_R16_SSCALED,
	VK_FORMAT_R16_UINT,
	VK_FORMAT_R16_SINT,
	VK_FORMAT_R16_SFLOAT,
	VK_FORMAT_R16G16_UNORM,
	VK_FORMAT_R16G16_SNORM,
	VK_FORMAT_R16G16_USCALED,
	VK_FORMAT_R16G16_SSCALED,
	VK_FORMAT_R16G16_UINT,
	VK_FORMAT_R16G16_SINT,
	VK_FORMAT_R16G16_SFLOAT,
	VK_FORMAT_R16G16B16_UNORM,
	VK_FORMAT_R16G16B16_SNORM,
	VK_FORMAT_R16G16B16_USCALED,
	VK_FORMAT_R16G16B16_SSCALED,
	VK_FORMAT_R16G16B16_UINT,
	VK_FORMAT_R16G16B16_SINT,
	VK_FORMAT_R16G16B16_SFLOAT,
	VK_FORMAT_R16G16B16A16_UNORM,
	VK_FORMAT_R16G16B16A16_SNORM,
	VK_FORMAT_R16G16B16A16_USCALED,
	VK_FORMAT_R16G16B16A16_SSCALED,
	VK_FORMAT_R16G16B16A16_UINT,
	VK_FORMAT_R16G16B16A16_SINT,
	VK_FORMAT_R16G16B16A16_SFLOAT,
	VK_FORMAT_R32_UINT,
	VK_FORMAT_R32_SINT,
	VK_FORMAT_R32_SFLOAT,
	VK_FORMAT_R32G32_UINT,
	VK_FORMAT_R32G32_SINT,
	VK_FORMAT_R32G32_SFLOAT,
	VK_FORMAT_R32G32B32_UINT,
	VK_FORMAT_R32G32B32_SINT,
	VK_FORMAT_R32G32B32_SFLOAT,
	VK_FORMAT_R32G32B32A32_UINT,
	VK_FORMAT_R32G32B32A32_SINT,
	VK_FORMAT_R32G32B32A32_SFLOAT,
	VK_FORMAT_R64_UINT,
	VK_FORMAT_R64_SINT,
	VK_FORMAT_R64_SFLOAT,
	VK_FORMAT_R64G64_UINT,
	VK_FORMAT_R64G64_SINT,
	VK_FORMAT_R64G64_SFLOAT,
	VK_FORMAT_R64G64B64_UINT,
	VK_FORMAT_R64G64B64_SINT,
	VK_FORMAT_R64G64B64_SFLOAT,
	VK_FORMAT_R64G64B64A64_UINT,
	VK_FORMAT_R64G64B64A64_SINT,
	VK_FORMAT_R64G64B64A64_SFLOAT,
	VK_FORMAT_B10G11R11_UFLOAT_PACK32,
	VK_FORMAT_E5B9G9R9_UFLOAT_PACK32,
	VK_FORMAT_D16_UNORM,
	VK_FORMAT_X8_D24_UNORM_PACK32,
	VK_FORMAT_D32_SFLOAT,
	VK_FORMAT_S8_UINT,
	VK_FORMAT_D16_UNORM_S8_UINT,
	VK_FORMAT_D24_UNORM_S8_UINT,
	VK_FORMAT_D32_SFLOAT_S8_UINT,
	VK_FORMAT_BC1_RGB_UNORM_BLOCK,
	VK_FORMAT_BC1_RGB_SRGB_BLOCK,
	VK_FORMAT_BC1_RGBA_UNORM_BLOCK,
	VK_FORMAT_BC1_RGBA_SRGB_BLOCK,
	VK_FORMAT_BC2_UNORM_BLOCK,
	VK_FORMAT_BC2_SRGB_BLOCK,
	VK_FORMAT_BC3_UNORM_BLOCK,
	VK_FORMAT_BC3_SRGB_BLOCK,
	VK_FORMAT_BC4_UNORM_BLOCK,
	VK_FORMAT_BC4_SNORM_BLOCK,
	VK_FORMAT_BC5_UNORM_BLOCK,
	VK_FORMAT_BC5_SNORM_BLOCK,
	VK_FORMAT_BC6H_UFLOAT_BLOCK,
	VK_FORMAT_BC6H_SFLOAT_BLOCK,
	VK_FORMAT_BC7_UNORM_BLOCK,
	VK_FORMAT_BC7_SRGB_BLOCK,
	VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK,
	VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK,
	VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK,
	VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK,
	VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK,
	VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK,
	VK_FORMAT_EAC_R11_UNORM_BLOCK,
	VK_FORMAT_EAC_R11_SNORM_BLOCK,
	VK_FORMAT_EAC_R11G11_UNORM_BLOCK,
	VK_FORMAT_EAC_R11G11_SNORM_BLOCK,
	VK_FORMAT_ASTC_4x4_UNORM_BLOCK,
	VK_FORMAT_ASTC_4x4_SRGB_BLOCK,
	VK_FORMAT_ASTC_5x4_UNORM_BLOCK,
	VK_FORMAT_ASTC_5x4_SRGB_BLOCK,
	VK_FORMAT_ASTC_5x5_UNORM_BLOCK,
	VK_FORMAT_ASTC_5x5_SRGB_BLOCK,
	VK_FORMAT_ASTC_6x5_UNORM_BLOCK,
	VK_FORMAT_ASTC_6x5_SRGB_BLOCK,
	VK_FORMAT_ASTC_6x6_UNORM_BLOCK,
	VK_FORMAT_ASTC_6x6_SRGB_BLOCK,
	VK_FORMAT_ASTC_8x5_UNORM_BLOCK,
	VK_FORMAT_ASTC_8x5_SRGB_BLOCK,
	VK_FORMAT_ASTC_8x6_UNORM_BLOCK,
	VK_FORMAT_ASTC_8x6_SRGB_BLOCK,
	VK_FORMAT_ASTC_8x8_UNORM_BLOCK,
	VK_FORMAT_ASTC_8x8_SRGB_BLOCK,
	VK_FORMAT_ASTC_10x5_UNORM_BLOCK,
	VK_FORMAT_ASTC_10x5_SRGB_BLOCK,
	VK_FORMAT_ASTC_10x6_UNORM_BLOCK,
	VK_FORMAT_ASTC_10x6_SRGB_BLOCK,
	VK_FORMAT_ASTC_10x8_UNORM_BLOCK,
	VK_FORMAT_ASTC_10x8_SRGB_BLOCK,
	VK_FORMAT_ASTC_10x10_UNORM_BLOCK,
	VK_FORMAT_ASTC_10x10_SRGB_BLOCK,
	VK_FORMAT_ASTC_12x10_UNORM_BLOCK,
	VK_FORMAT_ASTC_12x10_SRGB_BLOCK,
	VK_FORMAT_ASTC_12x12_UNORM_BLOCK,
	VK_FORMAT_ASTC_12x12_SRGB_BLOCK,
	VK_FORMAT_G8B8G8R8_422_UNORM,
	VK_FORMAT_B8G8R8G8_422_UNORM,
	VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM,
	VK_FORMAT_G8_B8R8_2PLANE_420_UNORM,
	VK_FORMAT_G8_B8_R8_3PLANE_422_UNORM,
	VK_FORMAT_G8_B8R8_2PLANE_422_UNORM,
	VK_FORMAT_G8_B8_R8_3PLANE_444_UNORM,
	VK_FORMAT_R10X6_UNORM_PACK16,
	VK_FORMAT_R10X6G10X6_UNORM_2PACK16,
	VK_FORMAT_R10X6G10X6B10X6A10X6_UNORM_4PACK16,
	VK_FORMAT_G10X6B10X6G10X6R10X6_422_UNORM_4PACK16,
	VK_FORMAT_B10X6G10X6R10X6G10X6_422_UNORM_4PACK16,
	VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_420_UNORM_3PACK16,
	VK_FORMAT_G10X6_B10X6R10X6_2PLANE_420_UNORM_3PACK16,
	VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_422_UNORM_3PACK16,
	VK_FORMAT_G10X6_B10X6R10X6_2PLANE_422_UNORM_3PACK16,
	VK_FORMAT_G10X6_B10X6_R10X6_3PLANE_444_UNORM_3PACK16,
	VK_FORMAT_R12X4_UNORM_PACK16,
	VK_FORMAT_R12X4G12X4_UNORM_2PACK16,
	VK_FORMAT_R12X4G12X4B12X4A12X4_UNORM_4PACK16,
	VK_FORMAT_G12X4B12X4G12X4R12X4_422_UNORM_4PACK16,
	VK_FORMAT_B12X4G12X4R12X4G12X4_422_UNORM_4PACK16,
	VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_420_UNORM_3PACK16,
	VK_FORMAT_G12X4_B12X4R12X4_2PLANE_420_UNORM_3PACK16,
	VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_422_UNORM_3PACK16,
	VK_FORMAT_G12X4_B12X4R12X4_2PLANE_422_UNORM_3PACK16,
	VK_FORMAT_G12X4_B12X4_R12X4_3PLANE_444_UNORM_3PACK16,
	VK_FORMAT_G16B16G16R16_422_UNORM,
	VK_FORMAT_B16G16R16G16_422_UNORM,
	VK_FORMAT_G16_B16_R16_3PLANE_420_UNORM,
	VK_FORMAT_G16_B16R16_2PLANE_420_UNORM,
	VK_FORMAT_G16_B16_R16_3PLANE_422_UNORM,
	VK_FORMAT_G16_B16R16_2PLANE_422_UNORM,
	VK_FORMAT_G16_B16_R16_3PLANE_444_UNORM,
	VK_FORMAT_ASTC_4x4_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_5x4_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_5x5_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_6x5_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_6x6_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_8x5_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_8x6_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_8x8_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_10x5_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_10x6_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_10x8_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_10x10_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_12x10_SFLOAT_BLOCK,
	VK_FORMAT_ASTC_12x12_SFLOAT_BLOCK,
};
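
// The table above is indexed directly by the RD data format enum. A minimal
// usage sketch (hypothetical caller, assuming a format value from RDD):
//
//   RDD::DataFormat fmt = RDD::DATA_FORMAT_R8G8B8A8_UNORM;
//   VkFormat vk_fmt = RD_TO_VK_FORMAT[fmt]; // Yields VK_FORMAT_R8G8B8A8_UNORM.
//
// This only works because the table entries are declared in exactly the same
// order as the DATA_FORMAT_* enum members, which RDD::DATA_FORMAT_MAX pins down.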

static VkImageLayout RD_TO_VK_LAYOUT[RDD::TEXTURE_LAYOUT_MAX] = {
	VK_IMAGE_LAYOUT_UNDEFINED, // TEXTURE_LAYOUT_UNDEFINED
	VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_GENERAL
	VK_IMAGE_LAYOUT_GENERAL, // TEXTURE_LAYOUT_STORAGE_OPTIMAL
	VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
	VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL
	VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL
	VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL, // TEXTURE_LAYOUT_SHADER_READ_ONLY_OPTIMAL
	VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_COPY_SRC_OPTIMAL
	VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_COPY_DST_OPTIMAL
	VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_SRC_OPTIMAL
	VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, // TEXTURE_LAYOUT_RESOLVE_DST_OPTIMAL
	VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR, // TEXTURE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL
	VK_IMAGE_LAYOUT_FRAGMENT_DENSITY_MAP_OPTIMAL_EXT, // TEXTURE_LAYOUT_FRAGMENT_DENSITY_MAP_ATTACHMENT_OPTIMAL
};

static VkPipelineStageFlags _rd_to_vk_pipeline_stages(BitField<RDD::PipelineStageBits> p_stages) {
	VkPipelineStageFlags vk_flags = 0;
	if (p_stages.has_flag(RDD::PIPELINE_STAGE_COPY_BIT) || p_stages.has_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT)) {
		// Transfer has been split into copy and resolve bits. Clear them and merge them into one bit.
		vk_flags |= VK_PIPELINE_STAGE_TRANSFER_BIT;
		p_stages.clear_flag(RDD::PIPELINE_STAGE_COPY_BIT);
		p_stages.clear_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT);
	}

	if (p_stages.has_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT)) {
		// Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified.
		// Therefore, storage is never cleared with an explicit command.
		p_stages.clear_flag(RDD::PIPELINE_STAGE_CLEAR_STORAGE_BIT);
	}

	// The rest of the flags have compatible numeric values with Vulkan.
	return VkPipelineStageFlags(p_stages) | vk_flags;
}

static VkAccessFlags _rd_to_vk_access_flags(BitField<RDD::BarrierAccessBits> p_access) {
	VkAccessFlags vk_flags = 0;
	if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT)) {
		vk_flags |= VK_ACCESS_TRANSFER_READ_BIT;
		p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_READ_BIT);
		p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_READ_BIT);
	}

	if (p_access.has_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT) || p_access.has_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT)) {
		vk_flags |= VK_ACCESS_TRANSFER_WRITE_BIT;
		p_access.clear_flag(RDD::BARRIER_ACCESS_COPY_WRITE_BIT);
		p_access.clear_flag(RDD::BARRIER_ACCESS_RESOLVE_WRITE_BIT);
	}

	if (p_access.has_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT)) {
		// Vulkan should never use this as API_TRAIT_CLEAR_RESOURCES_WITH_VIEWS is not specified.
		// Therefore, storage is never cleared with an explicit command.
		p_access.clear_flag(RDD::BARRIER_ACCESS_STORAGE_CLEAR_BIT);
	}

	// The rest of the flags have compatible numeric values with Vulkan.
	return VkAccessFlags(p_access) | vk_flags;
}
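
// A quick worked example of the merge behavior above (hypothetical values,
// assuming both copy and resolve bits were set by the caller):
//
//   BitField<RDD::PipelineStageBits> stages;
//   stages.set_flag(RDD::PIPELINE_STAGE_COPY_BIT);
//   stages.set_flag(RDD::PIPELINE_STAGE_RESOLVE_BIT);
//   VkPipelineStageFlags vk = _rd_to_vk_pipeline_stages(stages);
//   // vk == VK_PIPELINE_STAGE_TRANSFER_BIT: both RD bits collapse into the
//   // single Vulkan transfer stage, and no other bits pass through.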

// RDD::CompareOperator == VkCompareOp.
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NEVER, VK_COMPARE_OP_NEVER));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS, VK_COMPARE_OP_LESS));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_EQUAL, VK_COMPARE_OP_EQUAL));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_LESS_OR_EQUAL, VK_COMPARE_OP_LESS_OR_EQUAL));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER, VK_COMPARE_OP_GREATER));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_NOT_EQUAL, VK_COMPARE_OP_NOT_EQUAL));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_GREATER_OR_EQUAL, VK_COMPARE_OP_GREATER_OR_EQUAL));
static_assert(ENUM_MEMBERS_EQUAL(RDD::COMPARE_OP_ALWAYS, VK_COMPARE_OP_ALWAYS));

static_assert(ARRAYS_COMPATIBLE_FIELDWISE(Rect2i, VkRect2D));
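
// These asserts are what make zero-cost casts between RD and Vulkan types safe.
// A minimal sketch of what they enable (hypothetical snippet, not from this file):
//
//   Rect2i region = ...;
//   // Because Rect2i and VkRect2D are fieldwise-compatible, the driver can
//   // reinterpret instead of converting member by member:
//   VkRect2D vk_region = *reinterpret_cast<const VkRect2D *>(&region);
//
// If either the enums or the structs ever drifted out of sync, compilation would
// fail here rather than producing silently wrong values at runtime.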

uint32_t RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_stages_flags_rd() const {
	uint32_t flags = 0;

	if (supported_stages & VK_SHADER_STAGE_VERTEX_BIT) {
		flags += SHADER_STAGE_VERTEX_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
		flags += SHADER_STAGE_TESSELATION_CONTROL_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
		flags += SHADER_STAGE_TESSELATION_EVALUATION_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
		// FIXME: Add shader stage geometry bit.
	}
	if (supported_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
		flags += SHADER_STAGE_FRAGMENT_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_COMPUTE_BIT) {
		flags += SHADER_STAGE_COMPUTE_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_RAYGEN_BIT_KHR) {
		flags += SHADER_STAGE_RAYGEN_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_ANY_HIT_BIT_KHR) {
		flags += SHADER_STAGE_ANY_HIT_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR) {
		flags += SHADER_STAGE_CLOSEST_HIT_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_MISS_BIT_KHR) {
		flags += SHADER_STAGE_MISS_BIT;
	}
	if (supported_stages & VK_SHADER_STAGE_INTERSECTION_BIT_KHR) {
		flags += SHADER_STAGE_INTERSECTION_BIT;
	}

	return flags;
}

String RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_stages_desc() const {
	String res;

	if (supported_stages & VK_SHADER_STAGE_VERTEX_BIT) {
		res += ", STAGE_VERTEX";
	}
	if (supported_stages & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) {
		res += ", STAGE_TESSELLATION_CONTROL";
	}
	if (supported_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) {
		res += ", STAGE_TESSELLATION_EVALUATION";
	}
	if (supported_stages & VK_SHADER_STAGE_GEOMETRY_BIT) {
		res += ", STAGE_GEOMETRY";
	}
	if (supported_stages & VK_SHADER_STAGE_FRAGMENT_BIT) {
		res += ", STAGE_FRAGMENT";
	}
	if (supported_stages & VK_SHADER_STAGE_COMPUTE_BIT) {
		res += ", STAGE_COMPUTE";
	}

	// These are not defined on Android GRMBL.
	if (supported_stages & 0x00000100 /* VK_SHADER_STAGE_RAYGEN_BIT_KHR */) {
		res += ", STAGE_RAYGEN_KHR";
	}
	if (supported_stages & 0x00000200 /* VK_SHADER_STAGE_ANY_HIT_BIT_KHR */) {
		res += ", STAGE_ANY_HIT_KHR";
	}
	if (supported_stages & 0x00000400 /* VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR */) {
		res += ", STAGE_CLOSEST_HIT_KHR";
	}
	if (supported_stages & 0x00000800 /* VK_SHADER_STAGE_MISS_BIT_KHR */) {
		res += ", STAGE_MISS_KHR";
	}
	if (supported_stages & 0x00001000 /* VK_SHADER_STAGE_INTERSECTION_BIT_KHR */) {
		res += ", STAGE_INTERSECTION_KHR";
	}
	if (supported_stages & 0x00002000 /* VK_SHADER_STAGE_CALLABLE_BIT_KHR */) {
		res += ", STAGE_CALLABLE_KHR";
	}
	if (supported_stages & 0x00000040 /* VK_SHADER_STAGE_TASK_BIT_NV */) {
		res += ", STAGE_TASK_NV";
	}
	if (supported_stages & 0x00000080 /* VK_SHADER_STAGE_MESH_BIT_NV */) {
		res += ", STAGE_MESH_NV";
	}

	return res.substr(2); // Remove first ", ".
}

uint32_t RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_operations_flags_rd() const {
	uint32_t flags = 0;

	if (supported_operations & VK_SUBGROUP_FEATURE_BASIC_BIT) {
		flags += SUBGROUP_BASIC_BIT;
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_VOTE_BIT) {
		flags += SUBGROUP_VOTE_BIT;
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) {
		flags += SUBGROUP_ARITHMETIC_BIT;
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_BALLOT_BIT) {
		flags += SUBGROUP_BALLOT_BIT;
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) {
		flags += SUBGROUP_SHUFFLE_BIT;
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) {
		flags += SUBGROUP_SHUFFLE_RELATIVE_BIT;
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) {
		flags += SUBGROUP_CLUSTERED_BIT;
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_QUAD_BIT) {
		flags += SUBGROUP_QUAD_BIT;
	}

	return flags;
}

String RenderingDeviceDriverVulkan::SubgroupCapabilities::supported_operations_desc() const {
	String res;

	if (supported_operations & VK_SUBGROUP_FEATURE_BASIC_BIT) {
		res += ", FEATURE_BASIC";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_VOTE_BIT) {
		res += ", FEATURE_VOTE";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_ARITHMETIC_BIT) {
		res += ", FEATURE_ARITHMETIC";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_BALLOT_BIT) {
		res += ", FEATURE_BALLOT";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_BIT) {
		res += ", FEATURE_SHUFFLE";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT) {
		res += ", FEATURE_SHUFFLE_RELATIVE";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_CLUSTERED_BIT) {
		res += ", FEATURE_CLUSTERED";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_QUAD_BIT) {
		res += ", FEATURE_QUAD";
	}
	if (supported_operations & VK_SUBGROUP_FEATURE_PARTITIONED_BIT_NV) {
		res += ", FEATURE_PARTITIONED_NV";
	}

	return res.substr(2); // Remove first ", ".
}

/*****************/
/**** GENERIC ****/
/*****************/

void RenderingDeviceDriverVulkan::_register_requested_device_extension(const CharString &p_extension_name, bool p_required) {
	ERR_FAIL_COND(requested_device_extensions.has(p_extension_name));
	requested_device_extensions[p_extension_name] = p_required;
}

Error RenderingDeviceDriverVulkan::_initialize_device_extensions() {
	enabled_device_extension_names.clear();

	_register_requested_device_extension(VK_KHR_SWAPCHAIN_EXTENSION_NAME, true);
	_register_requested_device_extension(VK_KHR_MULTIVIEW_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_STORAGE_BUFFER_STORAGE_CLASS_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_16BIT_STORAGE_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_MAINTENANCE_2_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_DEFERRED_HOST_OPERATIONS_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME, false);
	_register_requested_device_extension(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME, false);

	// We don't actually use this extension, but some runtime components on some platforms
	// can and will fill the validation layers with useless info if it is not enabled.
	_register_requested_device_extension(VK_KHR_EXTERNAL_MEMORY_FD_EXTENSION_NAME, false);

	if (Engine::get_singleton()->is_generate_spirv_debug_info_enabled()) {
		_register_requested_device_extension(VK_KHR_SHADER_NON_SEMANTIC_INFO_EXTENSION_NAME, true);
	}

#if defined(VK_TRACK_DEVICE_MEMORY)
	if (Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
		_register_requested_device_extension(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME, false);
	}
#endif
	_register_requested_device_extension(VK_EXT_DEVICE_FAULT_EXTENSION_NAME, false);

	{
		// Debug marker extensions.
		// Should be last element in the array.
#ifdef DEV_ENABLED
		bool want_debug_markers = true;
#else
		bool want_debug_markers = OS::get_singleton()->is_stdout_verbose();
#endif
		if (want_debug_markers) {
			_register_requested_device_extension(VK_EXT_DEBUG_MARKER_EXTENSION_NAME, false);
		}
	}

	uint32_t device_extension_count = 0;
	VkResult err = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &device_extension_count, nullptr);
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);
	ERR_FAIL_COND_V_MSG(device_extension_count == 0, ERR_CANT_CREATE, "vkEnumerateDeviceExtensionProperties failed to find any extensions\n\nDo you have a compatible Vulkan installable client driver (ICD) installed?");

	TightLocalVector<VkExtensionProperties> device_extensions;
	device_extensions.resize(device_extension_count);
	err = vkEnumerateDeviceExtensionProperties(physical_device, nullptr, &device_extension_count, device_extensions.ptr());
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

#if defined(SWAPPY_FRAME_PACING_ENABLED)
	if (swappy_frame_pacer_enable) {
		char **swappy_required_extensions;
		uint32_t swappy_required_extensions_count = 0;
		// Determine number of extensions required by Swappy frame pacer.
		SwappyVk_determineDeviceExtensions(physical_device, device_extension_count, device_extensions.ptr(), &swappy_required_extensions_count, nullptr);

		if (swappy_required_extensions_count < device_extension_count) {
			// Determine the actual extensions.
			swappy_required_extensions = (char **)malloc(swappy_required_extensions_count * sizeof(char *));
			char *pRequiredExtensionsData = (char *)malloc(swappy_required_extensions_count * (VK_MAX_EXTENSION_NAME_SIZE + 1));
			for (uint32_t i = 0; i < swappy_required_extensions_count; i++) {
				swappy_required_extensions[i] = &pRequiredExtensionsData[i * (VK_MAX_EXTENSION_NAME_SIZE + 1)];
			}
			SwappyVk_determineDeviceExtensions(physical_device, device_extension_count,
					device_extensions.ptr(), &swappy_required_extensions_count, swappy_required_extensions);

			// Enable extensions requested by Swappy.
			for (uint32_t i = 0; i < swappy_required_extensions_count; i++) {
				CharString extension_name(swappy_required_extensions[i]);
				if (requested_device_extensions.has(extension_name)) {
					enabled_device_extension_names.insert(extension_name);
				}
			}

			free(pRequiredExtensionsData);
			free(swappy_required_extensions);
		}
	}
#endif

#ifdef DEV_ENABLED
	for (uint32_t i = 0; i < device_extension_count; i++) {
		print_verbose(String("VULKAN: Found device extension ") + String::utf8(device_extensions[i].extensionName));
	}
#endif

	// Enable all extensions that are supported and requested.
	for (uint32_t i = 0; i < device_extension_count; i++) {
		CharString extension_name(device_extensions[i].extensionName);
		if (requested_device_extensions.has(extension_name)) {
			enabled_device_extension_names.insert(extension_name);
		}
	}

	// Now check our requested extensions.
	for (KeyValue<CharString, bool> &requested_extension : requested_device_extensions) {
		if (!enabled_device_extension_names.has(requested_extension.key)) {
			if (requested_extension.value) {
				ERR_FAIL_V_MSG(ERR_BUG, String("Required extension ") + String::utf8(requested_extension.key) + String(" not found."));
			} else {
				print_verbose(String("Optional extension ") + String::utf8(requested_extension.key) + String(" not found"));
			}
		}
	}

	return OK;
}
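
// In short, extension setup is a four-step pipeline: (1) register every
// extension we may want with a required/optional flag, (2) enumerate what the
// driver exposes, (3) enable the intersection of the two sets, and (4) fail
// hard only for missing *required* entries. A condensed sketch of the same
// pattern (hypothetical names, outside any Godot types):
//
//   HashMap<CharString, bool> requested; // Name -> required.
//   HashSet<CharString> enabled;
//   for (const VkExtensionProperties &ext : available) {
//       if (requested.has(ext.extensionName)) {
//           enabled.insert(ext.extensionName);
//       }
//   }
//   for (const KeyValue<CharString, bool> &kv : requested) {
//       if (!enabled.has(kv.key) && kv.value) {
//           return ERR_BUG; // A required extension is missing.
//       }
//   }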

Error RenderingDeviceDriverVulkan::_check_device_features() {
	vkGetPhysicalDeviceFeatures(physical_device, &physical_device_features);

	// Check for required features.
	if (!physical_device_features.imageCubeArray || !physical_device_features.independentBlend) {
		String error_string = vformat("Your GPU (%s) does not support the following features which are required to use Vulkan-based renderers in Godot:\n\n", context_device.name);
		if (!physical_device_features.imageCubeArray) {
			error_string += "- No support for image cube arrays.\n";
		}
		if (!physical_device_features.independentBlend) {
			error_string += "- No support for independentBlend.\n";
		}
		error_string += "\nThis is usually a hardware limitation, so updating graphics drivers won't help in most cases.";

#if defined(ANDROID_ENABLED) || defined(IOS_ENABLED)
		// Android/iOS platform ports currently don't exit themselves when this method returns `ERR_CANT_CREATE`.
		OS::get_singleton()->alert(error_string + "\nClick OK to exit (black screen will be visible).");
#else
		OS::get_singleton()->alert(error_string + "\nClick OK to exit.");
#endif

		return ERR_CANT_CREATE;
	}

	// Opt-in to the features we actually need/use. These can be changed in the future.
	// We do this for multiple reasons:
	//
	// 1. Certain features (like the sparse* ones) cause unnecessary internal driver allocations.
	// 2. Others like shaderStorageImageMultisample are a huge red flag
	//    (MSAA + Storage is rarely needed).
	// 3. Most features when turned off aren't actually off (we just promise the driver not to use them),
	//    and it is the validation layers that will complain. This allows us to target a minimum baseline.
	//
	// TODO: Allow the user to override these settings (i.e. turn off more stuff) using profiles
	// so they can target a broad range of HW. For example, Mali HW does not have
	// shaderClipDistance/shaderCullDistance, so validation would complain if such a feature were used,
	// allowing them to fix the problem without even owning Mali HW to test on.
	//
	// The excluded features are:
	// - robustBufferAccess (can hamper performance on some hardware)
	// - occlusionQueryPrecise
	// - pipelineStatisticsQuery
	// - shaderStorageImageMultisample (unsupported by Intel Arc, prevents us from using MSAA storage accidentally)
	// - shaderResourceResidency
	// - sparseBinding (we don't use sparse features and enabling them causes extra internal allocations inside the Vulkan driver that we don't need)
	// - sparseResidencyBuffer
	// - sparseResidencyImage2D
	// - sparseResidencyImage3D
	// - sparseResidency2Samples
	// - sparseResidency4Samples
	// - sparseResidency8Samples
	// - sparseResidency16Samples
	// - sparseResidencyAliased
	// - inheritedQueries

#define VK_DEVICEFEATURE_ENABLE_IF(x)                                  \
	if (physical_device_features.x) {                                  \
		requested_device_features.x = physical_device_features.x;      \
	} else                                                             \
		((void)0)
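
	// For reference, VK_DEVICEFEATURE_ENABLE_IF(imageCubeArray) expands to:
	//
	//   if (physical_device_features.imageCubeArray) {
	//       requested_device_features.imageCubeArray = physical_device_features.imageCubeArray;
	//   } else
	//       ((void)0)
	//
	// The trailing `else ((void)0)` swallows the invocation's semicolon so each
	// use parses as a single statement, even inside a surrounding if/else.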

	requested_device_features = {};
	VK_DEVICEFEATURE_ENABLE_IF(fullDrawIndexUint32);
	VK_DEVICEFEATURE_ENABLE_IF(imageCubeArray);
	VK_DEVICEFEATURE_ENABLE_IF(independentBlend);
	VK_DEVICEFEATURE_ENABLE_IF(geometryShader);
	VK_DEVICEFEATURE_ENABLE_IF(tessellationShader);
	VK_DEVICEFEATURE_ENABLE_IF(sampleRateShading);
	VK_DEVICEFEATURE_ENABLE_IF(dualSrcBlend);
	VK_DEVICEFEATURE_ENABLE_IF(logicOp);
	VK_DEVICEFEATURE_ENABLE_IF(multiDrawIndirect);
	VK_DEVICEFEATURE_ENABLE_IF(drawIndirectFirstInstance);
	VK_DEVICEFEATURE_ENABLE_IF(depthClamp);
	VK_DEVICEFEATURE_ENABLE_IF(depthBiasClamp);
	VK_DEVICEFEATURE_ENABLE_IF(fillModeNonSolid);
	VK_DEVICEFEATURE_ENABLE_IF(depthBounds);
	VK_DEVICEFEATURE_ENABLE_IF(wideLines);
	VK_DEVICEFEATURE_ENABLE_IF(largePoints);
	VK_DEVICEFEATURE_ENABLE_IF(alphaToOne);
	VK_DEVICEFEATURE_ENABLE_IF(multiViewport);
	VK_DEVICEFEATURE_ENABLE_IF(samplerAnisotropy);
	VK_DEVICEFEATURE_ENABLE_IF(textureCompressionETC2);
	VK_DEVICEFEATURE_ENABLE_IF(textureCompressionASTC_LDR);
	VK_DEVICEFEATURE_ENABLE_IF(textureCompressionBC);
	VK_DEVICEFEATURE_ENABLE_IF(vertexPipelineStoresAndAtomics);
	VK_DEVICEFEATURE_ENABLE_IF(fragmentStoresAndAtomics);
	VK_DEVICEFEATURE_ENABLE_IF(shaderTessellationAndGeometryPointSize);
	VK_DEVICEFEATURE_ENABLE_IF(shaderImageGatherExtended);
	VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageExtendedFormats);
	VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageReadWithoutFormat);
	VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageWriteWithoutFormat);
	VK_DEVICEFEATURE_ENABLE_IF(shaderUniformBufferArrayDynamicIndexing);
	VK_DEVICEFEATURE_ENABLE_IF(shaderSampledImageArrayDynamicIndexing);
	VK_DEVICEFEATURE_ENABLE_IF(shaderStorageBufferArrayDynamicIndexing);
	VK_DEVICEFEATURE_ENABLE_IF(shaderStorageImageArrayDynamicIndexing);
	VK_DEVICEFEATURE_ENABLE_IF(shaderClipDistance);
	VK_DEVICEFEATURE_ENABLE_IF(shaderCullDistance);
	VK_DEVICEFEATURE_ENABLE_IF(shaderFloat64);
	VK_DEVICEFEATURE_ENABLE_IF(shaderInt64);
	VK_DEVICEFEATURE_ENABLE_IF(shaderInt16);
	VK_DEVICEFEATURE_ENABLE_IF(shaderResourceMinLod);
	VK_DEVICEFEATURE_ENABLE_IF(variableMultisampleRate);

	return OK;
}

static uint32_t _align_up(uint32_t size, uint32_t alignment) {
	return (size + (alignment - 1)) & ~(alignment - 1);
}
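
// _align_up rounds `size` up to the next multiple of `alignment` and only
// works for power-of-two alignments. Worked examples:
//
//   _align_up(13, 8) == 16   // (13 + 7) & ~7 = 20 & 0xFFFFFFF8 = 16.
//   _align_up(16, 8) == 16   // Already-aligned values are left untouched.
//   _align_up(17, 8) == 24
//
// It is used below to compute the aligned shader group handle size for raytracing.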

Error RenderingDeviceDriverVulkan::_check_device_capabilities() {
	// Fill device family and version.
	device_capabilities.device_family = DEVICE_VULKAN;
	device_capabilities.version_major = VK_API_VERSION_MAJOR(physical_device_properties.apiVersion);
	device_capabilities.version_minor = VK_API_VERSION_MINOR(physical_device_properties.apiVersion);

	// Cache extension availability we query often.
	framebuffer_depth_resolve = enabled_device_extension_names.has(VK_KHR_DEPTH_STENCIL_RESOLVE_EXTENSION_NAME);

	bool use_fdm_offsets = false;
	if (VulkanHooks::get_singleton() != nullptr) {
		use_fdm_offsets = VulkanHooks::get_singleton()->use_fragment_density_offsets();
	}

	// References:
	// https://www.khronos.org/registry/vulkan/specs/1.2-extensions/man/html/VK_KHR_multiview.html
	// https://www.khronos.org/blog/vulkan-subgroup-tutorial
	const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
	if (functions.GetPhysicalDeviceFeatures2 != nullptr) {
		// We must check that the corresponding extension is present before assuming a feature as enabled.
		// See also: https://github.com/godotengine/godot/issues/65409

		void *next_features = nullptr;
		VkPhysicalDeviceVulkan12Features device_features_vk_1_2 = {};
		VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {};
		VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features = {};
		VkPhysicalDeviceVulkanMemoryModelFeaturesKHR vulkan_memory_model_features = {};
		VkPhysicalDeviceFragmentShadingRateFeaturesKHR fsr_features = {};
		VkPhysicalDeviceFragmentDensityMapFeaturesEXT fdm_features = {};
		VkPhysicalDeviceFragmentDensityMapOffsetFeaturesQCOM fdmo_features_qcom = {};
		VkPhysicalDevice16BitStorageFeaturesKHR storage_feature = {};
		VkPhysicalDeviceMultiviewFeatures multiview_features = {};
		VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {};
		VkPhysicalDeviceVulkanMemoryModelFeatures memory_model_features = {};
		VkPhysicalDeviceAccelerationStructureFeaturesKHR acceleration_structure_features = {};
		VkPhysicalDeviceRayTracingPipelineFeaturesKHR raytracing_pipeline_features = {};
		VkPhysicalDeviceSynchronization2FeaturesKHR sync_2_features = {};
		VkPhysicalDeviceRayTracingValidationFeaturesNV raytracing_validation_features = {};

		const bool use_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2;
		if (use_1_2_features) {
			device_features_vk_1_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES;
			device_features_vk_1_2.pNext = next_features;
			next_features = &device_features_vk_1_2;
		} else {
			if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) {
				shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR;
				shader_features.pNext = next_features;
				next_features = &shader_features;
			}
			if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) {
				buffer_device_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR;
				buffer_device_address_features.pNext = next_features;
				next_features = &buffer_device_address_features;
			}
			if (enabled_device_extension_names.has(VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME)) {
				vulkan_memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR;
				vulkan_memory_model_features.pNext = next_features;
				next_features = &vulkan_memory_model_features;
			}
		}

		if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) {
			fsr_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR;
			fsr_features.pNext = next_features;
			next_features = &fsr_features;
		}

		if (enabled_device_extension_names.has(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME)) {
			fdm_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT;
			fdm_features.pNext = next_features;
			next_features = &fdm_features;
		}

		if (use_fdm_offsets && enabled_device_extension_names.has(VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_EXTENSION_NAME)) {
			fdmo_features_qcom.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_FEATURES_QCOM;
			fdmo_features_qcom.pNext = next_features;
			next_features = &fdmo_features_qcom;
		}

		if (enabled_device_extension_names.has(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) {
			storage_feature.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
			storage_feature.pNext = next_features;
			next_features = &storage_feature;
		}

		if (enabled_device_extension_names.has(VK_KHR_MULTIVIEW_EXTENSION_NAME)) {
			multiview_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES;
			multiview_features.pNext = next_features;
			next_features = &multiview_features;
		}

		if (enabled_device_extension_names.has(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) {
			pipeline_cache_control_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES;
			pipeline_cache_control_features.pNext = next_features;
			next_features = &pipeline_cache_control_features;
		}

		if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) {
			memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES;
			memory_model_features.pNext = next_features;
			next_features = &memory_model_features;
		}

		if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) {
			acceleration_structure_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR;
			acceleration_structure_features.pNext = next_features;
			next_features = &acceleration_structure_features;
		}

		if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) {
			raytracing_pipeline_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR;
			raytracing_pipeline_features.pNext = next_features;
			next_features = &raytracing_pipeline_features;
		}

		if (enabled_device_extension_names.has(VK_NV_RAY_TRACING_VALIDATION_EXTENSION_NAME)) {
			raytracing_validation_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_VALIDATION_FEATURES_NV;
			raytracing_validation_features.pNext = next_features;
			next_features = &raytracing_validation_features;
		}

		if (enabled_device_extension_names.has(VK_KHR_SYNCHRONIZATION_2_EXTENSION_NAME)) {
			sync_2_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SYNCHRONIZATION_2_FEATURES;
			sync_2_features.pNext = next_features;
			next_features = &sync_2_features;
		}
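
		// At this point `next_features` heads a singly-linked pNext chain built
		// in reverse registration order; the query below fills every struct in
		// one call. Schematically (exact order depends on which extensions are present):
		//
		//   device_features_2.pNext -> sync_2_features -> raytracing_validation_features
		//       -> ... -> device_features_vk_1_2 (or the KHR equivalents) -> nullptr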
923
924
VkPhysicalDeviceFeatures2 device_features_2 = {};
925
device_features_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2;
926
device_features_2.pNext = next_features;
927
functions.GetPhysicalDeviceFeatures2(physical_device, &device_features_2);
928
929
if (use_1_2_features) {
930
#ifdef MACOS_ENABLED
931
ERR_FAIL_COND_V_MSG(!device_features_vk_1_2.shaderSampledImageArrayNonUniformIndexing, ERR_CANT_CREATE, "Your GPU doesn't support shaderSampledImageArrayNonUniformIndexing which is required to use the Vulkan-based renderers in Godot.");
932
#endif
933
if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) {
934
shader_capabilities.shader_float16_is_supported = device_features_vk_1_2.shaderFloat16;
935
shader_capabilities.shader_int8_is_supported = device_features_vk_1_2.shaderInt8;
936
}
937
if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) {
938
buffer_device_address_support = device_features_vk_1_2.bufferDeviceAddress;
939
}
940
if (enabled_device_extension_names.has(VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME)) {
941
vulkan_memory_model_support = device_features_vk_1_2.vulkanMemoryModel;
942
vulkan_memory_model_device_scope_support = device_features_vk_1_2.vulkanMemoryModelDeviceScope;
943
}
944
} else {
945
if (enabled_device_extension_names.has(VK_KHR_SHADER_FLOAT16_INT8_EXTENSION_NAME)) {
946
shader_capabilities.shader_float16_is_supported = shader_features.shaderFloat16;
947
shader_capabilities.shader_int8_is_supported = shader_features.shaderInt8;
948
}
949
if (enabled_device_extension_names.has(VK_KHR_BUFFER_DEVICE_ADDRESS_EXTENSION_NAME)) {
950
buffer_device_address_support = buffer_device_address_features.bufferDeviceAddress;
951
}
952
if (enabled_device_extension_names.has(VK_KHR_VULKAN_MEMORY_MODEL_EXTENSION_NAME)) {
953
vulkan_memory_model_support = vulkan_memory_model_features.vulkanMemoryModel;
954
vulkan_memory_model_device_scope_support = vulkan_memory_model_features.vulkanMemoryModelDeviceScope;
955
}
956
}
957
958
if (enabled_device_extension_names.has(VK_KHR_FRAGMENT_SHADING_RATE_EXTENSION_NAME)) {
959
fsr_capabilities.pipeline_supported = fsr_features.pipelineFragmentShadingRate;
960
fsr_capabilities.primitive_supported = fsr_features.primitiveFragmentShadingRate;
961
fsr_capabilities.attachment_supported = fsr_features.attachmentFragmentShadingRate;
962
}
963
964
if (enabled_device_extension_names.has(VK_EXT_FRAGMENT_DENSITY_MAP_EXTENSION_NAME)) {
965
fdm_capabilities.attachment_supported = fdm_features.fragmentDensityMap;
966
fdm_capabilities.dynamic_attachment_supported = fdm_features.fragmentDensityMapDynamic;
967
fdm_capabilities.non_subsampled_images_supported = fdm_features.fragmentDensityMapNonSubsampledImages;
968
}
969
970
if (enabled_device_extension_names.has(VK_QCOM_FRAGMENT_DENSITY_MAP_OFFSET_EXTENSION_NAME)) {
971
fdm_capabilities.offset_supported = fdmo_features_qcom.fragmentDensityMapOffset;
972
}
973
974
// Multiple VRS techniques can't co-exist during the existence of one device, so we must
975
// choose one at creation time and only report one of them as available.
976
_choose_vrs_capabilities();
977
978
if (enabled_device_extension_names.has(VK_KHR_MULTIVIEW_EXTENSION_NAME)) {
979
multiview_capabilities.is_supported = multiview_features.multiview;
980
multiview_capabilities.geometry_shader_is_supported = multiview_features.multiviewGeometryShader;
981
multiview_capabilities.tessellation_shader_is_supported = multiview_features.multiviewTessellationShader;
982
}
983
984
if (enabled_device_extension_names.has(VK_KHR_16BIT_STORAGE_EXTENSION_NAME)) {
985
storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported = storage_feature.storageBuffer16BitAccess;
986
storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported = storage_feature.uniformAndStorageBuffer16BitAccess;
987
storage_buffer_capabilities.storage_push_constant_16_is_supported = storage_feature.storagePushConstant16;
988
storage_buffer_capabilities.storage_input_output_16 = storage_feature.storageInputOutput16;
989
}
990
991
if (enabled_device_extension_names.has(VK_EXT_PIPELINE_CREATION_CACHE_CONTROL_EXTENSION_NAME)) {
992
pipeline_cache_control_support = pipeline_cache_control_features.pipelineCreationCacheControl;
993
}
994
995
if (enabled_device_extension_names.has(VK_EXT_DEVICE_FAULT_EXTENSION_NAME)) {
996
device_fault_support = true;
997
}
998
#if defined(VK_TRACK_DEVICE_MEMORY)
999
if (enabled_device_extension_names.has(VK_EXT_DEVICE_MEMORY_REPORT_EXTENSION_NAME)) {
1000
device_memory_report_support = true;
1001
}
1002
#endif
1003
1004
if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) {
1005
acceleration_structure_capabilities.acceleration_structure_support = acceleration_structure_features.accelerationStructure;
1006
}
1007
1008
if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) {
1009
raytracing_capabilities.raytracing_pipeline_support = raytracing_pipeline_features.rayTracingPipeline;
1010
raytracing_capabilities.validation = raytracing_validation_features.rayTracingValidation;
1011
}
1012
}
1013
1014
if (functions.GetPhysicalDeviceProperties2 != nullptr) {
1015
void *next_properties = nullptr;
1016
VkPhysicalDeviceFragmentShadingRatePropertiesKHR fsr_properties = {};
1017
VkPhysicalDeviceFragmentDensityMapPropertiesEXT fdm_properties = {};
1018
VkPhysicalDeviceFragmentDensityMapOffsetPropertiesQCOM fdmo_properties = {};
1019
VkPhysicalDeviceMultiviewProperties multiview_properties = {};
1020
VkPhysicalDeviceSubgroupProperties subgroup_properties = {};
1021
VkPhysicalDeviceSubgroupSizeControlProperties subgroup_size_control_properties = {};
1022
VkPhysicalDeviceAccelerationStructurePropertiesKHR acceleration_structure_properties = {};
1023
VkPhysicalDeviceRayTracingPipelinePropertiesKHR raytracing_properties = {};
1024
VkPhysicalDeviceProperties2 physical_device_properties_2 = {};
1025
1026
const bool use_1_1_properties = physical_device_properties.apiVersion >= VK_API_VERSION_1_1;
1027
if (use_1_1_properties) {
1028
subgroup_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
1029
subgroup_properties.pNext = next_properties;
1030
next_properties = &subgroup_properties;
1031
1032
subgroup_capabilities.size_control_is_supported = enabled_device_extension_names.has(VK_EXT_SUBGROUP_SIZE_CONTROL_EXTENSION_NAME);
1033
if (subgroup_capabilities.size_control_is_supported) {
1034
subgroup_size_control_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES;
1035
subgroup_size_control_properties.pNext = next_properties;
1036
next_properties = &subgroup_size_control_properties;
1037
}
1038
}
1039
1040
if (multiview_capabilities.is_supported) {
1041
multiview_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES;
1042
multiview_properties.pNext = next_properties;
1043
next_properties = &multiview_properties;
1044
}
1045
1046
if (fsr_capabilities.attachment_supported) {
1047
fsr_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR;
1048
fsr_properties.pNext = next_properties;
1049
next_properties = &fsr_properties;
1050
}
1051
1052
if (fdm_capabilities.attachment_supported) {
1053
fdm_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_PROPERTIES_EXT;
1054
fdm_properties.pNext = next_properties;
1055
next_properties = &fdm_properties;
1056
}
1057
1058
if (fdm_capabilities.offset_supported) {
1059
fdmo_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_PROPERTIES_QCOM;
1060
fdmo_properties.pNext = next_properties;
1061
next_properties = &fdmo_properties;
1062
}
1063
1064
if (acceleration_structure_capabilities.acceleration_structure_support) {
1065
acceleration_structure_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR;
1066
acceleration_structure_properties.pNext = next_properties;
1067
next_properties = &acceleration_structure_properties;
1068
}
1069
1070
if (raytracing_capabilities.raytracing_pipeline_support) {
1071
raytracing_properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_PROPERTIES_KHR;
1072
raytracing_properties.pNext = next_properties;
1073
next_properties = &raytracing_properties;
1074
}
1075
1076
physical_device_properties_2.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
1077
physical_device_properties_2.pNext = next_properties;
1078
functions.GetPhysicalDeviceProperties2(physical_device, &physical_device_properties_2);
1079
1080
subgroup_capabilities.size = subgroup_properties.subgroupSize;
1081
subgroup_capabilities.min_size = subgroup_properties.subgroupSize;
1082
subgroup_capabilities.max_size = subgroup_properties.subgroupSize;
1083
subgroup_capabilities.supported_stages = subgroup_properties.supportedStages;
1084
subgroup_capabilities.supported_operations = subgroup_properties.supportedOperations;
1085
1086
// Note: quadOperationsInAllStages will be true if:
1087
// - supportedStages has VK_SHADER_STAGE_ALL_GRAPHICS + VK_SHADER_STAGE_COMPUTE_BIT.
1088
// - supportedOperations has VK_SUBGROUP_FEATURE_QUAD_BIT.
1089
subgroup_capabilities.quad_operations_in_all_stages = subgroup_properties.quadOperationsInAllStages;
1090
1091
if (subgroup_capabilities.size_control_is_supported && (subgroup_size_control_properties.requiredSubgroupSizeStages & VK_SHADER_STAGE_COMPUTE_BIT)) {
1092
subgroup_capabilities.min_size = subgroup_size_control_properties.minSubgroupSize;
1093
subgroup_capabilities.max_size = subgroup_size_control_properties.maxSubgroupSize;
1094
}
1095
1096
if (fsr_capabilities.pipeline_supported || fsr_capabilities.primitive_supported || fsr_capabilities.attachment_supported) {
1097
print_verbose("- Vulkan Fragment Shading Rate supported:");
1098
if (fsr_capabilities.pipeline_supported) {
1099
print_verbose(" Pipeline fragment shading rate");
1100
}
1101
if (fsr_capabilities.primitive_supported) {
1102
print_verbose(" Primitive fragment shading rate");
1103
}
1104
if (fsr_capabilities.attachment_supported) {
1105
// TODO: Expose these somehow to the end user.
1106
fsr_capabilities.min_texel_size.x = fsr_properties.minFragmentShadingRateAttachmentTexelSize.width;
1107
fsr_capabilities.min_texel_size.y = fsr_properties.minFragmentShadingRateAttachmentTexelSize.height;
1108
fsr_capabilities.max_texel_size.x = fsr_properties.maxFragmentShadingRateAttachmentTexelSize.width;
1109
fsr_capabilities.max_texel_size.y = fsr_properties.maxFragmentShadingRateAttachmentTexelSize.height;
1110
fsr_capabilities.max_fragment_size.x = fsr_properties.maxFragmentSize.width; // either 4 or 8
1111
fsr_capabilities.max_fragment_size.y = fsr_properties.maxFragmentSize.height; // generally the same as width
1112
1113
print_verbose(String(" Attachment fragment shading rate") +
1114
String(", min texel size: (") + itos(fsr_capabilities.min_texel_size.x) + String(", ") + itos(fsr_capabilities.min_texel_size.y) + String(")") +
1115
String(", max texel size: (") + itos(fsr_capabilities.max_texel_size.x) + String(", ") + itos(fsr_capabilities.max_texel_size.y) + String(")") +
1116
String(", max fragment size: (") + itos(fsr_capabilities.max_fragment_size.x) + String(", ") + itos(fsr_capabilities.max_fragment_size.y) + String(")"));
1117
}
1118
1119
} else {
1120
print_verbose("- Vulkan Variable Rate Shading not supported");
1121
}
1122
1123
if (fdm_capabilities.attachment_supported || fdm_capabilities.dynamic_attachment_supported || fdm_capabilities.non_subsampled_images_supported) {
1124
fdm_capabilities.min_texel_size.x = fdm_properties.minFragmentDensityTexelSize.width;
1125
fdm_capabilities.min_texel_size.y = fdm_properties.minFragmentDensityTexelSize.height;
1126
fdm_capabilities.max_texel_size.x = fdm_properties.maxFragmentDensityTexelSize.width;
1127
fdm_capabilities.max_texel_size.y = fdm_properties.maxFragmentDensityTexelSize.height;
1128
fdm_capabilities.invocations_supported = fdm_properties.fragmentDensityInvocations;
1129
1130
print_verbose(String("- Vulkan Fragment Density Map supported") +
1131
String(", min texel size: (") + itos(fdm_capabilities.min_texel_size.x) + String(", ") + itos(fdm_capabilities.min_texel_size.y) + String(")") +
1132
String(", max texel size: (") + itos(fdm_capabilities.max_texel_size.x) + String(", ") + itos(fdm_capabilities.max_texel_size.y) + String(")"));
1133
1134
if (fdm_capabilities.dynamic_attachment_supported) {
1135
print_verbose(" - dynamic fragment density map supported");
1136
}
1137
1138
if (fdm_capabilities.non_subsampled_images_supported) {
1139
print_verbose(" - non-subsampled images supported");
1140
}
1141
} else {
1142
print_verbose("- Vulkan Fragment Density Map not supported");
1143
}
1144
1145
if (fdm_capabilities.offset_supported) {
1146
print_verbose("- Vulkan Fragment Density Map Offset supported");
1147
1148
fdm_capabilities.offset_granularity.x = fdmo_properties.fragmentDensityOffsetGranularity.width;
1149
fdm_capabilities.offset_granularity.y = fdmo_properties.fragmentDensityOffsetGranularity.height;
1150
1151
print_verbose(vformat(" Offset granularity: (%d, %d)", fdm_capabilities.offset_granularity.x, fdm_capabilities.offset_granularity.y));
1152
} else if (use_fdm_offsets) {
1153
print_verbose("- Vulkan Fragment Density Map Offset not supported");
1154
}
1155
1156
if (multiview_capabilities.is_supported) {
1157
multiview_capabilities.max_view_count = multiview_properties.maxMultiviewViewCount;
1158
multiview_capabilities.max_instance_count = multiview_properties.maxMultiviewInstanceIndex;
1159
1160
print_verbose("- Vulkan multiview supported:");
1161
print_verbose(" max view count: " + itos(multiview_capabilities.max_view_count));
1162
print_verbose(" max instances: " + itos(multiview_capabilities.max_instance_count));
1163
} else {
1164
print_verbose("- Vulkan multiview not supported");
1165
}
1166
1167
print_verbose("- Vulkan subgroup:");
1168
print_verbose(" size: " + itos(subgroup_capabilities.size));
1169
print_verbose(" min size: " + itos(subgroup_capabilities.min_size));
1170
print_verbose(" max size: " + itos(subgroup_capabilities.max_size));
1171
print_verbose(" stages: " + subgroup_capabilities.supported_stages_desc());
1172
print_verbose(" supported ops: " + subgroup_capabilities.supported_operations_desc());
1173
if (subgroup_capabilities.quad_operations_in_all_stages) {
1174
print_verbose(" quad operations in all stages");
1175
}
1176
1177
if (acceleration_structure_capabilities.acceleration_structure_support) {
1178
print_verbose("- Vulkan Acceleration Structure supported");
1179
acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment = acceleration_structure_properties.minAccelerationStructureScratchOffsetAlignment;
1180
print_verbose(" min acceleration structure scratch offset alignment: " + itos(acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment));
1181
} else {
1182
print_verbose("- Vulkan Acceleration Structure not supported");
1183
}
1184
1185
if (raytracing_capabilities.raytracing_pipeline_support) {
1186
raytracing_capabilities.shader_group_handle_size = raytracing_properties.shaderGroupHandleSize;
1187
raytracing_capabilities.shader_group_handle_alignment = raytracing_properties.shaderGroupHandleAlignment;
1188
raytracing_capabilities.shader_group_handle_size_aligned = _align_up(raytracing_capabilities.shader_group_handle_size, raytracing_capabilities.shader_group_handle_alignment);
1189
raytracing_capabilities.shader_group_base_alignment = raytracing_properties.shaderGroupBaseAlignment;
1190
1191
print_verbose("- Vulkan Raytracing supported");
1192
print_verbose(" shader group handle size: " + itos(raytracing_capabilities.shader_group_handle_size));
1193
print_verbose(" shader group handle alignment: " + itos(raytracing_capabilities.shader_group_handle_alignment));
1194
print_verbose(" shader group handle size aligned: " + itos(raytracing_capabilities.shader_group_handle_size_aligned));
1195
print_verbose(" shader group base alignment: " + itos(raytracing_capabilities.shader_group_base_alignment));
1196
} else {
1197
print_verbose("- Vulkan Raytracing not supported");
1198
}
1199
}
1200
1201
return OK;
1202
}
1203
1204
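// Selects between fragment shading rate (FSR) and fragment density map (FDM) so that only one
// VRS technique is ever reported as active. A minimal sketch of the decision below, considering
// attachment support only:
//   FDM supported && (FSR unsupported || Qualcomm GPU) -> keep FDM, clear FSR.
//   else if FSR supported                              -> keep FSR, clear FDM.
//   else                                               -> clear both.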
void RenderingDeviceDriverVulkan::_choose_vrs_capabilities() {
	bool prefer_fdm_on_qualcomm = physical_device_properties.vendorID == RenderingContextDriver::Vendor::VENDOR_QUALCOMM;
	if (fdm_capabilities.attachment_supported && (!fsr_capabilities.attachment_supported || prefer_fdm_on_qualcomm)) {
		// If available, we prefer using fragment density maps on Qualcomm as they adjust tile distribution when using
		// this technique. Performance as a result is higher than when using fragment shading rate.
		fsr_capabilities = FragmentShadingRateCapabilities();
	} else if (fsr_capabilities.attachment_supported) {
		// Disable any possibility of fragment density maps being used.
		fdm_capabilities = FragmentDensityMapCapabilities();
	} else {
		// Do not report or enable any VRS capabilities if attachment is not supported.
		fsr_capabilities = FragmentShadingRateCapabilities();
		fdm_capabilities = FragmentDensityMapCapabilities();
	}
}

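// Note: queue_priorities below is zero-initialized. Vulkan only requires priorities to be in
// [0.0, 1.0], and they are relative between queues of the same family, so 0.0 is a perfectly
// valid value when a single queue is created per family.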
Error RenderingDeviceDriverVulkan::_add_queue_create_info(LocalVector<VkDeviceQueueCreateInfo> &r_queue_create_info) {
	uint32_t queue_family_count = queue_family_properties.size();
	queue_families.resize(queue_family_count);

	VkQueueFlags queue_flags_mask = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT;
	const uint32_t max_queue_count_per_family = 1;
	static const float queue_priorities[max_queue_count_per_family] = {};
	for (uint32_t i = 0; i < queue_family_count; i++) {
		if ((queue_family_properties[i].queueFlags & queue_flags_mask) == 0) {
			// Don't create queues in families that support none of the operations we require.
			continue;
		}

		VkDeviceQueueCreateInfo create_info = {};
		create_info.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO;
		create_info.queueFamilyIndex = i;
		create_info.queueCount = MIN(queue_family_properties[i].queueCount, max_queue_count_per_family);
		create_info.pQueuePriorities = queue_priorities;
		r_queue_create_info.push_back(create_info);

		// Prepare the vectors where the queues will be filled out.
		queue_families[i].resize(create_info.queueCount);
	}

	return OK;
}

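// The feature structs below are linked into a single pNext chain that is handed to
// vkCreateDevice. Each struct is stack-allocated, points at the current chain head, and then
// becomes the new head, e.g. (illustrative only):
//
//   some_features.pNext = create_info_next; // Link to the current head of the chain.
//   create_info_next = &some_features;      // This struct becomes the new head.
//
// Structs that are skipped because the feature is unsupported simply never join the chain.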
Error RenderingDeviceDriverVulkan::_initialize_device(const LocalVector<VkDeviceQueueCreateInfo> &p_queue_create_info) {
	TightLocalVector<const char *> enabled_extension_names;
	enabled_extension_names.reserve(enabled_device_extension_names.size());
	for (const CharString &extension_name : enabled_device_extension_names) {
		enabled_extension_names.push_back(extension_name.ptr());
	}

	void *create_info_next = nullptr;
	VkPhysicalDeviceShaderFloat16Int8FeaturesKHR shader_features = {};
	shader_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES_KHR;
	shader_features.pNext = create_info_next;
	shader_features.shaderFloat16 = shader_capabilities.shader_float16_is_supported;
	shader_features.shaderInt8 = shader_capabilities.shader_int8_is_supported;
	create_info_next = &shader_features;

	VkPhysicalDeviceBufferDeviceAddressFeaturesKHR buffer_device_address_features = {};
	if (buffer_device_address_support) {
		buffer_device_address_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_KHR;
		buffer_device_address_features.pNext = create_info_next;
		buffer_device_address_features.bufferDeviceAddress = buffer_device_address_support;
		create_info_next = &buffer_device_address_features;
	}

	VkPhysicalDeviceVulkanMemoryModelFeaturesKHR vulkan_memory_model_features = {};
	if (vulkan_memory_model_support && vulkan_memory_model_device_scope_support) {
		vulkan_memory_model_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR;
		vulkan_memory_model_features.pNext = create_info_next;
		vulkan_memory_model_features.vulkanMemoryModel = vulkan_memory_model_support;
		vulkan_memory_model_features.vulkanMemoryModelDeviceScope = vulkan_memory_model_device_scope_support;
		create_info_next = &vulkan_memory_model_features;
	}

	VkPhysicalDeviceFragmentShadingRateFeaturesKHR fsr_features = {};
	if (fsr_capabilities.pipeline_supported || fsr_capabilities.primitive_supported || fsr_capabilities.attachment_supported) {
		fsr_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR;
		fsr_features.pNext = create_info_next;
		fsr_features.pipelineFragmentShadingRate = fsr_capabilities.pipeline_supported;
		fsr_features.primitiveFragmentShadingRate = fsr_capabilities.primitive_supported;
		fsr_features.attachmentFragmentShadingRate = fsr_capabilities.attachment_supported;
		create_info_next = &fsr_features;
	}

	VkPhysicalDeviceFragmentDensityMapFeaturesEXT fdm_features = {};
	if (fdm_capabilities.attachment_supported || fdm_capabilities.dynamic_attachment_supported || fdm_capabilities.non_subsampled_images_supported) {
		fdm_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_FEATURES_EXT;
		fdm_features.pNext = create_info_next;
		fdm_features.fragmentDensityMap = fdm_capabilities.attachment_supported;
		fdm_features.fragmentDensityMapDynamic = fdm_capabilities.dynamic_attachment_supported;
		fdm_features.fragmentDensityMapNonSubsampledImages = fdm_capabilities.non_subsampled_images_supported;
		create_info_next = &fdm_features;
	}

	VkPhysicalDeviceFragmentDensityMapOffsetFeaturesQCOM fdm_offset_features = {};
	if (fdm_capabilities.offset_supported) {
		fdm_offset_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_DENSITY_MAP_OFFSET_FEATURES_QCOM;
		fdm_offset_features.pNext = create_info_next;
		fdm_offset_features.fragmentDensityMapOffset = VK_TRUE;
		create_info_next = &fdm_offset_features;
	}

	VkPhysicalDevicePipelineCreationCacheControlFeatures pipeline_cache_control_features = {};
	if (pipeline_cache_control_support) {
		pipeline_cache_control_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES;
		pipeline_cache_control_features.pNext = create_info_next;
		pipeline_cache_control_features.pipelineCreationCacheControl = pipeline_cache_control_support;
		create_info_next = &pipeline_cache_control_features;
	}

	VkPhysicalDeviceFaultFeaturesEXT device_fault_features = {};
	if (device_fault_support) {
		device_fault_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FAULT_FEATURES_EXT;
		device_fault_features.pNext = create_info_next;
		create_info_next = &device_fault_features;
	}

#if defined(VK_TRACK_DEVICE_MEMORY)
	VkDeviceDeviceMemoryReportCreateInfoEXT memory_report_info = {};
	if (device_memory_report_support) {
		memory_report_info.sType = VK_STRUCTURE_TYPE_DEVICE_DEVICE_MEMORY_REPORT_CREATE_INFO_EXT;
		memory_report_info.pfnUserCallback = RenderingContextDriverVulkan::memory_report_callback;
		memory_report_info.pNext = create_info_next;
		memory_report_info.flags = 0;
		memory_report_info.pUserData = this;

		create_info_next = &memory_report_info;
	}
#endif

	VkPhysicalDeviceAccelerationStructureFeaturesKHR acceleration_structure_features = {};
	if (acceleration_structure_capabilities.acceleration_structure_support) {
		acceleration_structure_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR;
		acceleration_structure_features.pNext = create_info_next;
		acceleration_structure_features.accelerationStructure = acceleration_structure_capabilities.acceleration_structure_support;
		create_info_next = &acceleration_structure_features;
	}

	VkPhysicalDeviceRayTracingPipelineFeaturesKHR raytracing_pipeline_features = {};
	if (raytracing_capabilities.raytracing_pipeline_support) {
		raytracing_pipeline_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_PIPELINE_FEATURES_KHR;
		raytracing_pipeline_features.pNext = create_info_next;
		raytracing_pipeline_features.rayTracingPipeline = raytracing_capabilities.raytracing_pipeline_support;
		create_info_next = &raytracing_pipeline_features;
	}

	VkPhysicalDeviceRayTracingValidationFeaturesNV raytracing_validation_features = {};
	if (raytracing_capabilities.validation) {
		raytracing_validation_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_RAY_TRACING_VALIDATION_FEATURES_NV;
		raytracing_validation_features.pNext = create_info_next;
		raytracing_validation_features.rayTracingValidation = raytracing_capabilities.validation;
		create_info_next = &raytracing_validation_features;
	}

	VkPhysicalDeviceVulkan11Features vulkan_1_1_features = {};
	VkPhysicalDevice16BitStorageFeaturesKHR storage_features = {};
	VkPhysicalDeviceMultiviewFeatures multiview_features = {};
	const bool enable_1_2_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_2;
	if (enable_1_2_features) {
		// In Vulkan 1.2 and newer we use a newer struct to enable various features.
		vulkan_1_1_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES;
		vulkan_1_1_features.pNext = create_info_next;
		vulkan_1_1_features.storageBuffer16BitAccess = storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported;
		vulkan_1_1_features.uniformAndStorageBuffer16BitAccess = storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported;
		vulkan_1_1_features.storagePushConstant16 = storage_buffer_capabilities.storage_push_constant_16_is_supported;
		vulkan_1_1_features.storageInputOutput16 = storage_buffer_capabilities.storage_input_output_16;
		vulkan_1_1_features.multiview = multiview_capabilities.is_supported;
		vulkan_1_1_features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported;
		vulkan_1_1_features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported;
		vulkan_1_1_features.variablePointersStorageBuffer = 0;
		vulkan_1_1_features.variablePointers = 0;
		vulkan_1_1_features.protectedMemory = 0;
		vulkan_1_1_features.samplerYcbcrConversion = 0;
		vulkan_1_1_features.shaderDrawParameters = 0;
		create_info_next = &vulkan_1_1_features;
	} else {
		// On Vulkan 1.0 and 1.1 we use our older structs to initialize these features.
		storage_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES_KHR;
		storage_features.pNext = create_info_next;
		storage_features.storageBuffer16BitAccess = storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported;
		storage_features.uniformAndStorageBuffer16BitAccess = storage_buffer_capabilities.uniform_and_storage_buffer_16_bit_access_is_supported;
		storage_features.storagePushConstant16 = storage_buffer_capabilities.storage_push_constant_16_is_supported;
		storage_features.storageInputOutput16 = storage_buffer_capabilities.storage_input_output_16;
		create_info_next = &storage_features;

		const bool enable_1_1_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_1;
		if (enable_1_1_features) {
			multiview_features.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES;
			multiview_features.pNext = create_info_next;
			multiview_features.multiview = multiview_capabilities.is_supported;
			multiview_features.multiviewGeometryShader = multiview_capabilities.geometry_shader_is_supported;
			multiview_features.multiviewTessellationShader = multiview_capabilities.tessellation_shader_is_supported;
			create_info_next = &multiview_features;
		}
	}

	VkDeviceCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO;
	create_info.pNext = create_info_next;
	create_info.queueCreateInfoCount = p_queue_create_info.size();
	create_info.pQueueCreateInfos = p_queue_create_info.ptr();
	create_info.enabledExtensionCount = enabled_extension_names.size();
	create_info.ppEnabledExtensionNames = enabled_extension_names.ptr();
	create_info.pEnabledFeatures = &requested_device_features;

	if (VulkanHooks::get_singleton() != nullptr) {
		bool device_created = VulkanHooks::get_singleton()->create_vulkan_device(&create_info, &vk_device);
		ERR_FAIL_COND_V(!device_created, ERR_CANT_CREATE);
	} else {
		VkResult err = vkCreateDevice(physical_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DEVICE), &vk_device);
		ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);
	}

	for (uint32_t i = 0; i < queue_families.size(); i++) {
		for (uint32_t j = 0; j < queue_families[i].size(); j++) {
			vkGetDeviceQueue(vk_device, i, j, &queue_families[i][j].queue);
		}
	}

	const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
	if (functions.GetDeviceProcAddr != nullptr) {
		device_functions.CreateSwapchainKHR = PFN_vkCreateSwapchainKHR(functions.GetDeviceProcAddr(vk_device, "vkCreateSwapchainKHR"));
		device_functions.DestroySwapchainKHR = PFN_vkDestroySwapchainKHR(functions.GetDeviceProcAddr(vk_device, "vkDestroySwapchainKHR"));
		device_functions.GetSwapchainImagesKHR = PFN_vkGetSwapchainImagesKHR(functions.GetDeviceProcAddr(vk_device, "vkGetSwapchainImagesKHR"));
		device_functions.AcquireNextImageKHR = PFN_vkAcquireNextImageKHR(functions.GetDeviceProcAddr(vk_device, "vkAcquireNextImageKHR"));
		device_functions.QueuePresentKHR = PFN_vkQueuePresentKHR(functions.GetDeviceProcAddr(vk_device, "vkQueuePresentKHR"));

		if (enabled_device_extension_names.has(VK_KHR_CREATE_RENDERPASS_2_EXTENSION_NAME)) {
			device_functions.CreateRenderPass2KHR = PFN_vkCreateRenderPass2KHR(functions.GetDeviceProcAddr(vk_device, "vkCreateRenderPass2KHR"));
			device_functions.EndRenderPass2KHR = PFN_vkCmdEndRenderPass2KHR(functions.GetDeviceProcAddr(vk_device, "vkCmdEndRenderPass2KHR"));
		}

		// Debug marker extensions.
		if (enabled_device_extension_names.has(VK_EXT_DEBUG_MARKER_EXTENSION_NAME)) {
			device_functions.CmdDebugMarkerBeginEXT = (PFN_vkCmdDebugMarkerBeginEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerBeginEXT");
			device_functions.CmdDebugMarkerEndEXT = (PFN_vkCmdDebugMarkerEndEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerEndEXT");
			device_functions.CmdDebugMarkerInsertEXT = (PFN_vkCmdDebugMarkerInsertEXT)functions.GetDeviceProcAddr(vk_device, "vkCmdDebugMarkerInsertEXT");
			device_functions.DebugMarkerSetObjectNameEXT = (PFN_vkDebugMarkerSetObjectNameEXT)functions.GetDeviceProcAddr(vk_device, "vkDebugMarkerSetObjectNameEXT");
		}

		// Debug device fault extension.
		if (device_fault_support) {
			device_functions.GetDeviceFaultInfoEXT = (PFN_vkGetDeviceFaultInfoEXT)functions.GetDeviceProcAddr(vk_device, "vkGetDeviceFaultInfoEXT");
		}

		// Device raytracing extensions.
		if (enabled_device_extension_names.has(VK_KHR_ACCELERATION_STRUCTURE_EXTENSION_NAME)) {
			device_functions.CreateAccelerationStructureKHR = PFN_vkCreateAccelerationStructureKHR(functions.GetDeviceProcAddr(vk_device, "vkCreateAccelerationStructureKHR"));
		}

		if (enabled_device_extension_names.has(VK_KHR_RAY_TRACING_PIPELINE_EXTENSION_NAME)) {
			device_functions.CreateRaytracingPipelinesKHR = PFN_vkCreateRayTracingPipelinesKHR(functions.GetDeviceProcAddr(vk_device, "vkCreateRayTracingPipelinesKHR"));
		}
	}

	return OK;
}

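// Note: the VMA flags below mirror what was enabled at device creation; in particular,
// VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT is only valid because the bufferDeviceAddress
// feature was requested in _initialize_device() when supported.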
Error RenderingDeviceDriverVulkan::_initialize_allocator() {
	VmaAllocatorCreateInfo allocator_info = {};
	allocator_info.physicalDevice = physical_device;
	allocator_info.device = vk_device;
	allocator_info.instance = context_driver->instance_get();
	const bool use_1_3_features = physical_device_properties.apiVersion >= VK_API_VERSION_1_3;
	if (use_1_3_features) {
		allocator_info.flags |= VMA_ALLOCATOR_CREATE_KHR_MAINTENANCE5_BIT;
	}
	if (buffer_device_address_support) {
		allocator_info.flags |= VMA_ALLOCATOR_CREATE_BUFFER_DEVICE_ADDRESS_BIT;
	}
	VkResult err = vmaCreateAllocator(&allocator_info, &allocator);
	ERR_FAIL_COND_V_MSG(err, ERR_CANT_CREATE, "vmaCreateAllocator failed with error " + itos(err) + ".");

	return OK;
}

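// The header written below identifies the device and driver a cache blob was produced on; a
// saved cache is presumably only safe to reuse when every field still matches. For example, a
// driver upgrade changes driverVersion and should invalidate any previously saved cache.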
Error RenderingDeviceDriverVulkan::_initialize_pipeline_cache() {
	pipelines_cache.buffer.resize(sizeof(PipelineCacheHeader));
	PipelineCacheHeader *header = (PipelineCacheHeader *)(pipelines_cache.buffer.ptrw());
	*header = {};
	header->magic = 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->device_id = physical_device_properties.deviceID;
	header->vendor_id = physical_device_properties.vendorID;
	header->driver_version = physical_device_properties.driverVersion;
	memcpy(header->uuid, physical_device_properties.pipelineCacheUUID, VK_UUID_SIZE);
	header->driver_abi = sizeof(void *);

	pipeline_cache_id = String::hex_encode_buffer(physical_device_properties.pipelineCacheUUID, VK_UUID_SIZE);
	pipeline_cache_id += "-driver-" + itos(physical_device_properties.driverVersion);

	return OK;
}

static void _convert_subpass_attachments(const VkAttachmentReference2 *p_attachment_references_2, uint32_t p_attachment_references_count, TightLocalVector<VkAttachmentReference> &r_attachment_references) {
	r_attachment_references.resize(p_attachment_references_count);
	for (uint32_t i = 0; i < p_attachment_references_count; i++) {
		// Ignore sType, pNext and aspectMask (which is currently unused).
		r_attachment_references[i].attachment = p_attachment_references_2[i].attachment;
		r_attachment_references[i].layout = p_attachment_references_2[i].layout;
	}
}

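// Fallback path note: the converted VkAttachmentReference vectors must outlive the
// vkCreateRenderPass call, because each VkSubpassDescription stores raw pointers into them.
// That is why subpasses_attachments below reserves four vectors per subpass (input, color,
// resolve, depth/stencil) up front instead of using temporaries.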
VkResult RenderingDeviceDriverVulkan::_create_render_pass(VkDevice p_device, const VkRenderPassCreateInfo2 *p_create_info, const VkAllocationCallbacks *p_allocator, VkRenderPass *p_render_pass) {
	if (device_functions.CreateRenderPass2KHR != nullptr) {
		return device_functions.CreateRenderPass2KHR(p_device, p_create_info, p_allocator, p_render_pass);
	} else {
		// Compatibility fallback to the regular render pass creation call by converting the inputs from the newer version to the older one.
		TightLocalVector<VkAttachmentDescription> attachments;
		attachments.resize(p_create_info->attachmentCount);
		for (uint32_t i = 0; i < p_create_info->attachmentCount; i++) {
			// Ignores sType and pNext from the attachment.
			const VkAttachmentDescription2 &src = p_create_info->pAttachments[i];
			VkAttachmentDescription &dst = attachments[i];
			dst.flags = src.flags;
			dst.format = src.format;
			dst.samples = src.samples;
			dst.loadOp = src.loadOp;
			dst.storeOp = src.storeOp;
			dst.stencilLoadOp = src.stencilLoadOp;
			dst.stencilStoreOp = src.stencilStoreOp;
			dst.initialLayout = src.initialLayout;
			dst.finalLayout = src.finalLayout;
		}

		const uint32_t attachment_vectors_per_subpass = 4;
		TightLocalVector<TightLocalVector<VkAttachmentReference>> subpasses_attachments;
		TightLocalVector<VkSubpassDescription> subpasses;
		subpasses_attachments.resize(p_create_info->subpassCount * attachment_vectors_per_subpass);
		subpasses.resize(p_create_info->subpassCount);

		for (uint32_t i = 0; i < p_create_info->subpassCount; i++) {
			const uint32_t vector_base_index = i * attachment_vectors_per_subpass;
			const uint32_t input_attachments_index = vector_base_index + 0;
			const uint32_t color_attachments_index = vector_base_index + 1;
			const uint32_t resolve_attachments_index = vector_base_index + 2;
			const uint32_t depth_attachment_index = vector_base_index + 3;
			_convert_subpass_attachments(p_create_info->pSubpasses[i].pInputAttachments, p_create_info->pSubpasses[i].inputAttachmentCount, subpasses_attachments[input_attachments_index]);
			_convert_subpass_attachments(p_create_info->pSubpasses[i].pColorAttachments, p_create_info->pSubpasses[i].colorAttachmentCount, subpasses_attachments[color_attachments_index]);
			_convert_subpass_attachments(p_create_info->pSubpasses[i].pResolveAttachments, (p_create_info->pSubpasses[i].pResolveAttachments != nullptr) ? p_create_info->pSubpasses[i].colorAttachmentCount : 0, subpasses_attachments[resolve_attachments_index]);
			_convert_subpass_attachments(p_create_info->pSubpasses[i].pDepthStencilAttachment, (p_create_info->pSubpasses[i].pDepthStencilAttachment != nullptr) ? 1 : 0, subpasses_attachments[depth_attachment_index]);

			// Ignores sType and pNext from the subpass.
			const VkSubpassDescription2 &src_subpass = p_create_info->pSubpasses[i];
			VkSubpassDescription &dst_subpass = subpasses[i];
			dst_subpass.flags = src_subpass.flags;
			dst_subpass.pipelineBindPoint = src_subpass.pipelineBindPoint;
			dst_subpass.inputAttachmentCount = src_subpass.inputAttachmentCount;
			dst_subpass.pInputAttachments = subpasses_attachments[input_attachments_index].ptr();
			dst_subpass.colorAttachmentCount = src_subpass.colorAttachmentCount;
			dst_subpass.pColorAttachments = subpasses_attachments[color_attachments_index].ptr();
			dst_subpass.pResolveAttachments = subpasses_attachments[resolve_attachments_index].ptr();
			dst_subpass.pDepthStencilAttachment = subpasses_attachments[depth_attachment_index].ptr();
			dst_subpass.preserveAttachmentCount = src_subpass.preserveAttachmentCount;
			dst_subpass.pPreserveAttachments = src_subpass.pPreserveAttachments;
		}

		TightLocalVector<VkSubpassDependency> dependencies;
		dependencies.resize(p_create_info->dependencyCount);

		for (uint32_t i = 0; i < p_create_info->dependencyCount; i++) {
			// Ignores sType and pNext from the dependency, and viewMask which is currently unused.
			const VkSubpassDependency2 &src_dependency = p_create_info->pDependencies[i];
			VkSubpassDependency &dst_dependency = dependencies[i];
			dst_dependency.srcSubpass = src_dependency.srcSubpass;
			dst_dependency.dstSubpass = src_dependency.dstSubpass;
			dst_dependency.srcStageMask = src_dependency.srcStageMask;
			dst_dependency.dstStageMask = src_dependency.dstStageMask;
			dst_dependency.srcAccessMask = src_dependency.srcAccessMask;
			dst_dependency.dstAccessMask = src_dependency.dstAccessMask;
			dst_dependency.dependencyFlags = src_dependency.dependencyFlags;
		}

		VkRenderPassCreateInfo create_info = {};
		create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO;
		create_info.pNext = p_create_info->pNext;
		create_info.flags = p_create_info->flags;
		create_info.attachmentCount = attachments.size();
		create_info.pAttachments = attachments.ptr();
		create_info.subpassCount = subpasses.size();
		create_info.pSubpasses = subpasses.ptr();
		create_info.dependencyCount = dependencies.size();
		create_info.pDependencies = dependencies.ptr();
		return vkCreateRenderPass(vk_device, &create_info, p_allocator, p_render_pass);
	}
}

bool RenderingDeviceDriverVulkan::_release_image_semaphore(CommandQueue *p_command_queue, uint32_t p_semaphore_index, bool p_release_on_swap_chain) {
	SwapChain *swap_chain = p_command_queue->image_semaphores_swap_chains[p_semaphore_index];
	if (swap_chain != nullptr) {
		// Clear the swap chain from the command queue's vector.
		p_command_queue->image_semaphores_swap_chains[p_semaphore_index] = nullptr;

		if (p_release_on_swap_chain) {
			// Remove the acquired semaphore from the swap chain's vectors.
			for (uint32_t i = 0; i < swap_chain->command_queues_acquired.size(); i++) {
				if (swap_chain->command_queues_acquired[i] == p_command_queue && swap_chain->command_queues_acquired_semaphores[i] == p_semaphore_index) {
					swap_chain->command_queues_acquired.remove_at(i);
					swap_chain->command_queues_acquired_semaphores.remove_at(i);
					break;
				}
			}
		}

		return true;
	}

	return false;
}

bool RenderingDeviceDriverVulkan::_recreate_image_semaphore(CommandQueue *p_command_queue, uint32_t p_semaphore_index, bool p_release_on_swap_chain) {
	_release_image_semaphore(p_command_queue, p_semaphore_index, p_release_on_swap_chain);

	VkSemaphore semaphore;
	VkSemaphoreCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
	VkResult err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore);
	ERR_FAIL_COND_V(err != VK_SUCCESS, false);

	// Indicate the semaphore is free again and destroy the previous one before storing the new one.
	vkDestroySemaphore(vk_device, p_command_queue->image_semaphores[p_semaphore_index], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));

	p_command_queue->image_semaphores[p_semaphore_index] = semaphore;
	p_command_queue->free_image_semaphores.push_back(p_semaphore_index);

	return true;
}
// Debug marker extensions.
VkDebugReportObjectTypeEXT RenderingDeviceDriverVulkan::_convert_to_debug_report_objectType(VkObjectType p_object_type) {
	switch (p_object_type) {
		case VK_OBJECT_TYPE_UNKNOWN:
			return VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT;
		case VK_OBJECT_TYPE_INSTANCE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_INSTANCE_EXT;
		case VK_OBJECT_TYPE_PHYSICAL_DEVICE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_PHYSICAL_DEVICE_EXT;
		case VK_OBJECT_TYPE_DEVICE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT;
		case VK_OBJECT_TYPE_QUEUE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_QUEUE_EXT;
		case VK_OBJECT_TYPE_SEMAPHORE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_SEMAPHORE_EXT;
		case VK_OBJECT_TYPE_COMMAND_BUFFER:
			return VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_BUFFER_EXT;
		case VK_OBJECT_TYPE_FENCE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_FENCE_EXT;
		case VK_OBJECT_TYPE_DEVICE_MEMORY:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_MEMORY_EXT;
		case VK_OBJECT_TYPE_BUFFER:
			return VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_EXT;
		case VK_OBJECT_TYPE_IMAGE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_EXT;
		case VK_OBJECT_TYPE_EVENT:
			return VK_DEBUG_REPORT_OBJECT_TYPE_EVENT_EXT;
		case VK_OBJECT_TYPE_QUERY_POOL:
			return VK_DEBUG_REPORT_OBJECT_TYPE_QUERY_POOL_EXT;
		case VK_OBJECT_TYPE_BUFFER_VIEW:
			return VK_DEBUG_REPORT_OBJECT_TYPE_BUFFER_VIEW_EXT;
		case VK_OBJECT_TYPE_IMAGE_VIEW:
			return VK_DEBUG_REPORT_OBJECT_TYPE_IMAGE_VIEW_EXT;
		case VK_OBJECT_TYPE_SHADER_MODULE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_SHADER_MODULE_EXT;
		case VK_OBJECT_TYPE_PIPELINE_CACHE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_CACHE_EXT;
		case VK_OBJECT_TYPE_PIPELINE_LAYOUT:
			return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_LAYOUT_EXT;
		case VK_OBJECT_TYPE_RENDER_PASS:
			return VK_DEBUG_REPORT_OBJECT_TYPE_RENDER_PASS_EXT;
		case VK_OBJECT_TYPE_PIPELINE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_PIPELINE_EXT;
		case VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT_EXT;
		case VK_OBJECT_TYPE_SAMPLER:
			return VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_EXT;
		case VK_OBJECT_TYPE_DESCRIPTOR_POOL:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_POOL_EXT;
		case VK_OBJECT_TYPE_DESCRIPTOR_SET:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_SET_EXT;
		case VK_OBJECT_TYPE_FRAMEBUFFER:
			return VK_DEBUG_REPORT_OBJECT_TYPE_FRAMEBUFFER_EXT;
		case VK_OBJECT_TYPE_COMMAND_POOL:
			return VK_DEBUG_REPORT_OBJECT_TYPE_COMMAND_POOL_EXT;
		case VK_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION:
			return VK_DEBUG_REPORT_OBJECT_TYPE_SAMPLER_YCBCR_CONVERSION_EXT;
		case VK_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DESCRIPTOR_UPDATE_TEMPLATE_EXT;
		case VK_OBJECT_TYPE_SURFACE_KHR:
			return VK_DEBUG_REPORT_OBJECT_TYPE_SURFACE_KHR_EXT;
		case VK_OBJECT_TYPE_SWAPCHAIN_KHR:
			return VK_DEBUG_REPORT_OBJECT_TYPE_SWAPCHAIN_KHR_EXT;
		case VK_OBJECT_TYPE_DISPLAY_KHR:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_KHR_EXT;
		case VK_OBJECT_TYPE_DISPLAY_MODE_KHR:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DISPLAY_MODE_KHR_EXT;
		case VK_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT:
			return VK_DEBUG_REPORT_OBJECT_TYPE_DEBUG_REPORT_CALLBACK_EXT_EXT;
		case VK_OBJECT_TYPE_CU_MODULE_NVX:
			return VK_DEBUG_REPORT_OBJECT_TYPE_CU_MODULE_NVX_EXT;
		case VK_OBJECT_TYPE_CU_FUNCTION_NVX:
			return VK_DEBUG_REPORT_OBJECT_TYPE_CU_FUNCTION_NVX_EXT;
		case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR:
			return VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR_EXT;
		case VK_OBJECT_TYPE_VALIDATION_CACHE_EXT:
			return VK_DEBUG_REPORT_OBJECT_TYPE_VALIDATION_CACHE_EXT_EXT;
		case VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV:
			return VK_DEBUG_REPORT_OBJECT_TYPE_ACCELERATION_STRUCTURE_NV_EXT;
		default:
			break;
	}

	return VK_DEBUG_REPORT_OBJECT_TYPE_UNKNOWN_EXT;
}

void RenderingDeviceDriverVulkan::_set_object_name(VkObjectType p_object_type, uint64_t p_object_handle, String p_object_name) {
	const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
	if (functions.SetDebugUtilsObjectNameEXT != nullptr) {
		CharString obj_data = p_object_name.utf8();
		VkDebugUtilsObjectNameInfoEXT name_info;
		name_info.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_OBJECT_NAME_INFO_EXT;
		name_info.pNext = nullptr;
		name_info.objectType = p_object_type;
		name_info.objectHandle = p_object_handle;
		name_info.pObjectName = obj_data.get_data();
		functions.SetDebugUtilsObjectNameEXT(vk_device, &name_info);
	} else if (functions.DebugMarkerSetObjectNameEXT != nullptr) {
		// Debug marker extensions.
		CharString obj_data = p_object_name.utf8();
		VkDebugMarkerObjectNameInfoEXT name_info;
		name_info.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_OBJECT_NAME_INFO_EXT; // Must match VkDebugMarkerObjectNameInfoEXT, not the debug utils struct.
		name_info.pNext = nullptr;
		name_info.objectType = _convert_to_debug_report_objectType(p_object_type);
		name_info.object = p_object_handle;
		name_info.pObjectName = obj_data.get_data();
		functions.DebugMarkerSetObjectNameEXT(vk_device, &name_info);
	}
}

Error RenderingDeviceDriverVulkan::initialize(uint32_t p_device_index, uint32_t p_frame_count) {
	context_device = context_driver->device_get(p_device_index);
	physical_device = context_driver->physical_device_get(p_device_index);
	vkGetPhysicalDeviceProperties(physical_device, &physical_device_properties);

	// Workaround a driver bug on Adreno 730 GPUs that leaks memory on each call to vkResetDescriptorPool,
	// eventually running out of memory. In that case we should not use linearly allocated pools.
	// Bug introduced in driver 512.597.0 and fixed in 512.671.0.
	// Confirmed by Qualcomm.
	if (linear_descriptor_pools_enabled) {
		const uint32_t reset_descriptor_pool_broken_driver_begin = VK_MAKE_VERSION(512u, 597u, 0u);
		const uint32_t reset_descriptor_pool_fixed_driver_begin = VK_MAKE_VERSION(512u, 671u, 0u);
		linear_descriptor_pools_enabled = physical_device_properties.driverVersion < reset_descriptor_pool_broken_driver_begin || physical_device_properties.driverVersion > reset_descriptor_pool_fixed_driver_begin;
	}

	// Workaround a driver bug on Adreno 5XX GPUs that causes a crash when
	// there are empty descriptor set layouts placed between non-empty ones.
	adreno_5xx_empty_descriptor_set_layout_workaround =
			physical_device_properties.vendorID == RenderingContextDriver::Vendor::VENDOR_QUALCOMM &&
			physical_device_properties.deviceID >= 0x5000000 &&
			physical_device_properties.deviceID < 0x6000000;

	frame_count = p_frame_count;

	// Copy the queue family properties the context already retrieved.
	uint32_t queue_family_count = context_driver->queue_family_get_count(p_device_index);
	queue_family_properties.resize(queue_family_count);
	for (uint32_t i = 0; i < queue_family_count; i++) {
		queue_family_properties[i] = context_driver->queue_family_get(p_device_index, i);
	}

	Error err = _initialize_device_extensions();
	ERR_FAIL_COND_V(err != OK, err);

	err = _check_device_features();
	ERR_FAIL_COND_V(err != OK, err);

	err = _check_device_capabilities();
	ERR_FAIL_COND_V(err != OK, err);

	LocalVector<VkDeviceQueueCreateInfo> queue_create_info;
	err = _add_queue_create_info(queue_create_info);
	ERR_FAIL_COND_V(err != OK, err);

	err = _initialize_device(queue_create_info);
	ERR_FAIL_COND_V(err != OK, err);

	err = _initialize_allocator();
	ERR_FAIL_COND_V(err != OK, err);

	err = _initialize_pipeline_cache();
	ERR_FAIL_COND_V(err != OK, err);

	max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool");

#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
	breadcrumb_buffer = buffer_create(2u * sizeof(uint32_t) * BREADCRUMB_BUFFER_ENTRIES, BufferUsageBits::BUFFER_USAGE_TRANSFER_TO_BIT, MemoryAllocationType::MEMORY_ALLOCATION_TYPE_CPU, UINT64_MAX);
#endif

#if defined(SWAPPY_FRAME_PACING_ENABLED)
	swappy_frame_pacer_enable = GLOBAL_GET("display/window/frame_pacing/android/enable_frame_pacing");
	swappy_mode = GLOBAL_GET("display/window/frame_pacing/android/swappy_mode");

	if (VulkanHooks::get_singleton() != nullptr) {
		// Hooks control device creation & possibly presentation
		// (e.g. OpenXR) thus it's too risky to use Swappy.
		swappy_frame_pacer_enable = false;
		OS::get_singleton()->print("VulkanHooks detected (e.g. OpenXR): Force-disabling Swappy Frame Pacing.\n");
	}
#endif

	shader_container_format.set_debug_info_enabled(Engine::get_singleton()->is_generate_spirv_debug_info_enabled());

#if RECORD_PIPELINE_STATISTICS
	pipeline_statistics.file_access = FileAccess::open(RECORD_PIPELINE_STATISTICS_PATH, FileAccess::WRITE);
	ERR_FAIL_NULL_V_MSG(pipeline_statistics.file_access, ERR_CANT_CREATE, "Unable to write pipeline statistics file.");

	pipeline_statistics.file_access->store_csv_line({ "name", "hash", "stage", "spec", "glslang", "re-spirv", "time" });
	pipeline_statistics.file_access->flush();
#endif

	return OK;
}

/****************/
/**** MEMORY ****/
/****************/

static const uint32_t SMALL_ALLOCATION_MAX_SIZE = 4096;

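// Small allocations get their own VMA pool per memory type index: grouping many tiny buffers
// and images together reduces per-allocation overhead and fragmentation of the main heaps.
// Callers route anything at or below SMALL_ALLOCATION_MAX_SIZE (4 KiB) through this pool.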
VmaPool RenderingDeviceDriverVulkan::_find_or_create_small_allocs_pool(uint32_t p_mem_type_index) {
	if (small_allocs_pools.has(p_mem_type_index)) {
		return small_allocs_pools[p_mem_type_index];
	}

	print_verbose("Creating VMA small objects pool for memory type index " + itos(p_mem_type_index));

	VmaPoolCreateInfo pci = {};
	pci.memoryTypeIndex = p_mem_type_index;
	pci.flags = 0;
	pci.blockSize = 0;
	pci.minBlockCount = 0;
	pci.maxBlockCount = SIZE_MAX;
	pci.priority = 0.5f;
	pci.minAllocationAlignment = 0;
	pci.pMemoryAllocateNext = nullptr;
	VmaPool pool = VK_NULL_HANDLE;
	VkResult res = vmaCreatePool(allocator, &pci, &pool);
	small_allocs_pools[p_mem_type_index] = pool; // Don't try to create it again if it failed the first time.
	ERR_FAIL_COND_V_MSG(res, pool, "vmaCreatePool failed with error " + itos(res) + ".");

	return pool;
}

/*****************/
/**** BUFFERS ****/
/*****************/

// RDD::BufferUsageBits == VkBufferUsageFlagBits.
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_TRANSFER_FROM_BIT, VK_BUFFER_USAGE_TRANSFER_SRC_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_TRANSFER_TO_BIT, VK_BUFFER_USAGE_TRANSFER_DST_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_TEXEL_BIT, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_UNIFORM_BIT, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_STORAGE_BIT, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_INDEX_BIT, VK_BUFFER_USAGE_INDEX_BUFFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_VERTEX_BIT, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_INDIRECT_BIT, VK_BUFFER_USAGE_INDIRECT_BUFFER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT, VK_BUFFER_USAGE_SHADER_DEVICE_ADDRESS_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_BUILD_INPUT_READ_ONLY_BIT_KHR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT, VK_BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT_KHR));

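// Buffers created with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT are persistently mapped and
// sub-allocated per frame: the underlying VkBuffer is frame_count times the requested size
// and each frame writes its own region. Illustrative layout, assuming frame_count == 3:
//
//   [ frame 0 region | frame 1 region | frame 2 region ]
//
// Each region is the caller-requested size after alignment.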
RDD::BufferID RenderingDeviceDriverVulkan::buffer_create(uint64_t p_size, BitField<BufferUsageBits> p_usage, MemoryAllocationType p_allocation_type, uint64_t p_frames_drawn) {
	uint32_t alignment = 16u; // 16 bytes is reasonable.
	if (p_usage.has_flag(BUFFER_USAGE_UNIFORM_BIT)) {
		// Some GPUs (e.g. NVIDIA) have absurdly high alignments, like 256 bytes.
		alignment = MAX(alignment, physical_device_properties.limits.minUniformBufferOffsetAlignment);
	}
	if (p_usage.has_flag(BUFFER_USAGE_STORAGE_BIT)) {
		// This shouldn't be a problem since it's often <= 16 bytes. But do it just in case.
		alignment = MAX(alignment, physical_device_properties.limits.minStorageBufferOffsetAlignment);
	}
	// Align the size. This is especially important for BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT buffers.
	// For the rest, it should work thanks to VMA taking care of the details. But still align just in case.
	p_size = STEPIFY(p_size, alignment);

	const size_t original_size = p_size;
	if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
		p_size = p_size * frame_count;
	}
	VkBufferCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO;
	create_info.size = p_size;
	create_info.usage = p_usage & ~BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT;
	create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;

	VmaMemoryUsage vma_usage = VMA_MEMORY_USAGE_UNKNOWN;
	uint32_t vma_flags_to_remove = 0;

	VmaAllocationCreateInfo alloc_create_info = {};
	switch (p_allocation_type) {
		case MEMORY_ALLOCATION_TYPE_CPU: {
			bool is_src = p_usage.has_flag(BUFFER_USAGE_TRANSFER_FROM_BIT);
			bool is_dst = p_usage.has_flag(BUFFER_USAGE_TRANSFER_TO_BIT);
			if (is_src && !is_dst) {
				// Looks like a staging buffer: CPU maps, writes sequentially, then GPU copies to VRAM.
				alloc_create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
				alloc_create_info.preferredFlags |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
				vma_flags_to_remove |= VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
			}
			if (is_dst && !is_src) {
				// Looks like a readback buffer: GPU copies from VRAM, then CPU maps and reads.
				alloc_create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT;
				alloc_create_info.preferredFlags |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
				vma_flags_to_remove |= VK_MEMORY_PROPERTY_HOST_CACHED_BIT;
			}
			vma_usage = VMA_MEMORY_USAGE_AUTO_PREFER_HOST;
			alloc_create_info.requiredFlags = (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT);
		} break;
		case MEMORY_ALLOCATION_TYPE_GPU: {
			vma_usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
			if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
				// We must set it right now or else vmaFindMemoryTypeIndexForBufferInfo will use wrong parameters.
				alloc_create_info.usage = vma_usage;
			}
			if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
				alloc_create_info.flags = VMA_ALLOCATION_CREATE_HOST_ACCESS_SEQUENTIAL_WRITE_BIT;
			}
			alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
			if (p_size <= SMALL_ALLOCATION_MAX_SIZE) {
				uint32_t mem_type_index = 0;
				vmaFindMemoryTypeIndexForBufferInfo(allocator, &create_info, &alloc_create_info, &mem_type_index);
				alloc_create_info.pool = _find_or_create_small_allocs_pool(mem_type_index);
			}
		} break;
	}

	VkBuffer vk_buffer = VK_NULL_HANDLE;
	VmaAllocation allocation = nullptr;
	VmaAllocationInfo alloc_info = {};

	if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
		alloc_create_info.preferredFlags &= ~vma_flags_to_remove;
		alloc_create_info.usage = vma_usage;
		VkResult err = vmaCreateBuffer(allocator, &create_info, &alloc_create_info, &vk_buffer, &allocation, &alloc_info);
		ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
	} else {
		VkResult err = vkCreateBuffer(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER), &vk_buffer);
		ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't create buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
		err = vmaAllocateMemoryForBuffer(allocator, vk_buffer, &alloc_create_info, &allocation, &alloc_info);
		ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't allocate memory for buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
		err = vmaBindBufferMemory2(allocator, allocation, 0, vk_buffer, nullptr);
		ERR_FAIL_COND_V_MSG(err, BufferID(), "Can't bind memory to buffer of size: " + itos(p_size) + ", error " + itos(err) + ".");
	}

	// Bookkeep.
	BufferInfo *buf_info;
	if (p_usage.has_flag(BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT)) {
		void *persistent_ptr = nullptr;
		VkResult err = vmaMapMemory(allocator, allocation, &persistent_ptr);
		ERR_FAIL_COND_V_MSG(err, BufferID(), "vmaMapMemory failed with error " + itos(err) + ".");

		BufferDynamicInfo *dyn_buffer = VersatileResource::allocate<BufferDynamicInfo>(resources_allocator);
		buf_info = dyn_buffer;
#ifdef DEBUG_ENABLED
		dyn_buffer->last_frame_mapped = p_frames_drawn - 1ul;
#endif
		dyn_buffer->frame_idx = 0u;
		dyn_buffer->persistent_ptr = (uint8_t *)persistent_ptr;
	} else {
		buf_info = VersatileResource::allocate<BufferInfo>(resources_allocator);
	}
	buf_info->vk_buffer = vk_buffer;
	buf_info->allocation.handle = allocation;
	buf_info->allocation.size = alloc_info.size;
	buf_info->size = original_size;

	return BufferID(buf_info);
}

bool RenderingDeviceDriverVulkan::buffer_set_texel_format(BufferID p_buffer, DataFormat p_format) {
	BufferInfo *buf_info = (BufferInfo *)p_buffer.id;

	DEV_ASSERT(!buf_info->vk_view);

	VkBufferViewCreateInfo view_create_info = {};
	view_create_info.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO;
	view_create_info.buffer = buf_info->vk_buffer;
	view_create_info.format = RD_TO_VK_FORMAT[p_format];
	view_create_info.range = buf_info->allocation.size;

	VkResult res = vkCreateBufferView(vk_device, &view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW), &buf_info->vk_view);
	ERR_FAIL_COND_V_MSG(res, false, "Unable to create buffer view, error " + itos(res) + ".");

	return true;
}

void RenderingDeviceDriverVulkan::buffer_free(BufferID p_buffer) {
	BufferInfo *buf_info = (BufferInfo *)p_buffer.id;
	if (buf_info->vk_view) {
		vkDestroyBufferView(vk_device, buf_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER_VIEW));
	}

	if (buf_info->is_dynamic()) {
		vmaUnmapMemory(allocator, buf_info->allocation.handle);
	}

	if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
		vmaDestroyBuffer(allocator, buf_info->vk_buffer, buf_info->allocation.handle);
	} else {
		vkDestroyBuffer(vk_device, buf_info->vk_buffer, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_BUFFER));
		vmaFreeMemory(allocator, buf_info->allocation.handle);
	}

	if (buf_info->is_dynamic()) {
		VersatileResource::free(resources_allocator, (BufferDynamicInfo *)buf_info);
	} else {
		VersatileResource::free(resources_allocator, buf_info);
	}
}

uint64_t RenderingDeviceDriverVulkan::buffer_get_allocation_size(BufferID p_buffer) {
	const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
	return buf_info->allocation.size;
}

uint8_t *RenderingDeviceDriverVulkan::buffer_map(BufferID p_buffer) {
	const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
	ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), nullptr, "Buffer must NOT have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_persistent_map_advance() instead.");
	void *data_ptr = nullptr;
	VkResult err = vmaMapMemory(allocator, buf_info->allocation.handle, &data_ptr);
	ERR_FAIL_COND_V_MSG(err, nullptr, "vmaMapMemory failed with error " + itos(err) + ".");
	return (uint8_t *)data_ptr;
}

void RenderingDeviceDriverVulkan::buffer_unmap(BufferID p_buffer) {
	const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
	vmaUnmapMemory(allocator, buf_info->allocation.handle);
}

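// Advances the buffer's ring position and returns the mapped pointer for the new frame's
// region. Worked example, assuming frame_count == 3 and a 4096-byte buffer: successive calls
// return offsets 4096, 8192, 0, 4096, ... so the CPU never writes a region the GPU may still
// be reading from a previous frame.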
uint8_t *RenderingDeviceDriverVulkan::buffer_persistent_map_advance(BufferID p_buffer, uint64_t p_frames_drawn) {
	BufferDynamicInfo *buf_info = (BufferDynamicInfo *)p_buffer.id;
	ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), nullptr, "Buffer must have BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT. Use buffer_map() instead.");
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_V_MSG(buf_info->last_frame_mapped == p_frames_drawn, nullptr, "Buffers with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT must only be mapped once per frame. Otherwise there could be race conditions with the GPU. Amalgamate all data uploading into one map(), use an extra buffer or remove the bit.");
	buf_info->last_frame_mapped = p_frames_drawn;
#endif
	buf_info->frame_idx = (buf_info->frame_idx + 1u) % frame_count;
	return buf_info->persistent_ptr + buf_info->frame_idx * buf_info->size;
}

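// Packs the current frame index of each dynamic buffer into a bitmask, two bits per dynamic
// buffer. Worked example: dynamic buffers at ring positions {2, 1, 3} produce
// (2 << 0) | (1 << 2) | (3 << 4) = 0b110110. Non-dynamic buffers are skipped and consume no bits.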
uint64_t RenderingDeviceDriverVulkan::buffer_get_dynamic_offsets(Span<BufferID> p_buffers) {
	uint64_t mask = 0u;
	uint64_t shift = 0u;

	for (const BufferID &buf : p_buffers) {
		const BufferInfo *buf_info = (const BufferInfo *)buf.id;
		if (!buf_info->is_dynamic()) {
			continue;
		}
		mask |= buf_info->frame_idx << shift;
		// We can encode the frame index in 2 bits since frame_count won't be > 4.
		shift += 2UL;
	}

	return mask;
}

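// Flushing is only needed for non-coherent host-visible memory: on HOST_COHERENT memory,
// writes through a mapped pointer become visible to the GPU without an explicit flush. The
// non-coherent ranges recorded below are batched in pending_flushes and flushed later.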
void RenderingDeviceDriverVulkan::buffer_flush(BufferID p_buffer) {
	BufferDynamicInfo *buf_info = (BufferDynamicInfo *)p_buffer.id;

	VkMemoryPropertyFlags mem_props_flags;
	vmaGetAllocationMemoryProperties(allocator, buf_info->allocation.handle, &mem_props_flags);

	const bool needs_flushing = !(mem_props_flags & VK_MEMORY_PROPERTY_HOST_COHERENT_BIT);

	if (needs_flushing) {
		if (buf_info->is_dynamic()) {
			pending_flushes.allocations.push_back(buf_info->allocation.handle);
			pending_flushes.offsets.push_back(buf_info->frame_idx * buf_info->size);
			pending_flushes.sizes.push_back(buf_info->size);
		} else {
			pending_flushes.allocations.push_back(buf_info->allocation.handle);
			pending_flushes.offsets.push_back(0u);
			pending_flushes.sizes.push_back(VK_WHOLE_SIZE);
		}
	}
}

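// Note: vkGetBufferDeviceAddress is only valid here because the bufferDeviceAddress feature
// is enabled at device creation when supported (see _initialize_device()).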
uint64_t RenderingDeviceDriverVulkan::buffer_get_device_address(BufferID p_buffer) {
	const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
	VkBufferDeviceAddressInfo address_info = {};
	address_info.sType = VK_STRUCTURE_TYPE_BUFFER_DEVICE_ADDRESS_INFO;
	address_info.pNext = nullptr;
	address_info.buffer = buf_info->vk_buffer;
	return vkGetBufferDeviceAddress(vk_device, &address_info);
}

/*****************/
/**** TEXTURE ****/
/*****************/

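// Note: cube and array texture types map to plain 1D/2D Vulkan image types below; in Vulkan,
// the "cube-ness" and the array dimension are expressed through the image view type and
// arrayLayers, plus VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT for cube maps (see texture_create).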
static const VkImageType RD_TEX_TYPE_TO_VK_IMG_TYPE[RDD::TEXTURE_TYPE_MAX] = {
	VK_IMAGE_TYPE_1D,
	VK_IMAGE_TYPE_2D,
	VK_IMAGE_TYPE_3D,
	VK_IMAGE_TYPE_2D,
	VK_IMAGE_TYPE_1D,
	VK_IMAGE_TYPE_2D,
	VK_IMAGE_TYPE_2D,
};

static const VkSampleCountFlagBits RD_TO_VK_SAMPLE_COUNT[RDD::TEXTURE_SAMPLES_MAX] = {
	VK_SAMPLE_COUNT_1_BIT,
	VK_SAMPLE_COUNT_2_BIT,
	VK_SAMPLE_COUNT_4_BIT,
	VK_SAMPLE_COUNT_8_BIT,
	VK_SAMPLE_COUNT_16_BIT,
	VK_SAMPLE_COUNT_32_BIT,
	VK_SAMPLE_COUNT_64_BIT,
};

// RDD::TextureType == VkImageViewType.
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_TYPE_1D, VK_IMAGE_VIEW_TYPE_1D));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_TYPE_2D, VK_IMAGE_VIEW_TYPE_2D));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_TYPE_3D, VK_IMAGE_VIEW_TYPE_3D));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_TYPE_CUBE, VK_IMAGE_VIEW_TYPE_CUBE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_TYPE_1D_ARRAY, VK_IMAGE_VIEW_TYPE_1D_ARRAY));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_TYPE_2D_ARRAY, VK_IMAGE_VIEW_TYPE_2D_ARRAY));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_TYPE_CUBE_ARRAY, VK_IMAGE_VIEW_TYPE_CUBE_ARRAY));

// RDD::TextureSwizzle == VkComponentSwizzle.
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_ZERO, VK_COMPONENT_SWIZZLE_ZERO));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_ONE, VK_COMPONENT_SWIZZLE_ONE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_R, VK_COMPONENT_SWIZZLE_R));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_G, VK_COMPONENT_SWIZZLE_G));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_B, VK_COMPONENT_SWIZZLE_B));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_SWIZZLE_A, VK_COMPONENT_SWIZZLE_A));

// RDD::TextureAspectBits == VkImageAspectFlagBits.
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_COLOR_BIT, VK_IMAGE_ASPECT_COLOR_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_DEPTH_BIT, VK_IMAGE_ASPECT_DEPTH_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::TEXTURE_ASPECT_STENCIL_BIT, VK_IMAGE_ASPECT_STENCIL_BIT));

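// Clamps the requested MSAA level to what the device supports for both color and depth
// framebuffers. For example, if 16x is requested but the intersection of
// framebufferColorSampleCounts and framebufferDepthSampleCounts only reaches 8x, the loop
// below halves the count until it lands on 8x.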
VkSampleCountFlagBits RenderingDeviceDriverVulkan::_ensure_supported_sample_count(TextureSamples p_requested_sample_count) {
2146
VkSampleCountFlags sample_count_flags = (physical_device_properties.limits.framebufferColorSampleCounts & physical_device_properties.limits.framebufferDepthSampleCounts);
2147
2148
if ((sample_count_flags & RD_TO_VK_SAMPLE_COUNT[p_requested_sample_count])) {
2149
// The requested sample count is supported.
2150
return RD_TO_VK_SAMPLE_COUNT[p_requested_sample_count];
2151
} else {
2152
// Find the closest lower supported sample count.
2153
VkSampleCountFlagBits sample_count = RD_TO_VK_SAMPLE_COUNT[p_requested_sample_count];
2154
while (sample_count > VK_SAMPLE_COUNT_1_BIT) {
2155
if (sample_count_flags & sample_count) {
2156
return sample_count;
2157
}
2158
sample_count = (VkSampleCountFlagBits)(sample_count >> 1);
2159
}
2160
}
2161
return VK_SAMPLE_COUNT_1_BIT;
2162
}
2163
2164
RDD::TextureID RenderingDeviceDriverVulkan::texture_create(const TextureFormat &p_format, const TextureView &p_view) {
2165
VkImageCreateInfo create_info = {};
2166
create_info.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
2167
2168
if (p_format.shareable_formats.size()) {
2169
create_info.flags |= VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT;
2170
2171
if (enabled_device_extension_names.has(VK_KHR_IMAGE_FORMAT_LIST_EXTENSION_NAME)) {
2172
VkFormat *vk_allowed_formats = ALLOCA_ARRAY(VkFormat, p_format.shareable_formats.size());
2173
for (int i = 0; i < p_format.shareable_formats.size(); i++) {
2174
vk_allowed_formats[i] = RD_TO_VK_FORMAT[p_format.shareable_formats[i]];
2175
}
2176
2177
VkImageFormatListCreateInfoKHR *format_list_create_info = ALLOCA_SINGLE(VkImageFormatListCreateInfoKHR);
2178
*format_list_create_info = {};
2179
format_list_create_info->sType = VK_STRUCTURE_TYPE_IMAGE_FORMAT_LIST_CREATE_INFO_KHR;
2180
format_list_create_info->viewFormatCount = p_format.shareable_formats.size();
2181
format_list_create_info->pViewFormats = vk_allowed_formats;
2182
2183
create_info.pNext = format_list_create_info;
2184
}
2185
}

	if (p_format.texture_type == TEXTURE_TYPE_CUBE || p_format.texture_type == TEXTURE_TYPE_CUBE_ARRAY) {
		create_info.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
	}
	/*if (p_format.texture_type == TEXTURE_TYPE_2D || p_format.texture_type == TEXTURE_TYPE_2D_ARRAY) {
		create_info.flags |= VK_IMAGE_CREATE_2D_ARRAY_COMPATIBLE_BIT;
	}*/

	if (fdm_capabilities.offset_supported && (p_format.usage_bits & (TEXTURE_USAGE_COLOR_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_INPUT_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_RESOLVE_ATTACHMENT_BIT | TEXTURE_USAGE_VRS_ATTACHMENT_BIT))) {
		create_info.flags |= VK_IMAGE_CREATE_FRAGMENT_DENSITY_MAP_OFFSET_BIT_QCOM;
	}

	create_info.imageType = RD_TEX_TYPE_TO_VK_IMG_TYPE[p_format.texture_type];

	create_info.format = RD_TO_VK_FORMAT[p_format.format];

	create_info.extent.width = p_format.width;
	create_info.extent.height = p_format.height;
	create_info.extent.depth = p_format.depth;

	create_info.mipLevels = p_format.mipmaps;
	create_info.arrayLayers = p_format.array_layers;

	create_info.samples = _ensure_supported_sample_count(p_format.samples);
	create_info.tiling = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? VK_IMAGE_TILING_LINEAR : VK_IMAGE_TILING_OPTIMAL;

	// Usage.
	if ((p_format.usage_bits & TEXTURE_USAGE_SAMPLING_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_SAMPLED_BIT;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_STORAGE_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_STORAGE_BIT;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_COLOR_ATTACHMENT_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
	}
	if ((p_format.usage_bits & (TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_RESOLVE_ATTACHMENT_BIT))) {
		create_info.usage |= VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_INPUT_ATTACHMENT_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && (p_format.usage_bits & TEXTURE_USAGE_VRS_FRAGMENT_SHADING_RATE_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_VRS_ATTACHMENT_BIT) && (p_format.usage_bits & TEXTURE_USAGE_VRS_FRAGMENT_DENSITY_MAP_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_FRAGMENT_DENSITY_MAP_BIT_EXT;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_CAN_UPDATE_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_FROM_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_TRANSFER_SRC_BIT;
	}
	if ((p_format.usage_bits & TEXTURE_USAGE_CAN_COPY_TO_BIT)) {
		create_info.usage |= VK_IMAGE_USAGE_TRANSFER_DST_BIT;
	}

	create_info.sharingMode = VK_SHARING_MODE_EXCLUSIVE;
	create_info.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;

	// Allocate memory.

	uint32_t width = 0, height = 0;
	uint32_t image_size = get_image_format_required_size(p_format.format, p_format.width, p_format.height, p_format.depth, p_format.mipmaps, &width, &height);

	VmaAllocationCreateInfo alloc_create_info = {};
	alloc_create_info.flags = (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) ? VMA_ALLOCATION_CREATE_HOST_ACCESS_RANDOM_BIT : 0;

	if (p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) {
		uint32_t memory_type_index = 0;
		VmaAllocationCreateInfo lazy_memory_requirements = alloc_create_info;
		lazy_memory_requirements.usage = VMA_MEMORY_USAGE_GPU_LAZILY_ALLOCATED;
		VkResult result = vmaFindMemoryTypeIndex(allocator, UINT32_MAX, &lazy_memory_requirements, &memory_type_index);
		if (VK_SUCCESS == result) {
			alloc_create_info = lazy_memory_requirements;
			create_info.usage |= VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT;
			// VUID-VkImageCreateInfo-usage-00963:
			// If usage includes VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT,
			// then bits other than VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
			// and VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT must not be set.
			create_info.usage &= (VK_IMAGE_USAGE_TRANSIENT_ATTACHMENT_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT);
		} else {
			alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
		}
	} else if (p_format.usage_bits & TEXTURE_USAGE_CPU_READ_BIT) {
		alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT;
	} else {
		alloc_create_info.preferredFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT;
	}

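	// Route small images into a shared VMA pool, which avoids the overhead of one
	// dedicated allocation per tiny texture and reduces fragmentation.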
	if (image_size <= SMALL_ALLOCATION_MAX_SIZE) {
		uint32_t mem_type_index = 0;
		vmaFindMemoryTypeIndexForImageInfo(allocator, &create_info, &alloc_create_info, &mem_type_index);
		alloc_create_info.pool = _find_or_create_small_allocs_pool(mem_type_index);
	}

	// Create.

	VkImage vk_image = VK_NULL_HANDLE;
	VmaAllocation allocation = nullptr;
	VmaAllocationInfo alloc_info = {};

	if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
		alloc_create_info.usage = VMA_MEMORY_USAGE_AUTO_PREFER_DEVICE;
		VkResult err = vmaCreateImage(allocator, &create_info, &alloc_create_info, &vk_image, &allocation, &alloc_info);
		ERR_FAIL_COND_V_MSG(err, TextureID(), "vmaCreateImage failed with error " + itos(err) + ".");
	} else {
		VkResult err = vkCreateImage(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE), &vk_image);
		ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImage failed with error " + itos(err) + ".");
		err = vmaAllocateMemoryForImage(allocator, vk_image, &alloc_create_info, &allocation, &alloc_info);
		ERR_FAIL_COND_V_MSG(err, TextureID(), "Can't allocate memory for image, error: " + itos(err) + ".");
		err = vmaBindImageMemory2(allocator, allocation, 0, vk_image, nullptr);
		ERR_FAIL_COND_V_MSG(err, TextureID(), "Can't bind memory to image, error: " + itos(err) + ".");
	}

	// Create view.

	VkImageViewCreateInfo image_view_create_info = {};
	image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	image_view_create_info.image = vk_image;
	image_view_create_info.viewType = (VkImageViewType)p_format.texture_type;
	image_view_create_info.format = RD_TO_VK_FORMAT[p_view.format];
	image_view_create_info.components.r = (VkComponentSwizzle)p_view.swizzle_r;
	image_view_create_info.components.g = (VkComponentSwizzle)p_view.swizzle_g;
	image_view_create_info.components.b = (VkComponentSwizzle)p_view.swizzle_b;
	image_view_create_info.components.a = (VkComponentSwizzle)p_view.swizzle_a;
	image_view_create_info.subresourceRange.levelCount = create_info.mipLevels;
	image_view_create_info.subresourceRange.layerCount = create_info.arrayLayers;
	if ((p_format.usage_bits & (TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | TEXTURE_USAGE_DEPTH_RESOLVE_ATTACHMENT_BIT))) {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT;
	} else {
		image_view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	}

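	// With VK_EXT_astc_decode_mode, ask the driver to decode ASTC blocks at 8-bit UNORM
	// precision instead of a wider intermediate format, which saves sampling bandwidth
	// (LDR content does not benefit from the extra precision).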
	VkImageViewASTCDecodeModeEXT decode_mode;
	if (enabled_device_extension_names.has(VK_EXT_ASTC_DECODE_MODE_EXTENSION_NAME)) {
		if (image_view_create_info.format >= VK_FORMAT_ASTC_4x4_UNORM_BLOCK && image_view_create_info.format <= VK_FORMAT_ASTC_12x12_SRGB_BLOCK) {
			decode_mode.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_ASTC_DECODE_MODE_EXT;
			decode_mode.pNext = nullptr;
			decode_mode.decodeMode = VK_FORMAT_R8G8B8A8_UNORM;
			image_view_create_info.pNext = &decode_mode;
		}
	}

	VkImageView vk_image_view = VK_NULL_HANDLE;
	VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &vk_image_view);
	if (err) {
		if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
			vmaDestroyImage(allocator, vk_image, allocation);
		} else {
			vkDestroyImage(vk_device, vk_image, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE));
			vmaFreeMemory(allocator, allocation);
		}

		ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");
	}

	// Bookkeep.

	TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);
	tex_info->vk_image = vk_image;
	tex_info->vk_view = vk_image_view;
	tex_info->rd_format = p_format.format;
	tex_info->vk_create_info = create_info;
	tex_info->vk_view_create_info = image_view_create_info;
	tex_info->allocation.handle = allocation;
#ifdef DEBUG_ENABLED
	tex_info->transient = (p_format.usage_bits & TEXTURE_USAGE_TRANSIENT_BIT) != 0;
#endif
	vmaGetAllocationInfo(allocator, tex_info->allocation.handle, &tex_info->allocation.info);

#if PRINT_NATIVE_COMMANDS
	print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(vk_image_view), uint64_t(vk_image)));
#endif

	return TextureID(tex_info);
}

RDD::TextureID RenderingDeviceDriverVulkan::texture_create_from_extension(uint64_t p_native_texture, TextureType p_type, DataFormat p_format, uint32_t p_array_layers, bool p_depth_stencil, uint32_t p_mipmaps) {
	VkImage vk_image = (VkImage)p_native_texture;

	// We only need to create a view into the already existing natively-provided texture.

	VkImageViewCreateInfo image_view_create_info = {};
	image_view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	image_view_create_info.image = vk_image;
	image_view_create_info.viewType = (VkImageViewType)p_type;
	image_view_create_info.format = RD_TO_VK_FORMAT[p_format];
	image_view_create_info.components.r = VK_COMPONENT_SWIZZLE_R;
	image_view_create_info.components.g = VK_COMPONENT_SWIZZLE_G;
	image_view_create_info.components.b = VK_COMPONENT_SWIZZLE_B;
	image_view_create_info.components.a = VK_COMPONENT_SWIZZLE_A;
	image_view_create_info.subresourceRange.baseMipLevel = 0;
	image_view_create_info.subresourceRange.levelCount = p_mipmaps;
	image_view_create_info.subresourceRange.layerCount = p_array_layers;
	image_view_create_info.subresourceRange.aspectMask = p_depth_stencil ? VK_IMAGE_ASPECT_DEPTH_BIT : VK_IMAGE_ASPECT_COLOR_BIT;

	VkImageView vk_image_view = VK_NULL_HANDLE;
	VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &vk_image_view);
	if (err) {
		ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");
	}

	// Bookkeep.

	TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);
	tex_info->vk_view = vk_image_view;
	tex_info->rd_format = p_format;
	tex_info->vk_view_create_info = image_view_create_info;
#ifdef DEBUG_ENABLED
	tex_info->created_from_extension = true;
#endif
	return TextureID(tex_info);
}

RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared(TextureID p_original_texture, const TextureView &p_view) {
	const TextureInfo *owner_tex_info = (const TextureInfo *)p_original_texture.id;
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_V(!owner_tex_info->allocation.handle && !owner_tex_info->created_from_extension, TextureID());
#endif
	VkImageViewCreateInfo image_view_create_info = owner_tex_info->vk_view_create_info;
	image_view_create_info.format = RD_TO_VK_FORMAT[p_view.format];
	image_view_create_info.components.r = (VkComponentSwizzle)p_view.swizzle_r;
	image_view_create_info.components.g = (VkComponentSwizzle)p_view.swizzle_g;
	image_view_create_info.components.b = (VkComponentSwizzle)p_view.swizzle_b;
	image_view_create_info.components.a = (VkComponentSwizzle)p_view.swizzle_a;

	if (enabled_device_extension_names.has(VK_KHR_MAINTENANCE_2_EXTENSION_NAME)) {
		// If we require setting this information, we may need to make VK_KHR_maintenance2 mandatory,
		// and thus make Vulkan 1.1 our minimum supported version. Vulkan 1.0 may simply not care.
		if (image_view_create_info.format != owner_tex_info->vk_view_create_info.format) {
			VkImageViewUsageCreateInfo *usage_info = ALLOCA_SINGLE(VkImageViewUsageCreateInfo);
			*usage_info = {};
			usage_info->sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_USAGE_CREATE_INFO;
			usage_info->usage = owner_tex_info->vk_create_info.usage;

			// Certain features may not be available for the format of the view.
			{
				VkFormatProperties properties = {};
				vkGetPhysicalDeviceFormatProperties(physical_device, RD_TO_VK_FORMAT[p_view.format], &properties);
				const VkFormatFeatureFlags &supported_flags = owner_tex_info->vk_create_info.tiling == VK_IMAGE_TILING_LINEAR ? properties.linearTilingFeatures : properties.optimalTilingFeatures;
				if ((usage_info->usage & VK_IMAGE_USAGE_STORAGE_BIT) && !(supported_flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
					usage_info->usage &= ~uint32_t(VK_IMAGE_USAGE_STORAGE_BIT);
				}
				if ((usage_info->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) && !(supported_flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
					usage_info->usage &= ~uint32_t(VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT);
				}
			}

			image_view_create_info.pNext = usage_info;
		}
	}

	VkImageView new_vk_image_view = VK_NULL_HANDLE;
	VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &new_vk_image_view);
	ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");

	// Bookkeep.

	TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);
	*tex_info = *owner_tex_info;
	tex_info->vk_view = new_vk_image_view;
	tex_info->vk_view_create_info = image_view_create_info;
	tex_info->allocation = {};

#if PRINT_NATIVE_COMMANDS
	print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image)));
#endif

	return TextureID(tex_info);
}

RDD::TextureID RenderingDeviceDriverVulkan::texture_create_shared_from_slice(TextureID p_original_texture, const TextureView &p_view, TextureSliceType p_slice_type, uint32_t p_layer, uint32_t p_layers, uint32_t p_mipmap, uint32_t p_mipmaps) {
	const TextureInfo *owner_tex_info = (const TextureInfo *)p_original_texture.id;
#ifdef DEBUG_ENABLED
	ERR_FAIL_COND_V(!owner_tex_info->allocation.handle && !owner_tex_info->created_from_extension, TextureID());
#endif

	VkImageViewCreateInfo image_view_create_info = owner_tex_info->vk_view_create_info;
	switch (p_slice_type) {
		case TEXTURE_SLICE_2D: {
			image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
		} break;
		case TEXTURE_SLICE_3D: {
			image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_3D;
		} break;
		case TEXTURE_SLICE_CUBEMAP: {
			image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_CUBE;
		} break;
		case TEXTURE_SLICE_2D_ARRAY: {
			image_view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D_ARRAY;
		} break;
		default: {
			return TextureID(nullptr);
		}
	}
	image_view_create_info.format = RD_TO_VK_FORMAT[p_view.format];
	image_view_create_info.components.r = (VkComponentSwizzle)p_view.swizzle_r;
	image_view_create_info.components.g = (VkComponentSwizzle)p_view.swizzle_g;
	image_view_create_info.components.b = (VkComponentSwizzle)p_view.swizzle_b;
	image_view_create_info.components.a = (VkComponentSwizzle)p_view.swizzle_a;
	image_view_create_info.subresourceRange.baseMipLevel = p_mipmap;
	image_view_create_info.subresourceRange.levelCount = p_mipmaps;
	image_view_create_info.subresourceRange.baseArrayLayer = p_layer;
	image_view_create_info.subresourceRange.layerCount = p_layers;

	VkImageView new_vk_image_view = VK_NULL_HANDLE;
	VkResult err = vkCreateImageView(vk_device, &image_view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &new_vk_image_view);
	ERR_FAIL_COND_V_MSG(err, TextureID(), "vkCreateImageView failed with error " + itos(err) + ".");

	// Bookkeep.

	TextureInfo *tex_info = VersatileResource::allocate<TextureInfo>(resources_allocator);
	*tex_info = *owner_tex_info;
	tex_info->vk_view = new_vk_image_view;
	tex_info->vk_view_create_info = image_view_create_info;
	tex_info->allocation = {};

#if PRINT_NATIVE_COMMANDS
	print_line(vformat("vkCreateImageView: 0x%uX for 0x%uX (%d %d %d %d)", uint64_t(new_vk_image_view), uint64_t(owner_tex_info->vk_view_create_info.image), p_mipmap, p_mipmaps, p_layer, p_layers));
#endif

	return TextureID(tex_info);
}

void RenderingDeviceDriverVulkan::texture_free(TextureID p_texture) {
	TextureInfo *tex_info = (TextureInfo *)p_texture.id;
	vkDestroyImageView(vk_device, tex_info->vk_view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW));
	if (tex_info->allocation.handle) {
		if (!Engine::get_singleton()->is_extra_gpu_memory_tracking_enabled()) {
			vmaDestroyImage(allocator, tex_info->vk_view_create_info.image, tex_info->allocation.handle);
		} else {
			vkDestroyImage(vk_device, tex_info->vk_image, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE));
			vmaFreeMemory(allocator, tex_info->allocation.handle);
		}
	}
	VersatileResource::free(resources_allocator, tex_info);
}

uint64_t RenderingDeviceDriverVulkan::texture_get_allocation_size(TextureID p_texture) {
	const TextureInfo *tex_info = (const TextureInfo *)p_texture.id;
	return tex_info->allocation.info.size;
}

void RenderingDeviceDriverVulkan::texture_get_copyable_layout(TextureID p_texture, const TextureSubresource &p_subresource, TextureCopyableLayout *r_layout) {
	const TextureInfo *tex_info = (const TextureInfo *)p_texture.id;

	uint32_t w = MAX(1u, tex_info->vk_create_info.extent.width >> p_subresource.mipmap);
	uint32_t h = MAX(1u, tex_info->vk_create_info.extent.height >> p_subresource.mipmap);
	uint32_t d = MAX(1u, tex_info->vk_create_info.extent.depth >> p_subresource.mipmap);

	uint32_t bw = 0, bh = 0;
	get_compressed_image_format_block_dimensions(tex_info->rd_format, bw, bh);

	uint32_t sbw = 0, sbh = 0;
	*r_layout = {};
	r_layout->size = get_image_format_required_size(tex_info->rd_format, w, h, d, 1, &sbw, &sbh);
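	// The mip size is (block rows per slice) * (row pitch) * depth, so dividing the total size
	// by the number of block rows times the depth recovers the tightly packed row pitch.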
	r_layout->row_pitch = r_layout->size / ((sbh / bh) * d);
}

Vector<uint8_t> RenderingDeviceDriverVulkan::texture_get_data(TextureID p_texture, uint32_t p_layer) {
	const TextureInfo *tex = (const TextureInfo *)p_texture.id;

	DataFormat tex_format = tex->rd_format;
	uint32_t tex_width = tex->vk_create_info.extent.width;
	uint32_t tex_height = tex->vk_create_info.extent.height;
	uint32_t tex_depth = tex->vk_create_info.extent.depth;
	uint32_t tex_mipmaps = tex->vk_create_info.mipLevels;

	uint32_t width, height, depth;
	uint32_t tight_mip_size = get_image_format_required_size(tex_format, tex_width, tex_height, tex_depth, tex_mipmaps, &width, &height, &depth);

	Vector<uint8_t> image_data;
	image_data.resize(tight_mip_size);

	uint32_t blockw, blockh;
	get_compressed_image_format_block_dimensions(tex_format, blockw, blockh);
	uint32_t block_size = get_compressed_image_format_block_byte_size(tex_format);
	uint32_t pixel_size = get_image_format_pixel_size(tex_format);

	void *data_ptr = nullptr;
	VkResult err = vmaMapMemory(allocator, tex->allocation.handle, &data_ptr);
	ERR_FAIL_COND_V_MSG(err, Vector<uint8_t>(), "vmaMapMemory failed with error " + itos(err) + ".");

	{
		uint8_t *w = image_data.ptrw();

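		// Copy each mip level out of the linearly tiled image. vkGetImageSubresourceLayout
		// reports the driver's row and depth pitches, which can include padding; the destination
		// buffer is tightly packed, so rows are copied one by one rather than in bulk.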
		uint32_t mipmap_offset = 0;
		for (uint32_t mm_i = 0; mm_i < tex_mipmaps; mm_i++) {
			uint32_t image_total = get_image_format_required_size(tex_format, tex_width, tex_height, tex_depth, mm_i + 1, &width, &height, &depth);

			uint8_t *write_ptr_mipmap = w + mipmap_offset;
			tight_mip_size = image_total - mipmap_offset;

			VkImageSubresource vk_subres = {};
			vk_subres.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
			vk_subres.arrayLayer = p_layer;
			vk_subres.mipLevel = mm_i;

			VkSubresourceLayout vk_layout = {};
			vkGetImageSubresourceLayout(vk_device, tex->vk_view_create_info.image, &vk_subres, &vk_layout);

			for (uint32_t z = 0; z < depth; z++) {
				uint8_t *write_ptr = write_ptr_mipmap + z * tight_mip_size / depth;
				const uint8_t *slice_read_ptr = (uint8_t *)data_ptr + vk_layout.offset + z * vk_layout.depthPitch;

				if (block_size > 1) {
					// Compressed.
					uint32_t line_width = (block_size * (width / blockw));
					for (uint32_t y = 0; y < height / blockh; y++) {
						const uint8_t *rptr = slice_read_ptr + y * vk_layout.rowPitch;
						uint8_t *wptr = write_ptr + y * line_width;

						memcpy(wptr, rptr, line_width);
					}
				} else {
					// Uncompressed.
					for (uint32_t y = 0; y < height; y++) {
						const uint8_t *rptr = slice_read_ptr + y * vk_layout.rowPitch;
						uint8_t *wptr = write_ptr + y * pixel_size * width;
						memcpy(wptr, rptr, (uint64_t)pixel_size * width);
					}
				}
			}

			mipmap_offset = image_total;
		}
	}

	vmaUnmapMemory(allocator, tex->allocation.handle);

	return image_data;
}

BitField<RDD::TextureUsageBits> RenderingDeviceDriverVulkan::texture_get_usages_supported_by_format(DataFormat p_format, bool p_cpu_readable) {
	if (p_format >= DATA_FORMAT_ASTC_4x4_SFLOAT_BLOCK && p_format <= DATA_FORMAT_ASTC_12x12_SFLOAT_BLOCK && !enabled_device_extension_names.has(VK_EXT_TEXTURE_COMPRESSION_ASTC_HDR_EXTENSION_NAME)) {
		// Formats introduced by extensions must not reach vkGetPhysicalDeviceFormatProperties if the
		// extension isn't available; in that case the format is simply unsupported.
		return 0;
	}
	VkFormatProperties properties = {};
	vkGetPhysicalDeviceFormatProperties(physical_device, RD_TO_VK_FORMAT[p_format], &properties);

	const VkFormatFeatureFlags &flags = p_cpu_readable ? properties.linearTilingFeatures : properties.optimalTilingFeatures;

	// Start with everything supported and clear bits as needed; this makes an all-or-nothing check easier for the caller.
	BitField<RDD::TextureUsageBits> supported = INT64_MAX;

	if (!(flags & VK_FORMAT_FEATURE_SAMPLED_IMAGE_BIT)) {
		supported.clear_flag(TEXTURE_USAGE_SAMPLING_BIT);
	}
	if (!(flags & VK_FORMAT_FEATURE_COLOR_ATTACHMENT_BIT)) {
		supported.clear_flag(TEXTURE_USAGE_COLOR_ATTACHMENT_BIT);
	}
	if (!(flags & VK_FORMAT_FEATURE_DEPTH_STENCIL_ATTACHMENT_BIT)) {
		supported.clear_flag(TEXTURE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT);
		supported.clear_flag(TEXTURE_USAGE_DEPTH_RESOLVE_ATTACHMENT_BIT);
	}
	if (!(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_BIT)) {
		supported.clear_flag(TEXTURE_USAGE_STORAGE_BIT);
	}
	if (!(flags & VK_FORMAT_FEATURE_STORAGE_IMAGE_ATOMIC_BIT)) {
		supported.clear_flag(TEXTURE_USAGE_STORAGE_ATOMIC_BIT);
	}
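	// Only R8_UINT (the format fragment shading rate attachments require) and R8G8_UNORM
	// (the format fragment density maps use) can back a VRS attachment.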
	if (p_format != DATA_FORMAT_R8_UINT && p_format != DATA_FORMAT_R8G8_UNORM) {
		supported.clear_flag(TEXTURE_USAGE_VRS_ATTACHMENT_BIT);
	}

	return supported;
}

bool RenderingDeviceDriverVulkan::texture_can_make_shared_with_format(TextureID p_texture, DataFormat p_format, bool &r_raw_reinterpretation) {
	r_raw_reinterpretation = false;
	return true;
}

/*****************/
/**** SAMPLER ****/
/*****************/

// RDD::SamplerRepeatMode == VkSamplerAddressMode.
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_REPEAT_MODE_REPEAT, VK_SAMPLER_ADDRESS_MODE_REPEAT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_REPEAT_MODE_MIRRORED_REPEAT, VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_REPEAT_MODE_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_REPEAT_MODE_CLAMP_TO_BORDER, VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_REPEAT_MODE_MIRROR_CLAMP_TO_EDGE, VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE));

// RDD::SamplerBorderColor == VkBorderColor.
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_BORDER_COLOR_INT_TRANSPARENT_BLACK, VK_BORDER_COLOR_INT_TRANSPARENT_BLACK));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_BORDER_COLOR_FLOAT_OPAQUE_BLACK, VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_BORDER_COLOR_INT_OPAQUE_BLACK, VK_BORDER_COLOR_INT_OPAQUE_BLACK));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_BORDER_COLOR_FLOAT_OPAQUE_WHITE, VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::SAMPLER_BORDER_COLOR_INT_OPAQUE_WHITE, VK_BORDER_COLOR_INT_OPAQUE_WHITE));

RDD::SamplerID RenderingDeviceDriverVulkan::sampler_create(const SamplerState &p_state) {
	VkSamplerCreateInfo sampler_create_info = {};
	sampler_create_info.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO;
	sampler_create_info.pNext = nullptr;
	sampler_create_info.flags = 0;
	sampler_create_info.magFilter = p_state.mag_filter == SAMPLER_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
	sampler_create_info.minFilter = p_state.min_filter == SAMPLER_FILTER_LINEAR ? VK_FILTER_LINEAR : VK_FILTER_NEAREST;
	sampler_create_info.mipmapMode = p_state.mip_filter == SAMPLER_FILTER_LINEAR ? VK_SAMPLER_MIPMAP_MODE_LINEAR : VK_SAMPLER_MIPMAP_MODE_NEAREST;
	sampler_create_info.addressModeU = (VkSamplerAddressMode)p_state.repeat_u;
	sampler_create_info.addressModeV = (VkSamplerAddressMode)p_state.repeat_v;
	sampler_create_info.addressModeW = (VkSamplerAddressMode)p_state.repeat_w;
	sampler_create_info.mipLodBias = p_state.lod_bias;
	sampler_create_info.anisotropyEnable = p_state.use_anisotropy && (physical_device_features.samplerAnisotropy == VK_TRUE);
	sampler_create_info.maxAnisotropy = p_state.anisotropy_max;
	sampler_create_info.compareEnable = p_state.enable_compare;
	sampler_create_info.compareOp = (VkCompareOp)p_state.compare_op;
	sampler_create_info.minLod = p_state.min_lod;
	sampler_create_info.maxLod = p_state.max_lod;
	sampler_create_info.borderColor = (VkBorderColor)p_state.border_color;
	sampler_create_info.unnormalizedCoordinates = p_state.unnormalized_uvw;

	VkSampler vk_sampler = VK_NULL_HANDLE;
	VkResult res = vkCreateSampler(vk_device, &sampler_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SAMPLER), &vk_sampler);
	ERR_FAIL_COND_V_MSG(res, SamplerID(), "vkCreateSampler failed with error " + itos(res) + ".");

	return SamplerID(vk_sampler);
}
2708
2709
void RenderingDeviceDriverVulkan::sampler_free(SamplerID p_sampler) {
2710
vkDestroySampler(vk_device, (VkSampler)p_sampler.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SAMPLER));
2711
}
2712
2713
bool RenderingDeviceDriverVulkan::sampler_is_format_supported_for_filter(DataFormat p_format, SamplerFilter p_filter) {
2714
switch (p_filter) {
2715
case SAMPLER_FILTER_NEAREST: {
2716
return true;
2717
}
2718
case SAMPLER_FILTER_LINEAR: {
2719
VkFormatProperties properties = {};
2720
vkGetPhysicalDeviceFormatProperties(physical_device, RD_TO_VK_FORMAT[p_format], &properties);
2721
return (properties.optimalTilingFeatures & VK_FORMAT_FEATURE_SAMPLED_IMAGE_FILTER_LINEAR_BIT);
2722
}
2723
}
2724
return false;
2725
}

/**********************/
/**** VERTEX ARRAY ****/
/**********************/

RDD::VertexFormatID RenderingDeviceDriverVulkan::vertex_format_create(Span<VertexAttribute> p_vertex_attribs, const VertexAttributeBindingsMap &p_vertex_bindings) {
	// Pre-bookkeep.
	VertexFormatInfo *vf_info = VersatileResource::allocate<VertexFormatInfo>(resources_allocator);

	vf_info->vk_bindings.reserve(p_vertex_bindings.size());
	for (const VertexAttributeBindingsMap::KV &E : p_vertex_bindings) {
		const VertexAttributeBinding &binding = E.value;
		VkVertexInputBindingDescription vk_binding = {};
		vk_binding.binding = E.key;
		vk_binding.stride = binding.stride;
		vk_binding.inputRate = binding.frequency == VERTEX_FREQUENCY_INSTANCE ? VK_VERTEX_INPUT_RATE_INSTANCE : VK_VERTEX_INPUT_RATE_VERTEX;
		vf_info->vk_bindings.push_back(vk_binding);
	}
	vf_info->vk_attributes.resize(p_vertex_attribs.size());
	for (uint32_t i = 0; i < p_vertex_attribs.size(); i++) {
		vf_info->vk_attributes[i] = {};
		vf_info->vk_attributes[i].binding = p_vertex_attribs[i].binding;
		vf_info->vk_attributes[i].location = p_vertex_attribs[i].location;
		vf_info->vk_attributes[i].format = RD_TO_VK_FORMAT[p_vertex_attribs[i].format];
		vf_info->vk_attributes[i].offset = p_vertex_attribs[i].offset;
	}

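	// vk_create_info stores raw pointers into vf_info's own vectors, so it remains valid exactly
	// as long as the VertexFormatInfo itself; the vectors must not be resized afterwards.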
	vf_info->vk_create_info = {};
	vf_info->vk_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
	vf_info->vk_create_info.vertexBindingDescriptionCount = vf_info->vk_bindings.size();
	vf_info->vk_create_info.pVertexBindingDescriptions = vf_info->vk_bindings.ptr();
	vf_info->vk_create_info.vertexAttributeDescriptionCount = vf_info->vk_attributes.size();
	vf_info->vk_create_info.pVertexAttributeDescriptions = vf_info->vk_attributes.ptr();

	return VertexFormatID(vf_info);
}

void RenderingDeviceDriverVulkan::vertex_format_free(VertexFormatID p_vertex_format) {
	VertexFormatInfo *vf_info = (VertexFormatInfo *)p_vertex_format.id;
	VersatileResource::free(resources_allocator, vf_info);
}

/******************/
/**** BARRIERS ****/
/******************/

// RDD::PipelineStageBits == VkPipelineStageFlagBits.
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_DRAW_INDIRECT_BIT, VK_PIPELINE_STAGE_DRAW_INDIRECT_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_VERTEX_INPUT_BIT, VK_PIPELINE_STAGE_VERTEX_INPUT_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_VERTEX_SHADER_BIT, VK_PIPELINE_STAGE_VERTEX_SHADER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT, VK_PIPELINE_STAGE_TESSELLATION_CONTROL_SHADER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT, VK_PIPELINE_STAGE_TESSELLATION_EVALUATION_SHADER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_GEOMETRY_SHADER_BIT, VK_PIPELINE_STAGE_GEOMETRY_SHADER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_SHADER_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT, VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_GRAPHICS_BIT, VK_PIPELINE_STAGE_ALL_GRAPHICS_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ALL_COMMANDS_BIT, VK_PIPELINE_STAGE_ALL_COMMANDS_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT, VK_PIPELINE_STAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT, VK_PIPELINE_STAGE_FRAGMENT_DENSITY_PROCESS_BIT_EXT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_RAY_TRACING_SHADER_BIT, VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT, VK_PIPELINE_STAGE_ACCELERATION_STRUCTURE_BUILD_BIT_KHR));

// RDD::BarrierAccessBits == VkAccessFlagBits.
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INDIRECT_COMMAND_READ_BIT, VK_ACCESS_INDIRECT_COMMAND_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INDEX_READ_BIT, VK_ACCESS_INDEX_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_VERTEX_ATTRIBUTE_READ_BIT, VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_UNIFORM_READ_BIT, VK_ACCESS_UNIFORM_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_INPUT_ATTACHMENT_READ_BIT, VK_ACCESS_INPUT_ATTACHMENT_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_SHADER_READ_BIT, VK_ACCESS_SHADER_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_SHADER_WRITE_BIT, VK_ACCESS_SHADER_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_READ_BIT, VK_ACCESS_HOST_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_HOST_WRITE_BIT, VK_ACCESS_HOST_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_READ_BIT, VK_ACCESS_MEMORY_READ_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_MEMORY_WRITE_BIT, VK_ACCESS_MEMORY_WRITE_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_SHADING_RATE_ATTACHMENT_READ_BIT_KHR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_FRAGMENT_DENSITY_MAP_ATTACHMENT_READ_BIT, VK_ACCESS_FRAGMENT_DENSITY_MAP_READ_BIT_EXT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_READ_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BARRIER_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT, VK_ACCESS_ACCELERATION_STRUCTURE_WRITE_BIT_KHR));

void RenderingDeviceDriverVulkan::command_pipeline_barrier(
		CommandBufferID p_cmd_buffer,
		BitField<PipelineStageBits> p_src_stages,
		BitField<PipelineStageBits> p_dst_stages,
		VectorView<MemoryAccessBarrier> p_memory_barriers,
		VectorView<BufferBarrier> p_buffer_barriers,
		VectorView<TextureBarrier> p_texture_barriers,
		VectorView<AccelerationStructureBarrier> p_acceleration_structure_barriers) {
	VkMemoryBarrier *vk_memory_barriers = ALLOCA_ARRAY(VkMemoryBarrier, p_memory_barriers.size());
	for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
		vk_memory_barriers[i] = {};
		vk_memory_barriers[i].sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		vk_memory_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].src_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		vk_memory_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_memory_barriers[i].dst_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
	}

	VkBufferMemoryBarrier *vk_buffer_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_buffer_barriers.size());
	for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
		vk_buffer_barriers[i] = {};
		vk_buffer_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
		vk_buffer_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		vk_buffer_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		vk_buffer_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].src_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		vk_buffer_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_buffer_barriers[i].dst_access) & ~VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR;
		vk_buffer_barriers[i].buffer = ((const BufferInfo *)p_buffer_barriers[i].buffer.id)->vk_buffer;
		vk_buffer_barriers[i].offset = p_buffer_barriers[i].offset;
		vk_buffer_barriers[i].size = p_buffer_barriers[i].size;
	}

	VkImageMemoryBarrier *vk_image_barriers = ALLOCA_ARRAY(VkImageMemoryBarrier, p_texture_barriers.size());
	for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
		const TextureInfo *tex_info = (const TextureInfo *)p_texture_barriers[i].texture.id;
		vk_image_barriers[i] = {};
		vk_image_barriers[i].sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		vk_image_barriers[i].srcAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].src_access);
		vk_image_barriers[i].dstAccessMask = _rd_to_vk_access_flags(p_texture_barriers[i].dst_access);
		vk_image_barriers[i].oldLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].prev_layout];
		vk_image_barriers[i].newLayout = RD_TO_VK_LAYOUT[p_texture_barriers[i].next_layout];
		vk_image_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		vk_image_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		vk_image_barriers[i].image = tex_info->vk_view_create_info.image;
		vk_image_barriers[i].subresourceRange.aspectMask = (VkImageAspectFlags)p_texture_barriers[i].subresources.aspect;
		vk_image_barriers[i].subresourceRange.baseMipLevel = p_texture_barriers[i].subresources.base_mipmap;
		vk_image_barriers[i].subresourceRange.levelCount = p_texture_barriers[i].subresources.mipmap_count;
		vk_image_barriers[i].subresourceRange.baseArrayLayer = p_texture_barriers[i].subresources.base_layer;
		vk_image_barriers[i].subresourceRange.layerCount = p_texture_barriers[i].subresources.layer_count;
	}

	VkPipelineStageFlags src_stage_flags = _rd_to_vk_pipeline_stages(p_src_stages);
	VkPipelineStageFlags dst_stage_flags = _rd_to_vk_pipeline_stages(p_dst_stages);
	VkPipelineStageFlags accel_src_stages = src_stage_flags;
	VkPipelineStageFlags accel_dst_stages = dst_stage_flags;

	VkBufferMemoryBarrier *vk_accel_barriers = ALLOCA_ARRAY(VkBufferMemoryBarrier, p_acceleration_structure_barriers.size());
	for (uint32_t i = 0; i < p_acceleration_structure_barriers.size(); i++) {
		// If the rayQuery feature is not enabled and a memory barrier srcAccessMask includes
		// VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, srcStageMask must not include any of the
		// VK_PIPELINE_STAGE_*_SHADER_BIT stages except VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR.
		VkAccessFlags src_access = _rd_to_vk_access_flags(p_acceleration_structure_barriers[i].src_access);
		if ((src_access & VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR) != 0) {
			accel_src_stages &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
		}

		// If the rayQuery feature is not enabled and a memory barrier dstAccessMask includes
		// VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR, dstStageMask must not include any of the
		// VK_PIPELINE_STAGE_*_SHADER_BIT stages except VK_PIPELINE_STAGE_RAY_TRACING_SHADER_BIT_KHR.
		VkAccessFlags dst_access = _rd_to_vk_access_flags(p_acceleration_structure_barriers[i].dst_access);
		if ((dst_access & VK_ACCESS_ACCELERATION_STRUCTURE_READ_BIT_KHR) != 0) {
			accel_dst_stages &= ~(VK_PIPELINE_STAGE_VERTEX_SHADER_BIT | VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT);
		}

		const AccelerationStructureInfo *accel_info = (const AccelerationStructureInfo *)p_acceleration_structure_barriers[i].acceleration_structure.id;
		vk_accel_barriers[i] = {};
		vk_accel_barriers[i].sType = VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER;
		vk_accel_barriers[i].srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		vk_accel_barriers[i].dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		vk_accel_barriers[i].srcAccessMask = src_access;
		vk_accel_barriers[i].dstAccessMask = dst_access;
		vk_accel_barriers[i].buffer = ((const BufferInfo *)accel_info->buffer.id)->vk_buffer;
		vk_accel_barriers[i].offset = p_acceleration_structure_barriers[i].offset;
		vk_accel_barriers[i].size = p_acceleration_structure_barriers[i].size;
	}

#if PRINT_NATIVE_COMMANDS
	print_line(vformat("vkCmdPipelineBarrier MEMORY %d BUFFER %d TEXTURE %d ACCELERATION STRUCTURE %d", p_memory_barriers.size(), p_buffer_barriers.size(), p_texture_barriers.size(), p_acceleration_structure_barriers.size()));
	for (uint32_t i = 0; i < p_memory_barriers.size(); i++) {
		print_line(vformat(" VkMemoryBarrier #%d src 0x%uX dst 0x%uX", i, vk_memory_barriers[i].srcAccessMask, vk_memory_barriers[i].dstAccessMask));
	}

	for (uint32_t i = 0; i < p_buffer_barriers.size(); i++) {
		print_line(vformat(" VkBufferMemoryBarrier #%d src 0x%uX dst 0x%uX buffer 0x%ux", i, vk_buffer_barriers[i].srcAccessMask, vk_buffer_barriers[i].dstAccessMask, uint64_t(vk_buffer_barriers[i].buffer)));
	}

	for (uint32_t i = 0; i < p_texture_barriers.size(); i++) {
		print_line(vformat(" VkImageMemoryBarrier #%d src 0x%uX dst 0x%uX image 0x%ux old %d new %d (%d %d %d %d)", i, vk_image_barriers[i].srcAccessMask, vk_image_barriers[i].dstAccessMask,
				uint64_t(vk_image_barriers[i].image), vk_image_barriers[i].oldLayout, vk_image_barriers[i].newLayout, vk_image_barriers[i].subresourceRange.baseMipLevel, vk_image_barriers[i].subresourceRange.levelCount,
				vk_image_barriers[i].subresourceRange.baseArrayLayer, vk_image_barriers[i].subresourceRange.layerCount));
	}

	for (uint32_t i = 0; i < p_acceleration_structure_barriers.size(); i++) {
		print_line(vformat(" VkBufferMemoryBarrier #%d src 0x%uX dst 0x%uX acceleration structure buffer 0x%ux", i, vk_accel_barriers[i].srcAccessMask, vk_accel_barriers[i].dstAccessMask, uint64_t(vk_accel_barriers[i].buffer)));
	}
#endif

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdPipelineBarrier(
			command_buffer->vk_command_buffer,
			src_stage_flags,
			dst_stage_flags,
			0,
			p_memory_barriers.size(), vk_memory_barriers,
			p_buffer_barriers.size(), vk_buffer_barriers,
			p_texture_barriers.size(), vk_image_barriers);

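	// Acceleration structure barriers go in a second vkCmdPipelineBarrier call because they may
	// need the trimmed stage masks computed above, while the regular barriers keep the caller's
	// original source and destination stages.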
	if (p_acceleration_structure_barriers.size() > 0) {
		vkCmdPipelineBarrier(
				command_buffer->vk_command_buffer,
				accel_src_stages,
				accel_dst_stages,
				0,
				0, nullptr,
				p_acceleration_structure_barriers.size(), vk_accel_barriers,
				0, nullptr);
	}
}

/****************/
/**** FENCES ****/
/****************/

RDD::FenceID RenderingDeviceDriverVulkan::fence_create() {
	VkFence vk_fence = VK_NULL_HANDLE;
	VkFenceCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_FENCE_CREATE_INFO;
	VkResult err = vkCreateFence(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FENCE), &vk_fence);
	ERR_FAIL_COND_V(err != VK_SUCCESS, FenceID());

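	// The driver-side Fence wraps the VkFence together with the queue that last signaled it,
	// so fence_wait() can recycle the image-acquisition semaphores tied to that submission.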
	Fence *fence = memnew(Fence);
	fence->vk_fence = vk_fence;
	fence->queue_signaled_from = nullptr;
	return FenceID(fence);
}

Error RenderingDeviceDriverVulkan::fence_wait(FenceID p_fence) {
	Fence *fence = (Fence *)(p_fence.id);
	VkResult fence_status = vkGetFenceStatus(vk_device, fence->vk_fence);
	if (fence_status == VK_NOT_READY) {
		VkResult err = vkWaitForFences(vk_device, 1, &fence->vk_fence, VK_TRUE, UINT64_MAX);
		ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
	}

	VkResult err = vkResetFences(vk_device, 1, &fence->vk_fence);
	ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);

	if (fence->queue_signaled_from != nullptr) {
		// Release all semaphores that the command queue associated with the fence waited on the last time it was submitted.
		LocalVector<Pair<Fence *, uint32_t>> &pairs = fence->queue_signaled_from->image_semaphores_for_fences;
		uint32_t i = 0;
		while (i < pairs.size()) {
			if (pairs[i].first == fence) {
				_release_image_semaphore(fence->queue_signaled_from, pairs[i].second, true);
				fence->queue_signaled_from->free_image_semaphores.push_back(pairs[i].second);
				pairs.remove_at(i);
			} else {
				i++;
			}
		}

		fence->queue_signaled_from = nullptr;
	}

	return OK;
}

void RenderingDeviceDriverVulkan::fence_free(FenceID p_fence) {
	Fence *fence = (Fence *)(p_fence.id);
	vkDestroyFence(vk_device, fence->vk_fence, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FENCE));
	memdelete(fence);
}

/********************/
/**** SEMAPHORES ****/
/********************/

RDD::SemaphoreID RenderingDeviceDriverVulkan::semaphore_create() {
	VkSemaphore semaphore = VK_NULL_HANDLE;
	VkSemaphoreCreateInfo create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
	VkResult err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore);
	ERR_FAIL_COND_V(err != VK_SUCCESS, SemaphoreID());

	return SemaphoreID(semaphore);
}

void RenderingDeviceDriverVulkan::semaphore_free(SemaphoreID p_semaphore) {
	vkDestroySemaphore(vk_device, VkSemaphore(p_semaphore.id), VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));
}

/******************/
/**** COMMANDS ****/
/******************/

// ----- QUEUE FAMILY -----

RDD::CommandQueueFamilyID RenderingDeviceDriverVulkan::command_queue_family_get(BitField<CommandQueueFamilyBits> p_cmd_queue_family_bits, RenderingContextDriver::SurfaceID p_surface) {
	// Pick the queue with the fewest bits that can fulfill the requirements.
	VkQueueFlags picked_queue_flags = VK_QUEUE_FLAG_BITS_MAX_ENUM;
	uint32_t picked_family_index = UINT_MAX;
	for (uint32_t i = 0; i < queue_family_properties.size(); i++) {
		if (queue_families[i].is_empty()) {
			// Ignore empty queue families.
			continue;
		}

		if (p_surface != 0 && !context_driver->queue_family_supports_present(physical_device, i, p_surface)) {
			// Present is not an actual bit but something that must be queried manually.
			continue;
		}

		// Preferring a queue with fewer bits gets us closer to a queue that performs better for our requirements.
		// For example, dedicated compute and transfer queues are usually indicated as such.
		const VkQueueFlags option_queue_flags = queue_family_properties[i].queueFlags;
		const bool includes_all_bits = p_cmd_queue_family_bits.get_shared(option_queue_flags) == p_cmd_queue_family_bits;
		const bool prefer_less_bits = option_queue_flags < picked_queue_flags;
		if (includes_all_bits && prefer_less_bits) {
			picked_family_index = i;
			picked_queue_flags = option_queue_flags;
		}
	}

	if (picked_family_index >= queue_family_properties.size()) {
		return CommandQueueFamilyID();
	}

	// Since 0 is a valid index and we use 0 as the error case, we make the index start from 1 instead.
	return CommandQueueFamilyID(picked_family_index + 1);
}

// ----- QUEUE -----

RDD::CommandQueueID RenderingDeviceDriverVulkan::command_queue_create(CommandQueueFamilyID p_cmd_queue_family, bool p_identify_as_main_queue) {
	DEV_ASSERT(p_cmd_queue_family.id != 0);

	// Make a virtual queue on top of a real queue. Use the queue from the family with the fewest virtual queues created.
	uint32_t family_index = p_cmd_queue_family.id - 1;
	TightLocalVector<Queue> &queue_family = queue_families[family_index];
	uint32_t picked_queue_index = UINT_MAX;
	uint32_t picked_virtual_count = UINT_MAX;
	for (uint32_t i = 0; i < queue_family.size(); i++) {
		if (queue_family[i].virtual_count < picked_virtual_count) {
			picked_queue_index = i;
			picked_virtual_count = queue_family[i].virtual_count;
		}
	}

	ERR_FAIL_COND_V_MSG(picked_queue_index >= queue_family.size(), CommandQueueID(), "A queue in the picked family could not be found.");

#if defined(SWAPPY_FRAME_PACING_ENABLED)
	if (swappy_frame_pacer_enable) {
		VkQueue selected_queue;
		vkGetDeviceQueue(vk_device, family_index, picked_queue_index, &selected_queue);
		SwappyVk_setQueueFamilyIndex(vk_device, selected_queue, family_index);
	}
#endif

	// Create the virtual queue.
	CommandQueue *command_queue = memnew(CommandQueue);
	command_queue->queue_family = family_index;
	command_queue->queue_index = picked_queue_index;
	queue_family[picked_queue_index].virtual_count++;

	// If it was identified as the main queue and a hook is active, indicate it as such to the hook.
	if (p_identify_as_main_queue && (VulkanHooks::get_singleton() != nullptr)) {
		VulkanHooks::get_singleton()->set_direct_queue_family_and_index(family_index, picked_queue_index);
	}

	return CommandQueueID(command_queue);
}

Error RenderingDeviceDriverVulkan::command_queue_execute_and_present(CommandQueueID p_cmd_queue, VectorView<SemaphoreID> p_wait_semaphores, VectorView<CommandBufferID> p_cmd_buffers, VectorView<SemaphoreID> p_cmd_semaphores, FenceID p_cmd_fence, VectorView<SwapChainID> p_swap_chains) {
	DEV_ASSERT(p_cmd_queue.id != 0);

	VkResult err;
	CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id);
	Queue &device_queue = queue_families[command_queue->queue_family][command_queue->queue_index];
	Fence *fence = (Fence *)(p_cmd_fence.id);
	VkFence vk_fence = (fence != nullptr) ? fence->vk_fence : VK_NULL_HANDLE;

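	// thread_local scratch vectors are reused across calls so each submission avoids heap allocations.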
	thread_local LocalVector<VkSemaphore> wait_semaphores;
	thread_local LocalVector<VkPipelineStageFlags> wait_semaphores_stages;
	wait_semaphores.clear();
	wait_semaphores_stages.clear();

	if (!command_queue->pending_semaphores_for_execute.is_empty()) {
		for (uint32_t i = 0; i < command_queue->pending_semaphores_for_execute.size(); i++) {
			VkSemaphore wait_semaphore = command_queue->image_semaphores[command_queue->pending_semaphores_for_execute[i]];
			wait_semaphores.push_back(wait_semaphore);
			wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT);
		}

		command_queue->pending_semaphores_for_execute.clear();
	}

	for (uint32_t i = 0; i < p_wait_semaphores.size(); i++) {
		// FIXME: Allow specifying the stage mask in more detail.
		wait_semaphores.push_back(VkSemaphore(p_wait_semaphores[i].id));
		wait_semaphores_stages.push_back(VK_PIPELINE_STAGE_ALL_COMMANDS_BIT);
	}

	if (!pending_flushes.allocations.is_empty()) {
		// We must do this now, even if p_cmd_buffers is empty, because afterwards pending_flushes.allocations
		// could become dangling. We cannot delay this call to the next frame(s).
		err = vmaFlushAllocations(allocator, pending_flushes.allocations.size(),
				pending_flushes.allocations.ptr(), pending_flushes.offsets.ptr(),
				pending_flushes.sizes.ptr());
		pending_flushes.allocations.clear();
		pending_flushes.offsets.clear();
		pending_flushes.sizes.clear();
		ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);
	}

	if (p_cmd_buffers.size() > 0) {
		thread_local LocalVector<VkCommandBuffer> command_buffers;
		thread_local LocalVector<VkSemaphore> present_semaphores;
		thread_local LocalVector<VkSemaphore> signal_semaphores;
		command_buffers.clear();
		present_semaphores.clear();
		signal_semaphores.clear();

		for (uint32_t i = 0; i < p_cmd_buffers.size(); i++) {
			const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)(p_cmd_buffers[i].id);
			command_buffers.push_back(command_buffer->vk_command_buffer);
		}

		for (uint32_t i = 0; i < p_cmd_semaphores.size(); i++) {
			signal_semaphores.push_back(VkSemaphore(p_cmd_semaphores[i].id));
		}

		for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
			const SwapChain *swap_chain = (const SwapChain *)(p_swap_chains[i].id);
			VkSemaphore semaphore = swap_chain->present_semaphores[swap_chain->image_index];
			present_semaphores.push_back(semaphore);
			signal_semaphores.push_back(semaphore);
		}

		VkSubmitInfo submit_info = {};
		submit_info.sType = VK_STRUCTURE_TYPE_SUBMIT_INFO;
		submit_info.waitSemaphoreCount = wait_semaphores.size();
		submit_info.pWaitSemaphores = wait_semaphores.ptr();
		submit_info.pWaitDstStageMask = wait_semaphores_stages.ptr();
		submit_info.commandBufferCount = command_buffers.size();
		submit_info.pCommandBuffers = command_buffers.ptr();
		submit_info.signalSemaphoreCount = signal_semaphores.size();
		submit_info.pSignalSemaphores = signal_semaphores.ptr();

		device_queue.submit_mutex.lock();
		err = vkQueueSubmit(device_queue.queue, 1, &submit_info, vk_fence);
		device_queue.submit_mutex.unlock();

		if (err == VK_ERROR_DEVICE_LOST) {
			print_lost_device_info();
			CRASH_NOW_MSG("Vulkan device was lost.");
		}
		ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);

		if (fence != nullptr && !command_queue->pending_semaphores_for_fence.is_empty()) {
			fence->queue_signaled_from = command_queue;

			// Indicate to the fence that it should release the semaphores that were waited on this submission the next time the fence is waited on.
			for (uint32_t i = 0; i < command_queue->pending_semaphores_for_fence.size(); i++) {
				command_queue->image_semaphores_for_fences.push_back({ fence, command_queue->pending_semaphores_for_fence[i] });
			}

			command_queue->pending_semaphores_for_fence.clear();
		}

		if (!present_semaphores.is_empty()) {
			// If command buffers were executed, swap chains must wait on the present semaphore used by the command queue.
			wait_semaphores = present_semaphores;
		}
	}

	if (p_swap_chains.size() > 0) {
		thread_local LocalVector<VkSwapchainKHR> swapchains;
		thread_local LocalVector<uint32_t> image_indices;
		thread_local LocalVector<VkResult> results;
		swapchains.clear();
		image_indices.clear();

		for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
			SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
			swapchains.push_back(swap_chain->vk_swapchain);
			DEV_ASSERT(swap_chain->image_index < swap_chain->images.size());
			image_indices.push_back(swap_chain->image_index);
		}

		results.resize(swapchains.size());

		VkPresentInfoKHR present_info = {};
		present_info.sType = VK_STRUCTURE_TYPE_PRESENT_INFO_KHR;
		present_info.waitSemaphoreCount = wait_semaphores.size();
		present_info.pWaitSemaphores = wait_semaphores.ptr();
		present_info.swapchainCount = swapchains.size();
		present_info.pSwapchains = swapchains.ptr();
		present_info.pImageIndices = image_indices.ptr();
		present_info.pResults = results.ptr();

		device_queue.submit_mutex.lock();
#if defined(SWAPPY_FRAME_PACING_ENABLED)
		if (swappy_frame_pacer_enable) {
			err = SwappyVk_queuePresent(device_queue.queue, &present_info);
		} else {
			err = device_functions.QueuePresentKHR(device_queue.queue, &present_info);
		}
#else
		err = device_functions.QueuePresentKHR(device_queue.queue, &present_info);
#endif

		device_queue.submit_mutex.unlock();

		// Set the image index to an invalid value. If any of the swap chains returned out of date, indicate they should be resized the next time they're acquired.
		bool any_result_is_out_of_date = false;
		for (uint32_t i = 0; i < p_swap_chains.size(); i++) {
			SwapChain *swap_chain = (SwapChain *)(p_swap_chains[i].id);
			swap_chain->image_index = UINT_MAX;
			if (results[i] == VK_ERROR_OUT_OF_DATE_KHR) {
				context_driver->surface_set_needs_resize(swap_chain->surface, true);
				any_result_is_out_of_date = true;
			}
		}

		if (any_result_is_out_of_date || err == VK_ERROR_OUT_OF_DATE_KHR) {
			// It is possible for presentation to fail with out of date even though the acquire succeeded previously.
			// This case is treated as a silent failure, as it can be triggered easily by resizing a window in the OS natively.
			return FAILED;
		}

		// Handling VK_SUBOPTIMAL_KHR the same as VK_SUCCESS is completely intentional.
		//
		// Godot does not currently support native rotation in Android when creating the swap chain. It intentionally uses
		// VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR instead of the current transform bits available in the surface capabilities.
		// Choosing the transform that leads to optimal presentation leads to distortion that makes the application unusable,
		// as the rotation of all the content is not handled at the moment.
		//
		// VK_SUBOPTIMAL_KHR is accepted as a successful case even if it's not the most efficient solution to work around this
		// problem. This behavior should not be changed unless the swap chain recreation uses the current transform bits, as
		// it'll lead to very low performance in Android by entering an endless loop where it'll always resize the swap chain
		// every frame.

		ERR_FAIL_COND_V_MSG(
				err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR,
				FAILED,
				"QueuePresentKHR failed with error: " + get_vulkan_result(err));
	}

	return OK;
}

void RenderingDeviceDriverVulkan::command_queue_free(CommandQueueID p_cmd_queue) {
	DEV_ASSERT(p_cmd_queue);

	CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id);

	// Erase all the semaphores used for image acquisition.
	for (VkSemaphore semaphore : command_queue->image_semaphores) {
		vkDestroySemaphore(vk_device, semaphore, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));
	}

	// Retrieve the queue family corresponding to the virtual queue.
	DEV_ASSERT(command_queue->queue_family < queue_families.size());
	TightLocalVector<Queue> &queue_family = queue_families[command_queue->queue_family];

	// Decrease the virtual queue count.
	DEV_ASSERT(command_queue->queue_index < queue_family.size());
	DEV_ASSERT(queue_family[command_queue->queue_index].virtual_count > 0);
	queue_family[command_queue->queue_index].virtual_count--;

	// Destroy the virtual queue structure.
	memdelete(command_queue);
}

// ----- POOL -----

RDD::CommandPoolID RenderingDeviceDriverVulkan::command_pool_create(CommandQueueFamilyID p_cmd_queue_family, CommandBufferType p_cmd_buffer_type) {
	DEV_ASSERT(p_cmd_queue_family.id != 0);

	uint32_t family_index = p_cmd_queue_family.id - 1;
	VkCommandPoolCreateInfo cmd_pool_info = {};
	cmd_pool_info.sType = VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO;
	cmd_pool_info.queueFamilyIndex = family_index;

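	// If whole-pool resets aren't used, command buffers must be individually resettable instead.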
	if (!command_pool_reset_enabled) {
		cmd_pool_info.flags = VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT;
	}

	VkCommandPool vk_command_pool = VK_NULL_HANDLE;
	VkResult res = vkCreateCommandPool(vk_device, &cmd_pool_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL), &vk_command_pool);
	ERR_FAIL_COND_V_MSG(res, CommandPoolID(), "vkCreateCommandPool failed with error " + itos(res) + ".");

	CommandPool *command_pool = memnew(CommandPool);
	command_pool->vk_command_pool = vk_command_pool;
	command_pool->buffer_type = p_cmd_buffer_type;
	return CommandPoolID(command_pool);
}

bool RenderingDeviceDriverVulkan::command_pool_reset(CommandPoolID p_cmd_pool) {
	DEV_ASSERT(p_cmd_pool);

	CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id);
	VkResult err = vkResetCommandPool(vk_device, command_pool->vk_command_pool, 0);
	ERR_FAIL_COND_V_MSG(err, false, "vkResetCommandPool failed with error " + itos(err) + ".");

	return true;
}

void RenderingDeviceDriverVulkan::command_pool_free(CommandPoolID p_cmd_pool) {
	DEV_ASSERT(p_cmd_pool);

	CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id);
	for (CommandBufferInfo *command_buffer : command_pool->command_buffers_created) {
		VersatileResource::free(resources_allocator, command_buffer);
	}

	vkDestroyCommandPool(vk_device, command_pool->vk_command_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_COMMAND_POOL));
	memdelete(command_pool);
}

// ----- BUFFER -----

RDD::CommandBufferID RenderingDeviceDriverVulkan::command_buffer_create(CommandPoolID p_cmd_pool) {
	DEV_ASSERT(p_cmd_pool);

	CommandPool *command_pool = (CommandPool *)(p_cmd_pool.id);
	VkCommandBufferAllocateInfo cmd_buf_info = {};
	cmd_buf_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO;
	cmd_buf_info.commandPool = command_pool->vk_command_pool;
	cmd_buf_info.commandBufferCount = 1;

	if (command_pool->buffer_type == COMMAND_BUFFER_TYPE_SECONDARY) {
		cmd_buf_info.level = VK_COMMAND_BUFFER_LEVEL_SECONDARY;
	} else {
		cmd_buf_info.level = VK_COMMAND_BUFFER_LEVEL_PRIMARY;
	}

	VkCommandBuffer vk_command_buffer = VK_NULL_HANDLE;
	VkResult err = vkAllocateCommandBuffers(vk_device, &cmd_buf_info, &vk_command_buffer);
	ERR_FAIL_COND_V_MSG(err, CommandBufferID(), "vkAllocateCommandBuffers failed with error " + itos(err) + ".");

	CommandBufferInfo *command_buffer = VersatileResource::allocate<CommandBufferInfo>(resources_allocator);
	command_buffer->vk_command_buffer = vk_command_buffer;
	command_pool->command_buffers_created.push_back(command_buffer);
	return CommandBufferID(command_buffer);
}

bool RenderingDeviceDriverVulkan::command_buffer_begin(CommandBufferID p_cmd_buffer) {
	CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id);

	VkCommandBufferBeginInfo cmd_buf_begin_info = {};
	cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
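	// Each recording is submitted only once before the buffer is reset, which the one-time-submit flag signals to the driver.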
	cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT;

	VkResult err = vkBeginCommandBuffer(command_buffer->vk_command_buffer, &cmd_buf_begin_info);
	ERR_FAIL_COND_V_MSG(err, false, "vkBeginCommandBuffer failed with error " + itos(err) + ".");

	return true;
}

bool RenderingDeviceDriverVulkan::command_buffer_begin_secondary(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, uint32_t p_subpass, FramebufferID p_framebuffer) {
	Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id);
	RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id);
	CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id);

	VkCommandBufferInheritanceInfo inheritance_info = {};
	inheritance_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_INHERITANCE_INFO;
	inheritance_info.renderPass = render_pass->vk_render_pass;
	inheritance_info.subpass = p_subpass;
	inheritance_info.framebuffer = framebuffer->vk_framebuffer;

	VkCommandBufferBeginInfo cmd_buf_begin_info = {};
	cmd_buf_begin_info.sType = VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO;
	cmd_buf_begin_info.flags = VK_COMMAND_BUFFER_USAGE_ONE_TIME_SUBMIT_BIT | VK_COMMAND_BUFFER_USAGE_RENDER_PASS_CONTINUE_BIT;
	cmd_buf_begin_info.pInheritanceInfo = &inheritance_info;

	VkResult err = vkBeginCommandBuffer(command_buffer->vk_command_buffer, &cmd_buf_begin_info);
	ERR_FAIL_COND_V_MSG(err, false, "vkBeginCommandBuffer failed with error " + itos(err) + ".");

	return true;
}

void RenderingDeviceDriverVulkan::command_buffer_end(CommandBufferID p_cmd_buffer) {
	CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id);
	vkEndCommandBuffer(command_buffer->vk_command_buffer);
}

void RenderingDeviceDriverVulkan::command_buffer_execute_secondary(CommandBufferID p_cmd_buffer, VectorView<CommandBufferID> p_secondary_cmd_buffers) {
	thread_local LocalVector<VkCommandBuffer> secondary_command_buffers;
	CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id);
	secondary_command_buffers.resize(p_secondary_cmd_buffers.size());
	for (uint32_t i = 0; i < p_secondary_cmd_buffers.size(); i++) {
		CommandBufferInfo *secondary_command_buffer = (CommandBufferInfo *)(p_secondary_cmd_buffers[i].id);
		secondary_command_buffers[i] = secondary_command_buffer->vk_command_buffer;
	}

	vkCmdExecuteCommands(command_buffer->vk_command_buffer, p_secondary_cmd_buffers.size(), secondary_command_buffers.ptr());
}

/********************/
/**** SWAP CHAIN ****/
/********************/

void RenderingDeviceDriverVulkan::_swap_chain_release(SwapChain *swap_chain) {
	// Destroy the views and framebuffers associated with the swap chain's images.
	for (FramebufferID framebuffer : swap_chain->framebuffers) {
		framebuffer_free(framebuffer);
	}

	for (VkImageView view : swap_chain->image_views) {
		vkDestroyImageView(vk_device, view, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW));
	}

	swap_chain->image_index = UINT_MAX;
	swap_chain->images.clear();
	swap_chain->image_views.clear();
	swap_chain->framebuffers.clear();

	if (swap_chain->vk_swapchain != VK_NULL_HANDLE) {
#if defined(SWAPPY_FRAME_PACING_ENABLED)
		if (swappy_frame_pacer_enable) {
			// Swappy has a bug where the ANativeWindow will be leaked if we call
			// SwappyVk_destroySwapchain, so we must release it by hand.
			SwappyVk_setWindow(vk_device, swap_chain->vk_swapchain, nullptr);
			SwappyVk_destroySwapchain(vk_device, swap_chain->vk_swapchain);
		}
#endif
		device_functions.DestroySwapchainKHR(vk_device, swap_chain->vk_swapchain, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SWAPCHAIN_KHR));
		swap_chain->vk_swapchain = VK_NULL_HANDLE;
	}

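	// Semaphores handed out for image acquisition may still have a pending signal operation that can never
	// complete once the swap chain is destroyed, so they are recreated rather than reused.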
	for (uint32_t i = 0; i < swap_chain->command_queues_acquired.size(); i++) {
		_recreate_image_semaphore(swap_chain->command_queues_acquired[i], swap_chain->command_queues_acquired_semaphores[i], false);
	}

	swap_chain->command_queues_acquired.clear();
	swap_chain->command_queues_acquired_semaphores.clear();

	for (VkSemaphore semaphore : swap_chain->present_semaphores) {
		vkDestroySemaphore(vk_device, semaphore, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE));
	}

	swap_chain->present_semaphores.clear();
}

RenderingDeviceDriver::SwapChainID RenderingDeviceDriverVulkan::swap_chain_create(RenderingContextDriver::SurfaceID p_surface) {
	DEV_ASSERT(p_surface != 0);

	RenderingContextDriverVulkan::Surface *surface = (RenderingContextDriverVulkan::Surface *)(p_surface);
	const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();

	// Retrieve the formats supported by the surface.
	uint32_t format_count = 0;
	VkResult err = functions.GetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface->vk_surface, &format_count, nullptr);
	ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID());

	TightLocalVector<VkSurfaceFormatKHR> formats;
	formats.resize(format_count);
	err = functions.GetPhysicalDeviceSurfaceFormatsKHR(physical_device, surface->vk_surface, &format_count, formats.ptr());
	ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID());

	VkFormat format = VK_FORMAT_UNDEFINED;
	VkColorSpaceKHR color_space = VK_COLOR_SPACE_SRGB_NONLINEAR_KHR;
	if (format_count == 1 && formats[0].format == VK_FORMAT_UNDEFINED) {
		// If the format list includes just one entry of VK_FORMAT_UNDEFINED, the surface has no preferred format.
		format = VK_FORMAT_B8G8R8A8_UNORM;
		color_space = formats[0].colorSpace;
	} else if (format_count > 0) {
		// Use one of the supported formats, prefer B8G8R8A8_UNORM.
		const VkFormat preferred_format = VK_FORMAT_B8G8R8A8_UNORM;
		const VkFormat second_format = VK_FORMAT_R8G8B8A8_UNORM;
		for (uint32_t i = 0; i < format_count; i++) {
			if (formats[i].format == preferred_format || formats[i].format == second_format) {
				format = formats[i].format;
				if (formats[i].format == preferred_format) {
					// This is the preferred format, stop searching.
					break;
				}
			}
		}
	}

	// No formats are supported.
	ERR_FAIL_COND_V_MSG(format == VK_FORMAT_UNDEFINED, SwapChainID(), "Surface did not return any valid formats.");

	// Create the render pass for the chosen format.
	VkAttachmentDescription2KHR attachment = {};
	attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
	attachment.format = format;
	attachment.samples = VK_SAMPLE_COUNT_1_BIT;
	attachment.loadOp = VK_ATTACHMENT_LOAD_OP_CLEAR;
	attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
	attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
	attachment.stencilStoreOp = VK_ATTACHMENT_STORE_OP_DONT_CARE;
	attachment.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED;
	attachment.finalLayout = VK_IMAGE_LAYOUT_PRESENT_SRC_KHR;

	VkAttachmentReference2KHR color_reference = {};
	color_reference.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
	color_reference.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;

	VkSubpassDescription2KHR subpass = {};
	subpass.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
	subpass.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
	subpass.colorAttachmentCount = 1;
	subpass.pColorAttachments = &color_reference;

	VkRenderPassCreateInfo2KHR pass_info = {};
	pass_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
	pass_info.attachmentCount = 1;
	pass_info.pAttachments = &attachment;
	pass_info.subpassCount = 1;
	pass_info.pSubpasses = &subpass;

	VkRenderPass vk_render_pass = VK_NULL_HANDLE;
	err = _create_render_pass(vk_device, &pass_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass);
	ERR_FAIL_COND_V(err != VK_SUCCESS, SwapChainID());

	RenderPassInfo *render_pass_info = VersatileResource::allocate<RenderPassInfo>(resources_allocator);
	render_pass_info->vk_render_pass = vk_render_pass;

	SwapChain *swap_chain = memnew(SwapChain);
	swap_chain->surface = p_surface;
	swap_chain->format = format;
	swap_chain->color_space = color_space;
	swap_chain->render_pass = RenderPassID(render_pass_info);
	return SwapChainID(swap_chain);
}

Error RenderingDeviceDriverVulkan::swap_chain_resize(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, uint32_t p_desired_framebuffer_count) {
	DEV_ASSERT(p_cmd_queue.id != 0);
	DEV_ASSERT(p_swap_chain.id != 0);

	CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id);
	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);

	// Release all current contents of the swap chain.
	_swap_chain_release(swap_chain);

	// Validate if the command queue being used supports creating the swap chain for this surface.
	const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
	if (!context_driver->queue_family_supports_present(physical_device, command_queue->queue_family, swap_chain->surface)) {
		ERR_FAIL_V_MSG(ERR_CANT_CREATE, "Surface is not supported by device. Did the GPU go offline? Was the window created on another monitor? Check "
										"previous errors & try launching with --gpu-validation.");
	}

	// Retrieve the surface's capabilities.
	RenderingContextDriverVulkan::Surface *surface = (RenderingContextDriverVulkan::Surface *)(swap_chain->surface);
	VkSurfaceCapabilitiesKHR surface_capabilities = {};
	VkResult err = functions.GetPhysicalDeviceSurfaceCapabilitiesKHR(physical_device, surface->vk_surface, &surface_capabilities);
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

	// No swapchain yet, this is the first time we're creating it.
	if (!swap_chain->vk_swapchain) {
		if (surface_capabilities.currentExtent.width == 0xFFFFFFFF) {
			// The current extent is undefined, so the surface's width and height will be clamped to its capabilities.
			// We make sure to overwrite surface_capabilities.currentExtent.width so that the same check further below
			// does not set extent.width = CLAMP(surface->width, ...) on the first run of this function, because
			// those values could potentially still be unswapped at that point.
			surface_capabilities.currentExtent.width = CLAMP(surface->width, surface_capabilities.minImageExtent.width, surface_capabilities.maxImageExtent.width);
			surface_capabilities.currentExtent.height = CLAMP(surface->height, surface_capabilities.minImageExtent.height, surface_capabilities.maxImageExtent.height);
		}

		// We must SWAP() only once otherwise we'll keep ping-ponging between
		// the right and wrong resolutions after multiple calls to swap_chain_resize().
		if (surface_capabilities.currentTransform & VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR ||
				surface_capabilities.currentTransform & VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR) {
			// Swap to get identity width and height.
			SWAP(surface_capabilities.currentExtent.width, surface_capabilities.currentExtent.height);
		}
	}

	VkExtent2D extent;
	if (surface_capabilities.currentExtent.width == 0xFFFFFFFF) {
		// The current extent is undefined, so the surface's width and height will be clamped to its capabilities.
		// We can only be here on the second call to swap_chain_resize(), by which time surface->width & surface->height should already be swapped if needed.
		extent.width = CLAMP(surface->width, surface_capabilities.minImageExtent.width, surface_capabilities.maxImageExtent.width);
		extent.height = CLAMP(surface->height, surface_capabilities.minImageExtent.height, surface_capabilities.maxImageExtent.height);
	} else {
		// Grab the dimensions from the current extent.
		extent = surface_capabilities.currentExtent;
		surface->width = extent.width;
		surface->height = extent.height;
	}

	if (surface->width == 0 || surface->height == 0) {
		// The surface doesn't have valid dimensions, so we can't create a swap chain.
		return ERR_SKIP;
	}

	// Find what present modes are supported.
	TightLocalVector<VkPresentModeKHR> present_modes;
	uint32_t present_modes_count = 0;
	err = functions.GetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface->vk_surface, &present_modes_count, nullptr);
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

	present_modes.resize(present_modes_count);
	err = functions.GetPhysicalDeviceSurfacePresentModesKHR(physical_device, surface->vk_surface, &present_modes_count, present_modes.ptr());
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

	// Choose the present mode based on the display server setting.
	VkPresentModeKHR present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR;
	String present_mode_name = "Enabled";
	switch (surface->vsync_mode) {
		case DisplayServer::VSYNC_MAILBOX:
			present_mode = VK_PRESENT_MODE_MAILBOX_KHR;
			present_mode_name = "Mailbox";
			break;
		case DisplayServer::VSYNC_ADAPTIVE:
			present_mode = VK_PRESENT_MODE_FIFO_RELAXED_KHR;
			present_mode_name = "Adaptive";
			break;
		case DisplayServer::VSYNC_ENABLED:
			present_mode = VK_PRESENT_MODE_FIFO_KHR;
			present_mode_name = "Enabled";
			break;
		case DisplayServer::VSYNC_DISABLED:
			present_mode = VK_PRESENT_MODE_IMMEDIATE_KHR;
			present_mode_name = "Disabled";
			break;
	}

	bool present_mode_available = present_modes.has(present_mode);
	if (!present_mode_available) {
		// Present mode is not available, fall back to FIFO which is guaranteed to be supported.
		WARN_PRINT(vformat("The requested V-Sync mode %s is not available. Falling back to V-Sync mode Enabled.", present_mode_name));
		surface->vsync_mode = DisplayServer::VSYNC_ENABLED;
		present_mode = VkPresentModeKHR::VK_PRESENT_MODE_FIFO_KHR;
	}

	// Clamp the desired image count to the surface's capabilities.
	uint32_t desired_swapchain_images = MAX(p_desired_framebuffer_count, surface_capabilities.minImageCount);
	if (surface_capabilities.maxImageCount > 0) {
		// Only clamp to the max image count if it's defined. A max image count of 0 means there's no upper limit to the amount of images.
		desired_swapchain_images = MIN(desired_swapchain_images, surface_capabilities.maxImageCount);
	}

	// Refer to the comment in command_queue_present() for more details.
	VkSurfaceTransformFlagBitsKHR surface_transform_bits = surface_capabilities.currentTransform;

	VkCompositeAlphaFlagBitsKHR composite_alpha = VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR;
	if (OS::get_singleton()->is_layered_allowed() || !(surface_capabilities.supportedCompositeAlpha & composite_alpha)) {
		// Find a supported composite alpha mode - one of these is guaranteed to be set.
		VkCompositeAlphaFlagBitsKHR composite_alpha_flags[4] = {
			VK_COMPOSITE_ALPHA_PRE_MULTIPLIED_BIT_KHR,
			VK_COMPOSITE_ALPHA_POST_MULTIPLIED_BIT_KHR,
			VK_COMPOSITE_ALPHA_INHERIT_BIT_KHR,
			VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR
		};

		for (uint32_t i = 0; i < ARRAY_SIZE(composite_alpha_flags); i++) {
			if (surface_capabilities.supportedCompositeAlpha & composite_alpha_flags[i]) {
				composite_alpha = composite_alpha_flags[i];
				break;
			}
		}
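		// Keep track of whether this queue's swap chain ended up with a non-opaque composite alpha mode,
		// presumably so presentation of transparent windows can be handled accordingly.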
		has_comp_alpha[(uint64_t)p_cmd_queue.id] = (composite_alpha != VK_COMPOSITE_ALPHA_OPAQUE_BIT_KHR);
	}

	VkSwapchainCreateInfoKHR swap_create_info = {};
	swap_create_info.sType = VK_STRUCTURE_TYPE_SWAPCHAIN_CREATE_INFO_KHR;
	swap_create_info.surface = surface->vk_surface;
	swap_create_info.minImageCount = desired_swapchain_images;
	swap_create_info.imageFormat = swap_chain->format;
	swap_create_info.imageColorSpace = swap_chain->color_space;
	swap_create_info.imageExtent = extent;
	swap_create_info.imageArrayLayers = 1;
	swap_create_info.imageUsage = VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT;
	swap_create_info.imageSharingMode = VK_SHARING_MODE_EXCLUSIVE;
	swap_create_info.preTransform = surface_transform_bits;
	switch (swap_create_info.preTransform) {
		case VK_SURFACE_TRANSFORM_IDENTITY_BIT_KHR:
			swap_chain->pre_transform_rotation_degrees = 0;
			break;
		case VK_SURFACE_TRANSFORM_ROTATE_90_BIT_KHR:
			swap_chain->pre_transform_rotation_degrees = 90;
			break;
		case VK_SURFACE_TRANSFORM_ROTATE_180_BIT_KHR:
			swap_chain->pre_transform_rotation_degrees = 180;
			break;
		case VK_SURFACE_TRANSFORM_ROTATE_270_BIT_KHR:
			swap_chain->pre_transform_rotation_degrees = 270;
			break;
		default:
			WARN_PRINT("Unexpected swap_create_info.preTransform = " + itos(swap_create_info.preTransform) + ".");
			swap_chain->pre_transform_rotation_degrees = 0;
			break;
	}
	swap_create_info.compositeAlpha = composite_alpha;
	swap_create_info.presentMode = present_mode;
	swap_create_info.clipped = true;
	err = device_functions.CreateSwapchainKHR(vk_device, &swap_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SWAPCHAIN_KHR), &swap_chain->vk_swapchain);
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

#if defined(SWAPPY_FRAME_PACING_ENABLED)
	if (swappy_frame_pacer_enable) {
		SwappyVk_initAndGetRefreshCycleDuration(get_jni_env(), static_cast<OS_Android *>(OS::get_singleton())->get_godot_java()->get_activity(), physical_device,
				vk_device, swap_chain->vk_swapchain, &swap_chain->refresh_duration);
		SwappyVk_setWindow(vk_device, swap_chain->vk_swapchain, static_cast<OS_Android *>(OS::get_singleton())->get_native_window());
		SwappyVk_setSwapIntervalNS(vk_device, swap_chain->vk_swapchain, swap_chain->refresh_duration);

		enum SwappyModes {
			PIPELINE_FORCED_ON,
			AUTO_FPS_PIPELINE_FORCED_ON,
			AUTO_FPS_AUTO_PIPELINE,
		};

		switch (swappy_mode) {
			case PIPELINE_FORCED_ON:
				SwappyVk_setAutoSwapInterval(true);
				SwappyVk_setAutoPipelineMode(true);
				break;
			case AUTO_FPS_PIPELINE_FORCED_ON:
				SwappyVk_setAutoSwapInterval(true);
				SwappyVk_setAutoPipelineMode(false);
				break;
			case AUTO_FPS_AUTO_PIPELINE:
				SwappyVk_setAutoSwapInterval(false);
				SwappyVk_setAutoPipelineMode(false);
				break;
		}
	}
#endif

	uint32_t image_count = 0;
	err = device_functions.GetSwapchainImagesKHR(vk_device, swap_chain->vk_swapchain, &image_count, nullptr);
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

	swap_chain->images.resize(image_count);
	err = device_functions.GetSwapchainImagesKHR(vk_device, swap_chain->vk_swapchain, &image_count, swap_chain->images.ptr());
	ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

	VkImageViewCreateInfo view_create_info = {};
	view_create_info.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO;
	view_create_info.viewType = VK_IMAGE_VIEW_TYPE_2D;
	view_create_info.format = swap_chain->format;
	view_create_info.components.r = VK_COMPONENT_SWIZZLE_R;
	view_create_info.components.g = VK_COMPONENT_SWIZZLE_G;
	view_create_info.components.b = VK_COMPONENT_SWIZZLE_B;
	view_create_info.components.a = VK_COMPONENT_SWIZZLE_A;
	view_create_info.subresourceRange.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	view_create_info.subresourceRange.levelCount = 1;
	view_create_info.subresourceRange.layerCount = 1;

	swap_chain->image_views.reserve(image_count);

	VkImageView image_view;
	for (uint32_t i = 0; i < image_count; i++) {
		view_create_info.image = swap_chain->images[i];
		err = vkCreateImageView(vk_device, &view_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_IMAGE_VIEW), &image_view);
		ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

		swap_chain->image_views.push_back(image_view);
	}

	swap_chain->framebuffers.reserve(image_count);

	const RenderPassInfo *render_pass = (const RenderPassInfo *)(swap_chain->render_pass.id);
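	// Create one framebuffer per swap chain image, all sharing the render pass created for the surface's format.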
	VkFramebufferCreateInfo fb_create_info = {};
	fb_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
	fb_create_info.renderPass = render_pass->vk_render_pass;
	fb_create_info.attachmentCount = 1;
	fb_create_info.width = surface->width;
	fb_create_info.height = surface->height;
	fb_create_info.layers = 1;

	VkFramebuffer vk_framebuffer;
	for (uint32_t i = 0; i < image_count; i++) {
		fb_create_info.pAttachments = &swap_chain->image_views[i];
		err = vkCreateFramebuffer(vk_device, &fb_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &vk_framebuffer);
		ERR_FAIL_COND_V(err != VK_SUCCESS, ERR_CANT_CREATE);

		Framebuffer *framebuffer = memnew(Framebuffer);
		framebuffer->vk_framebuffer = vk_framebuffer;
		framebuffer->swap_chain_image = swap_chain->images[i];
		framebuffer->swap_chain_image_subresource_range = view_create_info.subresourceRange;
		swap_chain->framebuffers.push_back(RDD::FramebufferID(framebuffer));
	}

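	// Create one semaphore per swap chain image for synchronizing presentation.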
	VkSemaphore vk_semaphore = VK_NULL_HANDLE;
	for (uint32_t i = 0; i < image_count; i++) {
		VkSemaphoreCreateInfo create_info = {};
		create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;

		err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &vk_semaphore);
		ERR_FAIL_COND_V(err != VK_SUCCESS, FAILED);

		swap_chain->present_semaphores.push_back(vk_semaphore);
	}

	// Once everything's been created correctly, indicate the surface no longer needs to be resized.
	context_driver->surface_set_needs_resize(swap_chain->surface, false);

	return OK;
}

RDD::FramebufferID RenderingDeviceDriverVulkan::swap_chain_acquire_framebuffer(CommandQueueID p_cmd_queue, SwapChainID p_swap_chain, bool &r_resize_required) {
	DEV_ASSERT(p_cmd_queue);
	DEV_ASSERT(p_swap_chain);

	CommandQueue *command_queue = (CommandQueue *)(p_cmd_queue.id);
	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	if ((swap_chain->vk_swapchain == VK_NULL_HANDLE) || context_driver->surface_get_needs_resize(swap_chain->surface)) {
		// The surface does not have a valid swap chain or it indicates it requires a resize.
		r_resize_required = true;
		return FramebufferID();
	}

	VkResult err;
	VkSemaphore semaphore = VK_NULL_HANDLE;
	uint32_t semaphore_index = 0;
	if (command_queue->free_image_semaphores.is_empty()) {
		// Add a new semaphore if none are free.
		VkSemaphoreCreateInfo create_info = {};
		create_info.sType = VK_STRUCTURE_TYPE_SEMAPHORE_CREATE_INFO;
		err = vkCreateSemaphore(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SEMAPHORE), &semaphore);
		ERR_FAIL_COND_V(err != VK_SUCCESS, FramebufferID());

		semaphore_index = command_queue->image_semaphores.size();
		command_queue->image_semaphores.push_back(semaphore);
		command_queue->image_semaphores_swap_chains.push_back(swap_chain);
	} else {
		// Pick a free semaphore.
		uint32_t free_index = command_queue->free_image_semaphores.size() - 1;
		semaphore_index = command_queue->free_image_semaphores[free_index];
		command_queue->image_semaphores_swap_chains[semaphore_index] = swap_chain;
		command_queue->free_image_semaphores.remove_at(free_index);
		semaphore = command_queue->image_semaphores[semaphore_index];
	}

	// Store the acquired semaphore in the swap chain.
	swap_chain->command_queues_acquired.push_back(command_queue);
	swap_chain->command_queues_acquired_semaphores.push_back(semaphore_index);

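	// Acquire the next image; the semaphore will be signaled when the image is actually ready to be used.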
	err = device_functions.AcquireNextImageKHR(vk_device, swap_chain->vk_swapchain, UINT64_MAX, semaphore, VK_NULL_HANDLE, &swap_chain->image_index);
	if (err == VK_ERROR_OUT_OF_DATE_KHR) {
		// Out of date can leave the semaphore in a pending state that will never be signaled, so it's necessary to recreate it.
		bool semaphore_recreated = _recreate_image_semaphore(command_queue, semaphore_index, true);
		ERR_FAIL_COND_V(!semaphore_recreated, FramebufferID());

		// Swap chain is out of date and must be recreated.
		r_resize_required = true;
		return FramebufferID();
	} else if (err != VK_SUCCESS && err != VK_SUBOPTIMAL_KHR) {
		// Acquiring the image failed and the reason is unknown.
		// Refer to the comment in command_queue_present() as to why VK_SUBOPTIMAL_KHR is handled the same as VK_SUCCESS.
		return FramebufferID();
	}

	// Indicate the command queue should wait on these semaphores on the next submission, and that it should
	// mark them as free again on the next fence.
	command_queue->pending_semaphores_for_execute.push_back(semaphore_index);
	command_queue->pending_semaphores_for_fence.push_back(semaphore_index);

	// Return the framebuffer corresponding to the new current image.
	FramebufferID framebuffer_id = swap_chain->framebuffers[swap_chain->image_index];
	Framebuffer *framebuffer = (Framebuffer *)(framebuffer_id.id);
	framebuffer->swap_chain_acquired = true;
	return framebuffer_id;
}

RDD::RenderPassID RenderingDeviceDriverVulkan::swap_chain_get_render_pass(SwapChainID p_swap_chain) {
	DEV_ASSERT(p_swap_chain.id != 0);

	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	return swap_chain->render_pass;
}

int RenderingDeviceDriverVulkan::swap_chain_get_pre_rotation_degrees(SwapChainID p_swap_chain) {
	DEV_ASSERT(p_swap_chain.id != 0);

	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	return swap_chain->pre_transform_rotation_degrees;
}

RDD::DataFormat RenderingDeviceDriverVulkan::swap_chain_get_format(SwapChainID p_swap_chain) {
	DEV_ASSERT(p_swap_chain.id != 0);

	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	switch (swap_chain->format) {
		case VK_FORMAT_B8G8R8A8_UNORM:
			return DATA_FORMAT_B8G8R8A8_UNORM;
		case VK_FORMAT_R8G8B8A8_UNORM:
			return DATA_FORMAT_R8G8B8A8_UNORM;
		default:
			DEV_ASSERT(false && "Unknown swap chain format.");
			return DATA_FORMAT_MAX;
	}
}

void RenderingDeviceDriverVulkan::swap_chain_set_max_fps(SwapChainID p_swap_chain, int p_max_fps) {
	DEV_ASSERT(p_swap_chain.id != 0);

#ifdef SWAPPY_FRAME_PACING_ENABLED
	if (!swappy_frame_pacer_enable) {
		return;
	}

	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	if (swap_chain->vk_swapchain != VK_NULL_HANDLE) {
		const uint64_t max_time = p_max_fps > 0 ? uint64_t((1000.0 * 1000.0 * 1000.0) / p_max_fps) : 0;
		SwappyVk_setSwapIntervalNS(vk_device, swap_chain->vk_swapchain, MAX(swap_chain->refresh_duration, max_time));
	}
#endif
}

void RenderingDeviceDriverVulkan::swap_chain_free(SwapChainID p_swap_chain) {
	DEV_ASSERT(p_swap_chain.id != 0);

	SwapChain *swap_chain = (SwapChain *)(p_swap_chain.id);
	_swap_chain_release(swap_chain);

	if (swap_chain->render_pass) {
		render_pass_free(swap_chain->render_pass);
	}

	memdelete(swap_chain);
}

/*********************/
/**** FRAMEBUFFER ****/
/*********************/

RDD::FramebufferID RenderingDeviceDriverVulkan::framebuffer_create(RenderPassID p_render_pass, VectorView<TextureID> p_attachments, uint32_t p_width, uint32_t p_height) {
	RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id);

	uint32_t fragment_density_map_offsets_layers = 0;
	VkImageView *vk_img_views = ALLOCA_ARRAY(VkImageView, p_attachments.size());
	for (uint32_t i = 0; i < p_attachments.size(); i++) {
		const TextureInfo *texture = (const TextureInfo *)p_attachments[i].id;
		vk_img_views[i] = texture->vk_view;
	}

	VkFramebufferCreateInfo framebuffer_create_info = {};
	framebuffer_create_info.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO;
	framebuffer_create_info.renderPass = render_pass->vk_render_pass;
	framebuffer_create_info.attachmentCount = p_attachments.size();
	framebuffer_create_info.pAttachments = vk_img_views;
	framebuffer_create_info.width = p_width;
	framebuffer_create_info.height = p_height;
	framebuffer_create_info.layers = 1;

	VkFramebuffer vk_framebuffer = VK_NULL_HANDLE;
	VkResult err = vkCreateFramebuffer(vk_device, &framebuffer_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER), &vk_framebuffer);
	ERR_FAIL_COND_V_MSG(err, FramebufferID(), "vkCreateFramebuffer failed with error " + itos(err) + ".");

#if PRINT_NATIVE_COMMANDS
	print_line(vformat("vkCreateFramebuffer 0x%uX with %d attachments", uint64_t(vk_framebuffer), p_attachments.size()));
	for (uint32_t i = 0; i < p_attachments.size(); i++) {
		const TextureInfo *attachment_info = (const TextureInfo *)p_attachments[i].id;
		print_line(vformat(" Attachment #%d: IMAGE 0x%uX VIEW 0x%uX", i, uint64_t(attachment_info->vk_view_create_info.image), uint64_t(attachment_info->vk_view)));
	}
#endif

	Framebuffer *framebuffer = memnew(Framebuffer);
	framebuffer->vk_framebuffer = vk_framebuffer;
	framebuffer->fragment_density_map_offsets_layers = fragment_density_map_offsets_layers;
	return FramebufferID(framebuffer);
}

void RenderingDeviceDriverVulkan::framebuffer_free(FramebufferID p_framebuffer) {
	Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id);
	vkDestroyFramebuffer(vk_device, framebuffer->vk_framebuffer, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_FRAMEBUFFER));
	memdelete(framebuffer);
}

/****************/
/**** SHADER ****/
/****************/

static VkShaderStageFlagBits RD_STAGE_TO_VK_SHADER_STAGE_BITS[RDD::SHADER_STAGE_MAX] = {
	VK_SHADER_STAGE_VERTEX_BIT,
	VK_SHADER_STAGE_FRAGMENT_BIT,
	VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,
	VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
	VK_SHADER_STAGE_COMPUTE_BIT,
	VK_SHADER_STAGE_RAYGEN_BIT_KHR,
	VK_SHADER_STAGE_ANY_HIT_BIT_KHR,
	VK_SHADER_STAGE_CLOSEST_HIT_BIT_KHR,
	VK_SHADER_STAGE_MISS_BIT_KHR,
	VK_SHADER_STAGE_INTERSECTION_BIT_KHR,
};

RDD::ShaderID RenderingDeviceDriverVulkan::shader_create_from_container(const Ref<RenderingShaderContainer> &p_shader_container, const Vector<ImmutableSampler> &p_immutable_samplers) {
	ShaderReflection shader_refl = p_shader_container->get_shader_reflection();
	ShaderInfo shader_info;
	shader_info.name = p_shader_container->shader_name.get_data();

	for (uint32_t i = 0; i < SHADER_STAGE_MAX; i++) {
		if (shader_refl.push_constant_stages.has_flag((ShaderStage)(1 << i))) {
			shader_info.vk_push_constant_stages |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[i];
		}
	}

	// Set bindings.
	Vector<Vector<VkDescriptorSetLayoutBinding>> vk_set_bindings;
	vk_set_bindings.resize(shader_refl.uniform_sets.size());
	for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) {
		for (uint32_t j = 0; j < shader_refl.uniform_sets[i].size(); j++) {
			const ShaderUniform &uniform = shader_refl.uniform_sets[i][j];
			VkDescriptorSetLayoutBinding layout_binding = {};
			layout_binding.binding = uniform.binding;
			layout_binding.descriptorCount = 1;
			for (uint32_t k = 0; k < SHADER_STAGE_MAX; k++) {
				if ((uniform.stages.has_flag(ShaderStage(1U << k)))) {
					layout_binding.stageFlags |= RD_STAGE_TO_VK_SHADER_STAGE_BITS[k];
				}
			}

			switch (uniform.type) {
				case UNIFORM_TYPE_SAMPLER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
					layout_binding.descriptorCount = uniform.length;
					// Immutable samplers: they are set in the layout binding here, given that they will not be changed later.
					int immutable_bind_index = -1;
					if (immutable_samplers_enabled && p_immutable_samplers.size() > 0) {
						for (int k = 0; k < p_immutable_samplers.size(); k++) {
							if (p_immutable_samplers[k].binding == layout_binding.binding) {
								immutable_bind_index = k;
								break;
							}
						}
						if (immutable_bind_index >= 0) {
							layout_binding.pImmutableSamplers = (VkSampler *)&p_immutable_samplers[immutable_bind_index].ids[0].id;
						}
					}
				} break;
				case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
					layout_binding.descriptorCount = uniform.length;
				} break;
				case UNIFORM_TYPE_TEXTURE: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
					layout_binding.descriptorCount = uniform.length;
				} break;
				case UNIFORM_TYPE_IMAGE: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
					layout_binding.descriptorCount = uniform.length;
				} break;
				case UNIFORM_TYPE_TEXTURE_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
					layout_binding.descriptorCount = uniform.length;
				} break;
				case UNIFORM_TYPE_IMAGE_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
				} break;
				case UNIFORM_TYPE_UNIFORM_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
				} break;
				case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
				} break;
				case UNIFORM_TYPE_STORAGE_BUFFER: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
				} break;
				case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
				} break;
				case UNIFORM_TYPE_INPUT_ATTACHMENT: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
				} break;
				case UNIFORM_TYPE_ACCELERATION_STRUCTURE: {
					layout_binding.descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
				} break;
				default: {
					DEV_ASSERT(false);
				}
			}

			vk_set_bindings.write[i].push_back(layout_binding);
		}
	}

	// Modules.
	VkResult res;
	String error_text;
	Vector<uint8_t> decompressed_code;
	VkShaderModule vk_module;
	PackedByteArray decoded_spirv;
	const bool use_respv = RESPV_ENABLED && !shader_container_format.get_debug_info_enabled();
	const bool store_respv = use_respv && !shader_refl.specialization_constants.is_empty();
	const int64_t stage_count = shader_refl.stages_vector.size();
	shader_info.vk_stages_create_info.reserve(stage_count);
	shader_info.original_stage_size.reserve(stage_count);

#if RECORD_PIPELINE_STATISTICS
	shader_info.spirv_stage_bytes.reserve(stage_count);
#endif

	if (store_respv) {
		shader_info.respv_stage_shaders.reserve(stage_count);
	}

	// AnyHit and ClosestHit go in the same group.
	uint32_t hit_group_index = UINT32_MAX;

	for (int i = 0; i < stage_count; i++) {
		const RenderingShaderContainer::Shader &shader = p_shader_container->shaders[i];
		bool requires_decompression = (shader.code_decompressed_size > 0);
		if (requires_decompression) {
			decompressed_code.resize(shader.code_decompressed_size);
			bool decompressed = p_shader_container->decompress_code(shader.code_compressed_bytes.ptr(), shader.code_compressed_bytes.size(), shader.code_compression_flags, decompressed_code.ptrw(), decompressed_code.size());
			if (!decompressed) {
				error_text = vformat("Failed to decompress code on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
				break;
			}
		}

		const uint8_t *smolv_input = requires_decompression ? decompressed_code.ptr() : shader.code_compressed_bytes.ptr();
		uint32_t smolv_input_size = requires_decompression ? decompressed_code.size() : shader.code_compressed_bytes.size();
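		// Stages stored as SMOL-V must be decoded back into regular SPIR-V before a Vulkan module can be created from them.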
		if (shader.code_compression_flags & RenderingShaderContainerVulkan::COMPRESSION_FLAG_SMOLV) {
			decoded_spirv.resize(smolv::GetDecodedBufferSize(smolv_input, smolv_input_size));
			if (decoded_spirv.is_empty()) {
				error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
				break;
			}

			if (!smolv::Decode(smolv_input, smolv_input_size, decoded_spirv.ptrw(), decoded_spirv.size())) {
				error_text = vformat("Malformed smolv input on shader stage %s.", String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
				break;
			}
		} else {
			decoded_spirv.resize(smolv_input_size);
			memcpy(decoded_spirv.ptrw(), smolv_input, decoded_spirv.size());
		}

		shader_info.original_stage_size.push_back(decoded_spirv.size());

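		// With re-spirv in use, either store the parsed shader so it can be optimized later once specialization
		// constants are known, or optimize it immediately when the shader has none.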
		if (use_respv) {
			const bool inline_data = store_respv || !RESPV_ONLY_INLINE_SHADERS_WITH_SPEC_CONSTANTS;
			respv::Shader respv_shader(decoded_spirv.ptr(), decoded_spirv.size(), inline_data);
			if (respv_shader.empty()) {
#if RESPV_VERBOSE
				print_line("re-spirv failed to parse the shader, skipping optimization.");
#endif
				if (store_respv) {
					shader_info.respv_stage_shaders.push_back(respv::Shader());
				}
			} else if (store_respv) {
				shader_info.respv_stage_shaders.push_back(respv_shader);
			} else {
				std::vector<uint8_t> respv_optimized_data;
				if (respv::Optimizer::run(respv_shader, nullptr, 0, respv_optimized_data)) {
#if RESPV_VERBOSE
					print_line(vformat("re-spirv transformed the shader from %d bytes to %d bytes.", decoded_spirv.size(), respv_optimized_data.size()));
#endif
					decoded_spirv.resize(respv_optimized_data.size());
					memcpy(decoded_spirv.ptrw(), respv_optimized_data.data(), respv_optimized_data.size());
				} else {
#if RESPV_VERBOSE
					print_line("re-spirv failed to optimize the shader.");
#endif
				}
			}
		}

#if RECORD_PIPELINE_STATISTICS
		shader_info.spirv_stage_bytes.push_back(decoded_spirv);
#endif

		VkShaderModuleCreateInfo shader_module_create_info = {};
		shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
		shader_module_create_info.codeSize = decoded_spirv.size();
		shader_module_create_info.pCode = (const uint32_t *)(decoded_spirv.ptr());

		res = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &vk_module);
		if (res != VK_SUCCESS) {
			error_text = vformat("Error (%d) creating module for shader stage %s.", res, String(SHADER_STAGE_NAMES[shader_refl.stages_vector[i]]));
			break;
		}

		VkPipelineShaderStageCreateInfo create_info = {};
		create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;
		create_info.stage = RD_STAGE_TO_VK_SHADER_STAGE_BITS[shader_refl.stages_vector[i]];
		create_info.module = vk_module;
		create_info.pName = "main";
		shader_info.vk_stages_create_info.push_back(create_info);

		ShaderStage stage = shader_refl.stages_vector[i];

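		// Raygen and miss stages each get their own general shader group; any-hit and closest-hit stages share a
		// hit group, as handled below.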
		if (stage == ShaderStage::SHADER_STAGE_RAYGEN || stage == ShaderStage::SHADER_STAGE_MISS) {
			VkRayTracingShaderGroupCreateInfoKHR group_info = {};
			group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR;
			group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR;
			group_info.anyHitShader = VK_SHADER_UNUSED_KHR;
			group_info.closestHitShader = VK_SHADER_UNUSED_KHR;
			group_info.intersectionShader = VK_SHADER_UNUSED_KHR;
			group_info.generalShader = i;

			shader_info.vk_groups_create_info.push_back(group_info);
		}
		if (stage == ShaderStage::SHADER_STAGE_ANY_HIT || stage == ShaderStage::SHADER_STAGE_CLOSEST_HIT) {
			if (hit_group_index == UINT32_MAX) {
				VkRayTracingShaderGroupCreateInfoKHR group_info = {};
				group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR;
				group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR;
				group_info.anyHitShader = VK_SHADER_UNUSED_KHR;
				group_info.closestHitShader = VK_SHADER_UNUSED_KHR;
				group_info.intersectionShader = VK_SHADER_UNUSED_KHR;
				group_info.generalShader = VK_SHADER_UNUSED_KHR;

				hit_group_index = shader_info.vk_groups_create_info.size();
				shader_info.vk_groups_create_info.push_back(group_info);
			}

			VkRayTracingShaderGroupCreateInfoKHR &group_info = shader_info.vk_groups_create_info[hit_group_index];
			if (stage == ShaderStage::SHADER_STAGE_ANY_HIT) {
				group_info.anyHitShader = i;
			} else if (stage == ShaderStage::SHADER_STAGE_CLOSEST_HIT) {
				group_info.closestHitShader = i;
			}
		}
		if (stage == ShaderStage::SHADER_STAGE_INTERSECTION) {
			VkRayTracingShaderGroupCreateInfoKHR group_info = {};
			group_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_SHADER_GROUP_CREATE_INFO_KHR;
			group_info.type = VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR;
			group_info.anyHitShader = VK_SHADER_UNUSED_KHR;
			group_info.closestHitShader = VK_SHADER_UNUSED_KHR;
			group_info.intersectionShader = i;
			group_info.generalShader = VK_SHADER_UNUSED_KHR;

			shader_info.vk_groups_create_info.push_back(group_info);
		}
	}

	// Descriptor sets.
	if (error_text.is_empty()) {
		// Workaround for an Adreno 5XX driver bug.
		VkDescriptorSetLayoutBinding placeholder_binding = {};
		placeholder_binding.binding = 0;
		placeholder_binding.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
		placeholder_binding.descriptorCount = 1;
		placeholder_binding.stageFlags = VK_SHADER_STAGE_ALL;

		for (uint32_t i = 0; i < shader_refl.uniform_sets.size(); i++) {
			// According to the spec, empty descriptor set layouts are fine as long as they are not used (their binding count will be 0).
			VkDescriptorSetLayoutCreateInfo layout_create_info = {};
			layout_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO;
			layout_create_info.bindingCount = vk_set_bindings[i].size();
			layout_create_info.pBindings = vk_set_bindings[i].ptr();

			// ...not so fine on Adreno 5XX.
			if (adreno_5xx_empty_descriptor_set_layout_workaround && layout_create_info.bindingCount == 0) {
				layout_create_info.bindingCount = 1;
				layout_create_info.pBindings = &placeholder_binding;
			}

			VkDescriptorSetLayout layout = VK_NULL_HANDLE;
			res = vkCreateDescriptorSetLayout(vk_device, &layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT), &layout);
			if (res) {
				error_text = vformat("Error (%d) creating descriptor set layout for set %d.", res, i);
				break;
			}

			shader_info.vk_descriptor_set_layouts.push_back(layout);
		}
	}

	if (error_text.is_empty()) {
		// Pipeline layout.
		VkPipelineLayoutCreateInfo pipeline_layout_create_info = {};
		pipeline_layout_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO;
		pipeline_layout_create_info.setLayoutCount = shader_info.vk_descriptor_set_layouts.size();
		pipeline_layout_create_info.pSetLayouts = shader_info.vk_descriptor_set_layouts.ptr();

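		// A single push constant range is shared across all the stages that use push constants.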
		if (shader_refl.push_constant_size > 0) {
			VkPushConstantRange *push_constant_range = ALLOCA_SINGLE(VkPushConstantRange);
			*push_constant_range = {};
			push_constant_range->stageFlags = shader_info.vk_push_constant_stages;
			push_constant_range->size = shader_refl.push_constant_size;
			pipeline_layout_create_info.pushConstantRangeCount = 1;
			pipeline_layout_create_info.pPushConstantRanges = push_constant_range;
		}

		res = vkCreatePipelineLayout(vk_device, &pipeline_layout_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT), &shader_info.vk_pipeline_layout);
		if (res != VK_SUCCESS) {
			error_text = vformat("Error (%d) creating pipeline layout.", res);
		}
	}

	if (!error_text.is_empty()) {
		// Clean up if failed.
		for (uint32_t i = 0; i < shader_info.vk_stages_create_info.size(); i++) {
			vkDestroyShaderModule(vk_device, shader_info.vk_stages_create_info[i].module, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE));
		}
		for (uint32_t i = 0; i < shader_info.vk_descriptor_set_layouts.size(); i++) {
			vkDestroyDescriptorSetLayout(vk_device, shader_info.vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT));
		}

		ERR_FAIL_V_MSG(ShaderID(), error_text);
	}

	if (shader_refl.pipeline_type == PIPELINE_TYPE_RAYTRACING) {
		// Count the shaders that belong to each group region.
		for (ShaderStage stage : shader_refl.stages_vector) {
			switch (stage) {
				case ShaderStage::SHADER_STAGE_RAYGEN:
					shader_info.region_count.raygen_count += 1;
					break;
				case ShaderStage::SHADER_STAGE_ANY_HIT:
				case ShaderStage::SHADER_STAGE_CLOSEST_HIT:
					shader_info.region_count.hit_count += 1;
					break;
				case ShaderStage::SHADER_STAGE_MISS:
					shader_info.region_count.miss_count += 1;
					break;
				default:
					// Nothing to count for the other stages.
					break;
			}
		}

		shader_info.region_count.group_count = shader_info.region_count.raygen_count + shader_info.region_count.hit_count + shader_info.region_count.miss_count;
	}

	// Bookkeep.
	ShaderInfo *shader_info_ptr = VersatileResource::allocate<ShaderInfo>(resources_allocator);
	*shader_info_ptr = shader_info;
	return ShaderID(shader_info_ptr);
}

void RenderingDeviceDriverVulkan::shader_free(ShaderID p_shader) {
	ShaderInfo *shader_info = (ShaderInfo *)p_shader.id;

	for (uint32_t i = 0; i < shader_info->vk_descriptor_set_layouts.size(); i++) {
		vkDestroyDescriptorSetLayout(vk_device, shader_info->vk_descriptor_set_layouts[i], VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT));
	}

	vkDestroyPipelineLayout(vk_device, shader_info->vk_pipeline_layout, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_LAYOUT));

	shader_destroy_modules(p_shader);

	VersatileResource::free(resources_allocator, shader_info);
}

void RenderingDeviceDriverVulkan::shader_destroy_modules(ShaderID p_shader) {
	ShaderInfo *si = (ShaderInfo *)p_shader.id;

	for (uint32_t i = 0; i < si->vk_stages_create_info.size(); i++) {
		if (si->vk_stages_create_info[i].module) {
			vkDestroyShaderModule(vk_device, si->vk_stages_create_info[i].module,
					VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE));
			si->vk_stages_create_info[i].module = VK_NULL_HANDLE;
		}
	}
	si->vk_stages_create_info.clear();
}

/*********************/
/**** UNIFORM SET ****/
/*********************/

VkDescriptorPool RenderingDeviceDriverVulkan::_descriptor_set_pool_create(const DescriptorSetPoolKey &p_key, bool p_linear_pool) {
	// Here comes more Vulkan API strangeness.
	VkDescriptorPoolSize *vk_sizes = ALLOCA_ARRAY(VkDescriptorPoolSize, UNIFORM_TYPE_MAX);
	uint32_t vk_sizes_count = 0;
	{
		VkDescriptorPoolSize *curr_vk_size = vk_sizes;
		if (p_key.uniform_type[UNIFORM_TYPE_SAMPLER]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_SAMPLER;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_SAMPLER] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_TEXTURE]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_TEXTURE] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_IMAGE]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_IMAGE] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_TEXTURE_BUFFER] || p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
			curr_vk_size->descriptorCount = (p_key.uniform_type[UNIFORM_TYPE_TEXTURE_BUFFER] + p_key.uniform_type[UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER]) * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_IMAGE_BUFFER]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_IMAGE_BUFFER] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_INPUT_ATTACHMENT]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_INPUT_ATTACHMENT] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		if (p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE]) {
			*curr_vk_size = {};
			curr_vk_size->type = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
			curr_vk_size->descriptorCount = p_key.uniform_type[UNIFORM_TYPE_ACCELERATION_STRUCTURE] * max_descriptor_sets_per_pool;
			curr_vk_size++;
			vk_sizes_count++;
		}
		DEV_ASSERT(vk_sizes_count <= UNIFORM_TYPE_MAX);
	}

	VkDescriptorPoolCreateInfo descriptor_set_pool_create_info = {};
	descriptor_set_pool_create_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO;
	if (linear_descriptor_pools_enabled && p_linear_pool) {
		descriptor_set_pool_create_info.flags = 0;
	} else {
		descriptor_set_pool_create_info.flags = VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT; // Can't think how somebody may NOT need this flag.
	}
	descriptor_set_pool_create_info.maxSets = max_descriptor_sets_per_pool;
	descriptor_set_pool_create_info.poolSizeCount = vk_sizes_count;
	descriptor_set_pool_create_info.pPoolSizes = vk_sizes;

	VkDescriptorPool vk_pool = VK_NULL_HANDLE;
	VkResult res = vkCreateDescriptorPool(vk_device, &descriptor_set_pool_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL), &vk_pool);
	if (res) {
		ERR_FAIL_COND_V_MSG(res, VK_NULL_HANDLE, "vkCreateDescriptorPool failed with error " + itos(res) + ".");
	}

	return vk_pool;
}

void RenderingDeviceDriverVulkan::_descriptor_set_pool_unreference(DescriptorSetPools::Iterator p_pool_sets_it, VkDescriptorPool p_vk_descriptor_pool, int p_linear_pool_index) {
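	// Drop one reference to the pool; once the count reaches zero, the pool is destroyed and its entry removed from the map.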
	HashMap<VkDescriptorPool, uint32_t>::Iterator pool_rcs_it = p_pool_sets_it->value.find(p_vk_descriptor_pool);
	pool_rcs_it->value--;
	if (pool_rcs_it->value == 0) {
		vkDestroyDescriptorPool(vk_device, p_vk_descriptor_pool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL));
		p_pool_sets_it->value.erase(p_vk_descriptor_pool);
		if (p_pool_sets_it->value.is_empty()) {
			if (linear_descriptor_pools_enabled && p_linear_pool_index >= 0) {
				linear_descriptor_set_pools[p_linear_pool_index].remove(p_pool_sets_it);
			} else {
				descriptor_set_pools.remove(p_pool_sets_it);
			}
		}
	}
}

RDD::UniformSetID RenderingDeviceDriverVulkan::uniform_set_create(VectorView<BoundUniform> p_uniforms, ShaderID p_shader, uint32_t p_set_index, int p_linear_pool_index) {
	if (!linear_descriptor_pools_enabled) {
		p_linear_pool_index = -1;
	}
	DescriptorSetPoolKey pool_key;

	// We first gather dynamic buffers in a local array because TightLocalVector's
	// growth is not efficient when the number of elements is unknown.
	const BufferInfo *dynamic_buffers[MAX_DYNAMIC_BUFFERS];
	uint32_t num_dynamic_buffers = 0u;

	// Immutable samplers will be skipped so we need to track the number of vk_writes used.
	VkWriteDescriptorSet *vk_writes = ALLOCA_ARRAY(VkWriteDescriptorSet, p_uniforms.size());
	uint32_t writes_amount = 0;
	for (uint32_t i = 0; i < p_uniforms.size(); i++) {
		const BoundUniform &uniform = p_uniforms[i];

		vk_writes[writes_amount] = {};
		vk_writes[writes_amount].sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET;

		bool add_write = true;
		uint32_t num_descriptors = 1;

		switch (uniform.type) {
			case UNIFORM_TYPE_SAMPLER: {
				num_descriptors = uniform.ids.size();

				if (uniform.immutable_sampler && immutable_samplers_enabled) {
					add_write = false;
				} else {
					VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);

					for (uint32_t j = 0; j < num_descriptors; j++) {
						vk_img_infos[j] = {};
						vk_img_infos[j].sampler = (VkSampler)uniform.ids[j].id;
						vk_img_infos[j].imageView = VK_NULL_HANDLE;
						vk_img_infos[j].imageLayout = VK_IMAGE_LAYOUT_UNDEFINED;
					}

					vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLER;
					vk_writes[writes_amount].pImageInfo = vk_img_infos;
				}
			} break;
			case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE: {
				num_descriptors = uniform.ids.size() / 2;
				VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);

				for (uint32_t j = 0; j < num_descriptors; j++) {
#ifdef DEBUG_ENABLED
					if (((const TextureInfo *)uniform.ids[j * 2 + 1].id)->transient) {
						ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT texture must not be used for sampling in a shader.");
					}
#endif
					vk_img_infos[j] = {};
					vk_img_infos[j].sampler = (VkSampler)uniform.ids[j * 2 + 0].id;
					vk_img_infos[j].imageView = ((const TextureInfo *)uniform.ids[j * 2 + 1].id)->vk_view;
					vk_img_infos[j].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				}

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER;
				vk_writes[writes_amount].pImageInfo = vk_img_infos;
			} break;
			case UNIFORM_TYPE_TEXTURE: {
				num_descriptors = uniform.ids.size();
				VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);

				for (uint32_t j = 0; j < num_descriptors; j++) {
#ifdef DEBUG_ENABLED
					if (((const TextureInfo *)uniform.ids[j].id)->transient) {
						ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT texture must not be used for sampling in a shader.");
					}
#endif
					vk_img_infos[j] = {};
					vk_img_infos[j].imageView = ((const TextureInfo *)uniform.ids[j].id)->vk_view;
					vk_img_infos[j].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				}

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE;
				vk_writes[writes_amount].pImageInfo = vk_img_infos;
			} break;
			case UNIFORM_TYPE_IMAGE: {
				num_descriptors = uniform.ids.size();
				VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);

				for (uint32_t j = 0; j < num_descriptors; j++) {
#ifdef DEBUG_ENABLED
					if (((const TextureInfo *)uniform.ids[j].id)->transient) {
						ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT texture must not be used as a storage image in a shader.");
					}
#endif
					vk_img_infos[j] = {};
					vk_img_infos[j].imageView = ((const TextureInfo *)uniform.ids[j].id)->vk_view;
					vk_img_infos[j].imageLayout = VK_IMAGE_LAYOUT_GENERAL;
				}

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
				vk_writes[writes_amount].pImageInfo = vk_img_infos;
			} break;
			case UNIFORM_TYPE_TEXTURE_BUFFER: {
				num_descriptors = uniform.ids.size();
				VkDescriptorBufferInfo *vk_buf_infos = ALLOCA_ARRAY(VkDescriptorBufferInfo, num_descriptors);
				VkBufferView *vk_buf_views = ALLOCA_ARRAY(VkBufferView, num_descriptors);

				for (uint32_t j = 0; j < num_descriptors; j++) {
					const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[j].id;
					vk_buf_infos[j] = {};
					vk_buf_infos[j].buffer = buf_info->vk_buffer;
					vk_buf_infos[j].range = buf_info->size;

					vk_buf_views[j] = buf_info->vk_view;
				}

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
				vk_writes[writes_amount].pBufferInfo = vk_buf_infos;
				vk_writes[writes_amount].pTexelBufferView = vk_buf_views;
			} break;
			case UNIFORM_TYPE_SAMPLER_WITH_TEXTURE_BUFFER: {
				num_descriptors = uniform.ids.size() / 2;
				VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);
				VkDescriptorBufferInfo *vk_buf_infos = ALLOCA_ARRAY(VkDescriptorBufferInfo, num_descriptors);
				VkBufferView *vk_buf_views = ALLOCA_ARRAY(VkBufferView, num_descriptors);

				for (uint32_t j = 0; j < num_descriptors; j++) {
					vk_img_infos[j] = {};
					vk_img_infos[j].sampler = (VkSampler)uniform.ids[j * 2 + 0].id;

					const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[j * 2 + 1].id;
					vk_buf_infos[j] = {};
					vk_buf_infos[j].buffer = buf_info->vk_buffer;
					vk_buf_infos[j].range = buf_info->size;

					vk_buf_views[j] = buf_info->vk_view;
				}

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER;
				vk_writes[writes_amount].pImageInfo = vk_img_infos;
				vk_writes[writes_amount].pBufferInfo = vk_buf_infos;
				vk_writes[writes_amount].pTexelBufferView = vk_buf_views;
			} break;
			case UNIFORM_TYPE_IMAGE_BUFFER: {
				CRASH_NOW_MSG("Unimplemented!"); // TODO.
			} break;
			case UNIFORM_TYPE_UNIFORM_BUFFER: {
				const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
				VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
				*vk_buf_info = {};
				vk_buf_info->buffer = buf_info->vk_buffer;
				vk_buf_info->range = buf_info->size;

				ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
						"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER instead of UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC.");

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
				vk_writes[writes_amount].pBufferInfo = vk_buf_info;
			} break;
			case UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC: {
				const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
				VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
				*vk_buf_info = {};
				vk_buf_info->buffer = buf_info->vk_buffer;
				vk_buf_info->range = buf_info->size;

				ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
						"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_UNIFORM_BUFFER_DYNAMIC instead of UNIFORM_TYPE_UNIFORM_BUFFER.");
				ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
						"Uniform set exceeded the limit of dynamic/persistent buffers (" + itos(MAX_DYNAMIC_BUFFERS) + ").");

				dynamic_buffers[num_dynamic_buffers++] = buf_info;
				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC;
				vk_writes[writes_amount].pBufferInfo = vk_buf_info;
			} break;
			case UNIFORM_TYPE_STORAGE_BUFFER: {
				const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
				VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
				*vk_buf_info = {};
				vk_buf_info->buffer = buf_info->vk_buffer;
				vk_buf_info->range = buf_info->size;

				ERR_FAIL_COND_V_MSG(buf_info->is_dynamic(), UniformSetID(),
						"Sent a buffer with BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER instead of UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC.");

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
				vk_writes[writes_amount].pBufferInfo = vk_buf_info;
			} break;
			case UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC: {
				const BufferInfo *buf_info = (const BufferInfo *)uniform.ids[0].id;
				VkDescriptorBufferInfo *vk_buf_info = ALLOCA_SINGLE(VkDescriptorBufferInfo);
				*vk_buf_info = {};
				vk_buf_info->buffer = buf_info->vk_buffer;
				vk_buf_info->range = buf_info->size;

				ERR_FAIL_COND_V_MSG(!buf_info->is_dynamic(), UniformSetID(),
						"Sent a buffer without BUFFER_USAGE_DYNAMIC_PERSISTENT_BIT but binding (" + itos(uniform.binding) + "), set (" + itos(p_set_index) + ") is UNIFORM_TYPE_STORAGE_BUFFER_DYNAMIC instead of UNIFORM_TYPE_STORAGE_BUFFER.");
				ERR_FAIL_COND_V_MSG(num_dynamic_buffers >= MAX_DYNAMIC_BUFFERS, UniformSetID(),
						"Uniform set exceeded the limit of dynamic/persistent buffers (" + itos(MAX_DYNAMIC_BUFFERS) + ").");

				dynamic_buffers[num_dynamic_buffers++] = buf_info;
				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC;
				vk_writes[writes_amount].pBufferInfo = vk_buf_info;
			} break;
			case UNIFORM_TYPE_INPUT_ATTACHMENT: {
				num_descriptors = uniform.ids.size();
				VkDescriptorImageInfo *vk_img_infos = ALLOCA_ARRAY(VkDescriptorImageInfo, num_descriptors);

				for (uint32_t j = 0; j < uniform.ids.size(); j++) {
					vk_img_infos[j] = {};
					vk_img_infos[j].imageView = ((const TextureInfo *)uniform.ids[j].id)->vk_view;
					vk_img_infos[j].imageLayout = VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL;
				}

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT;
				vk_writes[writes_amount].pImageInfo = vk_img_infos;
			} break;
			case UNIFORM_TYPE_ACCELERATION_STRUCTURE: {
				const AccelerationStructureInfo *accel_info = (const AccelerationStructureInfo *)uniform.ids[0].id;
				VkWriteDescriptorSetAccelerationStructureKHR *acceleration_structure_write = ALLOCA_SINGLE(VkWriteDescriptorSetAccelerationStructureKHR);
				*acceleration_structure_write = {};
				acceleration_structure_write->sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET_ACCELERATION_STRUCTURE_KHR;
				acceleration_structure_write->accelerationStructureCount = 1;
				acceleration_structure_write->pAccelerationStructures = &accel_info->vk_acceleration_structure;

				vk_writes[writes_amount].descriptorType = VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR;
				vk_writes[writes_amount].pNext = acceleration_structure_write;
			} break;
			default: {
				DEV_ASSERT(false);
			}
		}

		if (add_write) {
			vk_writes[writes_amount].dstBinding = uniform.binding;
			vk_writes[writes_amount].descriptorCount = num_descriptors;
			writes_amount++;
		}

		ERR_FAIL_COND_V_MSG(pool_key.uniform_type[uniform.type] == MAX_UNIFORM_POOL_ELEMENT, UniformSetID(), "Uniform set reached the limit of bindings for the same type (" + itos(MAX_UNIFORM_POOL_ELEMENT) + ").");
		pool_key.uniform_type[uniform.type] += num_descriptors;
	}

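	// Sets with identical per-type descriptor counts share the same pool_key, and
	// therefore the same family of descriptor pools created by _descriptor_set_pool_create().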
	bool linear_pool = p_linear_pool_index >= 0;
	DescriptorSetPools::Iterator pool_sets_it = linear_pool ? linear_descriptor_set_pools[p_linear_pool_index].find(pool_key) : descriptor_set_pools.find(pool_key);
	if (!pool_sets_it) {
		if (linear_pool) {
			pool_sets_it = linear_descriptor_set_pools[p_linear_pool_index].insert(pool_key, HashMap<VkDescriptorPool, uint32_t>());
		} else {
			pool_sets_it = descriptor_set_pools.insert(pool_key, HashMap<VkDescriptorPool, uint32_t>());
		}
	}

	VkDescriptorSetAllocateInfo descriptor_set_allocate_info = {};
	descriptor_set_allocate_info.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO;
	descriptor_set_allocate_info.descriptorSetCount = 1;
	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
	descriptor_set_allocate_info.pSetLayouts = &shader_info->vk_descriptor_set_layouts[p_set_index];

	VkDescriptorSet vk_descriptor_set = VK_NULL_HANDLE;
	for (KeyValue<VkDescriptorPool, uint32_t> &E : pool_sets_it->value) {
		if (E.value < max_descriptor_sets_per_pool) {
			descriptor_set_allocate_info.descriptorPool = E.key;
			VkResult res = vkAllocateDescriptorSets(vk_device, &descriptor_set_allocate_info, &vk_descriptor_set);

			// Break early on success.
			if (res == VK_SUCCESS) {
				break;
			}

			// "Fragmented pool" and "out of pool memory" errors are handled by creating more pools. Any other error is unexpected.
			if (res != VK_ERROR_FRAGMENTED_POOL && res != VK_ERROR_OUT_OF_POOL_MEMORY) {
				ERR_FAIL_V_MSG(UniformSetID(), "Cannot allocate descriptor sets, error " + itos(res) + ".");
			}
		}
	}

	// Create a new pool when no allocations could be made from the existing pools.
	if (vk_descriptor_set == VK_NULL_HANDLE) {
		descriptor_set_allocate_info.descriptorPool = _descriptor_set_pool_create(pool_key, linear_pool);
		VkResult res = vkAllocateDescriptorSets(vk_device, &descriptor_set_allocate_info, &vk_descriptor_set);

		// All errors are unexpected at this stage.
		if (res) {
			vkDestroyDescriptorPool(vk_device, descriptor_set_allocate_info.descriptorPool, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL));
			ERR_FAIL_V_MSG(UniformSetID(), "Cannot allocate descriptor sets, error " + itos(res) + ".");
		}
	}

	DEV_ASSERT(descriptor_set_allocate_info.descriptorPool != VK_NULL_HANDLE && vk_descriptor_set != VK_NULL_HANDLE);
	pool_sets_it->value[descriptor_set_allocate_info.descriptorPool]++;

	for (uint32_t i = 0; i < writes_amount; i++) {
		vk_writes[i].dstSet = vk_descriptor_set;
	}
	vkUpdateDescriptorSets(vk_device, writes_amount, vk_writes, 0, nullptr);

	// Bookkeep.

	UniformSetInfo *usi = VersatileResource::allocate<UniformSetInfo>(resources_allocator);
	usi->vk_descriptor_set = vk_descriptor_set;
	if (p_linear_pool_index >= 0) {
		usi->vk_linear_descriptor_pool = descriptor_set_allocate_info.descriptorPool;
	} else {
		usi->vk_descriptor_pool = descriptor_set_allocate_info.descriptorPool;
	}
	usi->pool_sets_it = pool_sets_it;
	usi->dynamic_buffers.resize(num_dynamic_buffers);
	for (uint32_t i = 0u; i < num_dynamic_buffers; ++i) {
		usi->dynamic_buffers[i] = dynamic_buffers[i];
	}

	return UniformSetID(usi);
}

void RenderingDeviceDriverVulkan::uniform_set_free(UniformSetID p_uniform_set) {
	UniformSetInfo *usi = (UniformSetInfo *)p_uniform_set.id;

	if (usi->vk_linear_descriptor_pool) {
		// Nothing to do. All sets are freed at once using vkResetDescriptorPool.
		//
		// We can NOT decrease the reference count (i.e. call _descriptor_set_pool_unreference())
		// because the pool is linear (i.e. the freed set can't be recycled) and further calls to
		// _descriptor_set_pool_find_or_create() need usi->pool_sets_it->value to stay so that we can
		// tell if the pool has run out of space and we need to create a new pool.
	} else {
		vkFreeDescriptorSets(vk_device, usi->vk_descriptor_pool, 1, &usi->vk_descriptor_set);
		_descriptor_set_pool_unreference(usi->pool_sets_it, usi->vk_descriptor_pool, -1);
	}

	VersatileResource::free(resources_allocator, usi);
}

bool RenderingDeviceDriverVulkan::uniform_sets_have_linear_pools() const {
	return true;
}

uint32_t RenderingDeviceDriverVulkan::uniform_sets_get_dynamic_offsets(VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count) const {
	uint32_t mask = 0u;
	uint32_t shift = 0u;
#ifdef DEV_ENABLED
	uint32_t curr_dynamic_offset = 0u;
#endif

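	// Pack the frame index of every dynamic buffer into the returned mask, 4 bits per
	// buffer (so a 32-bit mask can describe up to 8 buffers). The actual byte offsets
	// are reconstructed from these indices in command_bind_render_uniform_sets().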
	for (uint32_t i = 0; i < p_set_count; i++) {
		const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;
		// At this point, this should have already been validated.
		DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);

		for (const BufferInfo *dynamic_buffer : usi->dynamic_buffers) {
			DEV_ASSERT(dynamic_buffer->frame_idx < 16u);
			mask |= dynamic_buffer->frame_idx << shift;
			shift += 4u;
		}
#ifdef DEV_ENABLED
		curr_dynamic_offset += usi->dynamic_buffers.size();
#endif
	}

	return mask;
}

void RenderingDeviceDriverVulkan::linear_uniform_set_pools_reset(int p_linear_pool_index) {
	if (linear_descriptor_pools_enabled) {
		DescriptorSetPools &pools_to_reset = linear_descriptor_set_pools[p_linear_pool_index];
		DescriptorSetPools::Iterator curr_pool = pools_to_reset.begin();

		while (curr_pool != pools_to_reset.end()) {
			HashMap<VkDescriptorPool, uint32_t>::Iterator curr_pair = curr_pool->value.begin();
			while (curr_pair != curr_pool->value.end()) {
				vkResetDescriptorPool(vk_device, curr_pair->key, 0);
				curr_pair->value = 0;
				++curr_pair;
			}
			++curr_pool;
		}
	}
}

// ----- COMMANDS -----

void RenderingDeviceDriverVulkan::command_uniform_set_prepare_for_use(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
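	// Intentionally a no-op on Vulkan: descriptor sets need no extra preparation here before binding.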
}

/******************/
/**** TRANSFER ****/
/******************/

static_assert(ARRAYS_COMPATIBLE_FIELDWISE(RDD::BufferCopyRegion, VkBufferCopy));

static void _texture_subresource_range_to_vk(const RDD::TextureSubresourceRange &p_subresources, VkImageSubresourceRange *r_vk_subresources) {
	*r_vk_subresources = {};
	r_vk_subresources->aspectMask = (VkImageAspectFlags)p_subresources.aspect;
	r_vk_subresources->baseMipLevel = p_subresources.base_mipmap;
	r_vk_subresources->levelCount = p_subresources.mipmap_count;
	r_vk_subresources->baseArrayLayer = p_subresources.base_layer;
	r_vk_subresources->layerCount = p_subresources.layer_count;
}

static void _texture_subresource_layers_to_vk(const RDD::TextureSubresourceLayers &p_subresources, VkImageSubresourceLayers *r_vk_subresources) {
	*r_vk_subresources = {};
	r_vk_subresources->aspectMask = (VkImageAspectFlags)p_subresources.aspect;
	r_vk_subresources->mipLevel = p_subresources.mipmap;
	r_vk_subresources->baseArrayLayer = p_subresources.base_layer;
	r_vk_subresources->layerCount = p_subresources.layer_count;
}

static void _buffer_texture_copy_region_to_vk(const RDD::BufferTextureCopyRegion &p_copy_region, uint32_t p_buffer_row_length, VkBufferImageCopy *r_vk_copy_region) {
	*r_vk_copy_region = {};
	r_vk_copy_region->bufferOffset = p_copy_region.buffer_offset;
	r_vk_copy_region->bufferRowLength = p_buffer_row_length;
	r_vk_copy_region->imageSubresource.aspectMask = (VkImageAspectFlags)(1 << p_copy_region.texture_subresource.aspect);
	r_vk_copy_region->imageSubresource.mipLevel = p_copy_region.texture_subresource.mipmap;
	r_vk_copy_region->imageSubresource.baseArrayLayer = p_copy_region.texture_subresource.layer;
	r_vk_copy_region->imageSubresource.layerCount = 1;
	r_vk_copy_region->imageOffset.x = p_copy_region.texture_offset.x;
	r_vk_copy_region->imageOffset.y = p_copy_region.texture_offset.y;
	r_vk_copy_region->imageOffset.z = p_copy_region.texture_offset.z;
	r_vk_copy_region->imageExtent.width = p_copy_region.texture_region_size.x;
	r_vk_copy_region->imageExtent.height = p_copy_region.texture_region_size.y;
	r_vk_copy_region->imageExtent.depth = p_copy_region.texture_region_size.z;
}

static void _texture_copy_region_to_vk(const RDD::TextureCopyRegion &p_copy_region, VkImageCopy *r_vk_copy_region) {
	*r_vk_copy_region = {};
	_texture_subresource_layers_to_vk(p_copy_region.src_subresources, &r_vk_copy_region->srcSubresource);
	r_vk_copy_region->srcOffset.x = p_copy_region.src_offset.x;
	r_vk_copy_region->srcOffset.y = p_copy_region.src_offset.y;
	r_vk_copy_region->srcOffset.z = p_copy_region.src_offset.z;
	_texture_subresource_layers_to_vk(p_copy_region.dst_subresources, &r_vk_copy_region->dstSubresource);
	r_vk_copy_region->dstOffset.x = p_copy_region.dst_offset.x;
	r_vk_copy_region->dstOffset.y = p_copy_region.dst_offset.y;
	r_vk_copy_region->dstOffset.z = p_copy_region.dst_offset.z;
	r_vk_copy_region->extent.width = p_copy_region.size.x;
	r_vk_copy_region->extent.height = p_copy_region.size.y;
	r_vk_copy_region->extent.depth = p_copy_region.size.z;
}

void RenderingDeviceDriverVulkan::command_clear_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, uint64_t p_offset, uint64_t p_size) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
	vkCmdFillBuffer(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_size, 0);
}

void RenderingDeviceDriverVulkan::command_copy_buffer(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, BufferID p_dst_buffer, VectorView<BufferCopyRegion> p_regions) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *src_buf_info = (const BufferInfo *)p_src_buffer.id;
	const BufferInfo *dst_buf_info = (const BufferInfo *)p_dst_buffer.id;
	vkCmdCopyBuffer(command_buffer->vk_command_buffer, src_buf_info->vk_buffer, dst_buf_info->vk_buffer, p_regions.size(), (const VkBufferCopy *)p_regions.ptr());
}

void RenderingDeviceDriverVulkan::command_copy_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<TextureCopyRegion> p_regions) {
	VkImageCopy *vk_copy_regions = ALLOCA_ARRAY(VkImageCopy, p_regions.size());
	for (uint32_t i = 0; i < p_regions.size(); i++) {
		_texture_copy_region_to_vk(p_regions[i], &vk_copy_regions[i]);
	}

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id;
	const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id;

#ifdef DEBUG_ENABLED
	if (src_tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_src_texture must not be used in command_copy_texture.");
	}
	if (dst_tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_copy_texture.");
	}
#endif

	vkCmdCopyImage(command_buffer->vk_command_buffer, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions);
}

void RenderingDeviceDriverVulkan::command_resolve_texture(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, uint32_t p_src_layer, uint32_t p_src_mipmap, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, uint32_t p_dst_layer, uint32_t p_dst_mipmap) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const TextureInfo *src_tex_info = (const TextureInfo *)p_src_texture.id;
	const TextureInfo *dst_tex_info = (const TextureInfo *)p_dst_texture.id;

	VkImageResolve vk_resolve = {};
	vk_resolve.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	vk_resolve.srcSubresource.mipLevel = p_src_mipmap;
	vk_resolve.srcSubresource.baseArrayLayer = p_src_layer;
	vk_resolve.srcSubresource.layerCount = 1;
	vk_resolve.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
	vk_resolve.dstSubresource.mipLevel = p_dst_mipmap;
	vk_resolve.dstSubresource.baseArrayLayer = p_dst_layer;
	vk_resolve.dstSubresource.layerCount = 1;
	vk_resolve.extent.width = MAX(1u, src_tex_info->vk_create_info.extent.width >> p_src_mipmap);
	vk_resolve.extent.height = MAX(1u, src_tex_info->vk_create_info.extent.height >> p_src_mipmap);
	vk_resolve.extent.depth = MAX(1u, src_tex_info->vk_create_info.extent.depth >> p_src_mipmap);

#ifdef DEBUG_ENABLED
	if (src_tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_src_texture must not be used in command_resolve_texture. Use a resolve store action pass instead.");
	}
	if (dst_tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_resolve_texture.");
	}
#endif

	vkCmdResolveImage(command_buffer->vk_command_buffer, src_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], dst_tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], 1, &vk_resolve);
}

void RenderingDeviceDriverVulkan::command_clear_color_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, const Color &p_color, const TextureSubresourceRange &p_subresources) {
	VkClearColorValue vk_color = {};
	memcpy(&vk_color.float32, p_color.components, sizeof(VkClearColorValue::float32));

	VkImageSubresourceRange vk_subresources = {};
	_texture_subresource_range_to_vk(p_subresources, &vk_subresources);

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const TextureInfo *tex_info = (const TextureInfo *)p_texture.id;
#ifdef DEBUG_ENABLED
	if (tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_texture must not be used in command_clear_color_texture. Use a clear store action pass instead.");
	}
#endif
	vkCmdClearColorImage(command_buffer->vk_command_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_color, 1, &vk_subresources);
}

void RenderingDeviceDriverVulkan::command_clear_depth_stencil_texture(CommandBufferID p_cmd_buffer, TextureID p_texture, TextureLayout p_texture_layout, float p_depth, uint8_t p_stencil, const TextureSubresourceRange &p_subresources) {
	VkClearDepthStencilValue vk_depth_stencil = {};
	vk_depth_stencil.depth = p_depth;
	vk_depth_stencil.stencil = p_stencil;

	VkImageSubresourceRange vk_subresources = {};
	_texture_subresource_range_to_vk(p_subresources, &vk_subresources);

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const TextureInfo *tex_info = (const TextureInfo *)p_texture.id;
#ifdef DEBUG_ENABLED
	if (tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_texture must not be used in command_clear_depth_stencil_texture. Use a clear store action pass instead.");
	}
#endif
	vkCmdClearDepthStencilImage(command_buffer->vk_command_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_texture_layout], &vk_depth_stencil, 1, &vk_subresources);
}

void RenderingDeviceDriverVulkan::command_copy_buffer_to_texture(CommandBufferID p_cmd_buffer, BufferID p_src_buffer, TextureID p_dst_texture, TextureLayout p_dst_texture_layout, VectorView<BufferTextureCopyRegion> p_regions) {
	const TextureInfo *tex_info = (const TextureInfo *)p_dst_texture.id;

	uint32_t pixel_size = get_image_format_pixel_size(tex_info->rd_format);
	uint32_t block_size = get_compressed_image_format_block_byte_size(tex_info->rd_format);
	uint32_t block_w, block_h;
	get_compressed_image_format_block_dimensions(tex_info->rd_format, block_w, block_h);

	VkBufferImageCopy *vk_copy_regions = ALLOCA_ARRAY(VkBufferImageCopy, p_regions.size());
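	// bufferRowLength is expressed in texels (block widths for compressed formats), while
	// row_pitch is in bytes, hence the conversion below. For uncompressed formats the
	// helpers report a block size of 1, so it reduces to row_pitch / pixel_size.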
	for (uint32_t i = 0; i < p_regions.size(); i++) {
		_buffer_texture_copy_region_to_vk(p_regions[i], p_regions[i].row_pitch * block_w / (pixel_size * block_size), &vk_copy_regions[i]);
	}

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *buf_info = (const BufferInfo *)p_src_buffer.id;
#ifdef DEBUG_ENABLED
	if (tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_dst_texture must not be used in command_copy_buffer_to_texture.");
	}
#endif
	vkCmdCopyBufferToImage(command_buffer->vk_command_buffer, buf_info->vk_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_dst_texture_layout], p_regions.size(), vk_copy_regions);
}

void RenderingDeviceDriverVulkan::command_copy_texture_to_buffer(CommandBufferID p_cmd_buffer, TextureID p_src_texture, TextureLayout p_src_texture_layout, BufferID p_dst_buffer, VectorView<BufferTextureCopyRegion> p_regions) {
	const TextureInfo *tex_info = (const TextureInfo *)p_src_texture.id;

	uint32_t pixel_size = get_image_format_pixel_size(tex_info->rd_format);
	uint32_t block_size = get_compressed_image_format_block_byte_size(tex_info->rd_format);
	uint32_t block_w, block_h;
	get_compressed_image_format_block_dimensions(tex_info->rd_format, block_w, block_h);

	VkBufferImageCopy *vk_copy_regions = ALLOCA_ARRAY(VkBufferImageCopy, p_regions.size());
	for (uint32_t i = 0; i < p_regions.size(); i++) {
		_buffer_texture_copy_region_to_vk(p_regions[i], p_regions[i].row_pitch * block_w / (pixel_size * block_size), &vk_copy_regions[i]);
	}

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *buf_info = (const BufferInfo *)p_dst_buffer.id;
#ifdef DEBUG_ENABLED
	if (tex_info->transient) {
		ERR_PRINT("TEXTURE_USAGE_TRANSIENT_BIT p_src_texture must not be used in command_copy_texture_to_buffer.");
	}
#endif
	vkCmdCopyImageToBuffer(command_buffer->vk_command_buffer, tex_info->vk_view_create_info.image, RD_TO_VK_LAYOUT[p_src_texture_layout], buf_info->vk_buffer, p_regions.size(), vk_copy_regions);
}

/******************/
/**** PIPELINE ****/
/******************/

void RenderingDeviceDriverVulkan::pipeline_free(PipelineID p_pipeline) {
	vkDestroyPipeline(vk_device, (VkPipeline)p_pipeline.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE));
}

// ----- BINDING -----

void RenderingDeviceDriverVulkan::command_bind_push_constants(CommandBufferID p_cmd_buffer, ShaderID p_shader, uint32_t p_dst_first_index, VectorView<uint32_t> p_data) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
	vkCmdPushConstants(command_buffer->vk_command_buffer, shader_info->vk_pipeline_layout, shader_info->vk_push_constant_stages, p_dst_first_index * sizeof(uint32_t), p_data.size() * sizeof(uint32_t), p_data.ptr());
}

// ----- CACHE -----

int RenderingDeviceDriverVulkan::caching_instance_count = 0;

bool RenderingDeviceDriverVulkan::pipeline_cache_create(const Vector<uint8_t> &p_data) {
	if (caching_instance_count) {
		WARN_PRINT("There's already a RenderingDeviceDriverVulkan instance doing PSO caching. Only one can do it at a time. This one won't.");
		return false;
	}
	caching_instance_count++;

	pipelines_cache.current_size = 0;
	pipelines_cache.buffer.resize(sizeof(PipelineCacheHeader));

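	// pipelines_cache.buffer always holds a PipelineCacheHeader followed by the raw blob
	// from vkGetPipelineCacheData(); see pipeline_cache_serialize().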
	// Parse.
	{
		if (p_data.is_empty()) {
			// No pre-existing cache, just create it.
		} else if (p_data.size() <= (int)sizeof(PipelineCacheHeader)) {
			print_verbose("Invalid/corrupt Vulkan pipelines cache. Existing shader pipeline cache will be ignored, which may result in stuttering during gameplay.");
		} else {
			const PipelineCacheHeader *loaded_header = reinterpret_cast<const PipelineCacheHeader *>(p_data.ptr());
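			// The magic value is an engine-defined sentinel (868 plus the Vulkan cache header
			// version); it only guards against blobs that were never written by this driver.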
			if (loaded_header->magic != 868 + VK_PIPELINE_CACHE_HEADER_VERSION_ONE) {
				print_verbose("Invalid Vulkan pipelines cache magic number. Existing shader pipeline cache will be ignored, which may result in stuttering during gameplay.");
			} else {
				const uint8_t *loaded_buffer_start = p_data.ptr() + sizeof(PipelineCacheHeader);
				uint32_t loaded_buffer_size = p_data.size() - sizeof(PipelineCacheHeader);
				const PipelineCacheHeader *current_header = (PipelineCacheHeader *)pipelines_cache.buffer.ptr();
				if (loaded_header->data_hash != hash_murmur3_buffer(loaded_buffer_start, loaded_buffer_size) ||
						loaded_header->data_size != loaded_buffer_size ||
						loaded_header->vendor_id != current_header->vendor_id ||
						loaded_header->device_id != current_header->device_id ||
						loaded_header->driver_version != current_header->driver_version ||
						memcmp(loaded_header->uuid, current_header->uuid, VK_UUID_SIZE) != 0 ||
						loaded_header->driver_abi != current_header->driver_abi) {
					print_verbose("Invalid Vulkan pipelines cache header. This may be due to an engine change, GPU change or graphics driver version change. Existing shader pipeline cache will be ignored, which may result in stuttering during gameplay.");
				} else {
					pipelines_cache.current_size = loaded_buffer_size;
					pipelines_cache.buffer = p_data;
				}
			}
		}
	}

	// Create.
	{
		VkPipelineCacheCreateInfo cache_info = {};
		cache_info.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
		cache_info.initialDataSize = pipelines_cache.buffer.size() - sizeof(PipelineCacheHeader);
		cache_info.pInitialData = pipelines_cache.buffer.ptr() + sizeof(PipelineCacheHeader);

		VkResult err = vkCreatePipelineCache(vk_device, &cache_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_CACHE), &pipelines_cache.vk_cache);
		if (err != VK_SUCCESS) {
			WARN_PRINT("vkCreatePipelineCache failed with error " + itos(err) + ".");
			return false;
		}
	}

	return true;
}

void RenderingDeviceDriverVulkan::pipeline_cache_free() {
	DEV_ASSERT(pipelines_cache.vk_cache);

	vkDestroyPipelineCache(vk_device, pipelines_cache.vk_cache, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE_CACHE));
	pipelines_cache.vk_cache = VK_NULL_HANDLE;

	DEV_ASSERT(caching_instance_count > 0);
	caching_instance_count--;
}

size_t RenderingDeviceDriverVulkan::pipeline_cache_query_size() {
	DEV_ASSERT(pipelines_cache.vk_cache);

	// FIXME:
	// We're letting the cache grow unboundedly. We may want to set a limit and see if implementations use LRU or the like.
	// If we do, we won't be able to assume any longer that the cache is dirty if, and only if, it has grown.
	VkResult err = vkGetPipelineCacheData(vk_device, pipelines_cache.vk_cache, &pipelines_cache.current_size, nullptr);
	ERR_FAIL_COND_V_MSG(err, 0, "vkGetPipelineCacheData failed with error " + itos(err) + ".");

	return pipelines_cache.current_size;
}

Vector<uint8_t> RenderingDeviceDriverVulkan::pipeline_cache_serialize() {
	DEV_ASSERT(pipelines_cache.vk_cache);

	pipelines_cache.buffer.resize(pipelines_cache.current_size + sizeof(PipelineCacheHeader));

	VkResult err = vkGetPipelineCacheData(vk_device, pipelines_cache.vk_cache, &pipelines_cache.current_size, pipelines_cache.buffer.ptrw() + sizeof(PipelineCacheHeader));
	ERR_FAIL_COND_V(err != VK_SUCCESS && err != VK_INCOMPLETE, Vector<uint8_t>()); // Incomplete is OK because the cache may have grown since the size was queried (except when exiting).

	// The real buffer size may now be bigger than the updated current_size.
	// We take into account the new size but keep the buffer resized in a worst-case fashion.

	PipelineCacheHeader *header = (PipelineCacheHeader *)pipelines_cache.buffer.ptrw();
	header->data_size = pipelines_cache.current_size;
	header->data_hash = hash_murmur3_buffer(pipelines_cache.buffer.ptr() + sizeof(PipelineCacheHeader), pipelines_cache.current_size);

	return pipelines_cache.buffer;
}

/*******************/
/**** RENDERING ****/
/*******************/

// ----- SUBPASS -----

// RDD::AttachmentLoadOp == VkAttachmentLoadOp.
static_assert(ENUM_MEMBERS_EQUAL(RDD::ATTACHMENT_LOAD_OP_LOAD, VK_ATTACHMENT_LOAD_OP_LOAD));
static_assert(ENUM_MEMBERS_EQUAL(RDD::ATTACHMENT_LOAD_OP_CLEAR, VK_ATTACHMENT_LOAD_OP_CLEAR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::ATTACHMENT_LOAD_OP_DONT_CARE, VK_ATTACHMENT_LOAD_OP_DONT_CARE));

// RDD::AttachmentStoreOp == VkAttachmentStoreOp.
static_assert(ENUM_MEMBERS_EQUAL(RDD::ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_STORE_OP_STORE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::ATTACHMENT_STORE_OP_DONT_CARE, VK_ATTACHMENT_STORE_OP_DONT_CARE));

// Assuming Vulkan and RDD's are backed by uint32_t in:
// - VkSubpassDescription2::pPreserveAttachments and RDD::Subpass::preserve_attachments.
// - VkRenderPassCreateInfo2KHR::pCorrelatedViewMasks and p_view_correlation_mask.

static void _attachment_reference_to_vk(const RDD::AttachmentReference &p_attachment_reference, VkAttachmentReference2KHR *r_vk_attachment_reference) {
	*r_vk_attachment_reference = {};
	r_vk_attachment_reference->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
	r_vk_attachment_reference->attachment = p_attachment_reference.attachment;
	r_vk_attachment_reference->layout = RD_TO_VK_LAYOUT[p_attachment_reference.layout];
	r_vk_attachment_reference->aspectMask = (VkImageAspectFlags)p_attachment_reference.aspect;
}

RDD::RenderPassID RenderingDeviceDriverVulkan::render_pass_create(VectorView<Attachment> p_attachments, VectorView<Subpass> p_subpasses, VectorView<SubpassDependency> p_subpass_dependencies, uint32_t p_view_count, AttachmentReference p_fragment_density_map_attachment) {
	// These are only used with multiview, but they need to be defined in this scope.
	const uint32_t view_mask = (1 << p_view_count) - 1;
	const uint32_t correlation_mask = (1 << p_view_count) - 1;
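	// E.g. p_view_count == 2 yields 0b11: each subpass renders both views, and the views
	// are flagged as spatially correlated so the implementation may optimize across them.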

	VkAttachmentDescription2KHR *vk_attachments = ALLOCA_ARRAY(VkAttachmentDescription2KHR, p_attachments.size());
	for (uint32_t i = 0; i < p_attachments.size(); i++) {
		vk_attachments[i] = {};
		vk_attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2_KHR;
		vk_attachments[i].format = RD_TO_VK_FORMAT[p_attachments[i].format];
		vk_attachments[i].samples = _ensure_supported_sample_count(p_attachments[i].samples);
		vk_attachments[i].loadOp = (VkAttachmentLoadOp)p_attachments[i].load_op;
		vk_attachments[i].storeOp = (VkAttachmentStoreOp)p_attachments[i].store_op;
		vk_attachments[i].stencilLoadOp = (VkAttachmentLoadOp)p_attachments[i].stencil_load_op;
		vk_attachments[i].stencilStoreOp = (VkAttachmentStoreOp)p_attachments[i].stencil_store_op;
		vk_attachments[i].initialLayout = RD_TO_VK_LAYOUT[p_attachments[i].initial_layout];
		vk_attachments[i].finalLayout = RD_TO_VK_LAYOUT[p_attachments[i].final_layout];
	}

	VkSubpassDescription2KHR *vk_subpasses = ALLOCA_ARRAY(VkSubpassDescription2KHR, p_subpasses.size());
	for (uint32_t i = 0; i < p_subpasses.size(); i++) {
		VkAttachmentReference2KHR *vk_subpass_input_attachments = ALLOCA_ARRAY(VkAttachmentReference2KHR, p_subpasses[i].input_references.size());
		for (uint32_t j = 0; j < p_subpasses[i].input_references.size(); j++) {
			_attachment_reference_to_vk(p_subpasses[i].input_references[j], &vk_subpass_input_attachments[j]);
		}

		VkAttachmentReference2KHR *vk_subpass_color_attachments = ALLOCA_ARRAY(VkAttachmentReference2KHR, p_subpasses[i].color_references.size());
		for (uint32_t j = 0; j < p_subpasses[i].color_references.size(); j++) {
			_attachment_reference_to_vk(p_subpasses[i].color_references[j], &vk_subpass_color_attachments[j]);
		}

		VkAttachmentReference2KHR *vk_subpass_resolve_attachments = ALLOCA_ARRAY(VkAttachmentReference2KHR, p_subpasses[i].resolve_references.size());
		for (uint32_t j = 0; j < p_subpasses[i].resolve_references.size(); j++) {
			_attachment_reference_to_vk(p_subpasses[i].resolve_references[j], &vk_subpass_resolve_attachments[j]);
		}

		VkAttachmentReference2KHR *vk_subpass_depth_stencil_attachment = nullptr;
		if (p_subpasses[i].depth_stencil_reference.attachment != AttachmentReference::UNUSED) {
			vk_subpass_depth_stencil_attachment = ALLOCA_SINGLE(VkAttachmentReference2KHR);
			_attachment_reference_to_vk(p_subpasses[i].depth_stencil_reference, vk_subpass_depth_stencil_attachment);
		}

		vk_subpasses[i] = {};
		vk_subpasses[i].sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2_KHR;
		vk_subpasses[i].pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS;
		vk_subpasses[i].viewMask = p_view_count == 1 ? 0 : view_mask;
		vk_subpasses[i].inputAttachmentCount = p_subpasses[i].input_references.size();
		vk_subpasses[i].pInputAttachments = vk_subpass_input_attachments;
		vk_subpasses[i].colorAttachmentCount = p_subpasses[i].color_references.size();
		vk_subpasses[i].pColorAttachments = vk_subpass_color_attachments;
		vk_subpasses[i].pResolveAttachments = vk_subpass_resolve_attachments;
		vk_subpasses[i].pDepthStencilAttachment = vk_subpass_depth_stencil_attachment;
		vk_subpasses[i].preserveAttachmentCount = p_subpasses[i].preserve_attachments.size();
		vk_subpasses[i].pPreserveAttachments = p_subpasses[i].preserve_attachments.ptr();

		// Fragment shading rate.
		if (fsr_capabilities.attachment_supported && p_subpasses[i].fragment_shading_rate_reference.attachment != AttachmentReference::UNUSED) {
			VkAttachmentReference2KHR *vk_subpass_fsr_attachment = ALLOCA_SINGLE(VkAttachmentReference2KHR);
			*vk_subpass_fsr_attachment = {};
			vk_subpass_fsr_attachment->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
			vk_subpass_fsr_attachment->attachment = p_subpasses[i].fragment_shading_rate_reference.attachment;
			vk_subpass_fsr_attachment->layout = VK_IMAGE_LAYOUT_FRAGMENT_SHADING_RATE_ATTACHMENT_OPTIMAL_KHR;

			VkFragmentShadingRateAttachmentInfoKHR *vk_fsr_info = ALLOCA_SINGLE(VkFragmentShadingRateAttachmentInfoKHR);
			*vk_fsr_info = {};
			vk_fsr_info->sType = VK_STRUCTURE_TYPE_FRAGMENT_SHADING_RATE_ATTACHMENT_INFO_KHR;
			vk_fsr_info->pNext = vk_subpasses[i].pNext;
			vk_fsr_info->pFragmentShadingRateAttachment = vk_subpass_fsr_attachment;
			vk_fsr_info->shadingRateAttachmentTexelSize.width = p_subpasses[i].fragment_shading_rate_texel_size.x;
			vk_fsr_info->shadingRateAttachmentTexelSize.height = p_subpasses[i].fragment_shading_rate_texel_size.y;

			vk_subpasses[i].pNext = vk_fsr_info;
		}

		// Depth resolve.
		if (framebuffer_depth_resolve && p_subpasses[i].depth_resolve_reference.attachment != AttachmentReference::UNUSED) {
			VkAttachmentReference2KHR *vk_subpass_depth_resolve_attachment = ALLOCA_SINGLE(VkAttachmentReference2KHR);
			*vk_subpass_depth_resolve_attachment = {};
			vk_subpass_depth_resolve_attachment->sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2_KHR;
			vk_subpass_depth_resolve_attachment->attachment = p_subpasses[i].depth_resolve_reference.attachment;
			vk_subpass_depth_resolve_attachment->layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;

			VkSubpassDescriptionDepthStencilResolveKHR *vk_depth_resolve_info = ALLOCA_SINGLE(VkSubpassDescriptionDepthStencilResolveKHR);
			*vk_depth_resolve_info = {};
			vk_depth_resolve_info->sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE;
			vk_depth_resolve_info->pNext = vk_subpasses[i].pNext;
			vk_depth_resolve_info->depthResolveMode = VK_RESOLVE_MODE_MAX_BIT_KHR;
			vk_depth_resolve_info->stencilResolveMode = VK_RESOLVE_MODE_NONE_KHR; // We don't resolve our stencil (for now).
			vk_depth_resolve_info->pDepthStencilResolveAttachment = vk_subpass_depth_resolve_attachment;

			vk_subpasses[i].pNext = vk_depth_resolve_info;
		}
	}

	VkSubpassDependency2KHR *vk_subpass_dependencies = ALLOCA_ARRAY(VkSubpassDependency2KHR, p_subpass_dependencies.size());
	for (uint32_t i = 0; i < p_subpass_dependencies.size(); i++) {
		vk_subpass_dependencies[i] = {};
		vk_subpass_dependencies[i].sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2;
		vk_subpass_dependencies[i].srcSubpass = p_subpass_dependencies[i].src_subpass;
		vk_subpass_dependencies[i].dstSubpass = p_subpass_dependencies[i].dst_subpass;
		vk_subpass_dependencies[i].srcStageMask = _rd_to_vk_pipeline_stages(p_subpass_dependencies[i].src_stages);
		vk_subpass_dependencies[i].dstStageMask = _rd_to_vk_pipeline_stages(p_subpass_dependencies[i].dst_stages);
		vk_subpass_dependencies[i].srcAccessMask = _rd_to_vk_access_flags(p_subpass_dependencies[i].src_access);
		vk_subpass_dependencies[i].dstAccessMask = _rd_to_vk_access_flags(p_subpass_dependencies[i].dst_access);
	}

	VkRenderPassCreateInfo2KHR create_info = {};
	create_info.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR;
	create_info.attachmentCount = p_attachments.size();
	create_info.pAttachments = vk_attachments;
	create_info.subpassCount = p_subpasses.size();
	create_info.pSubpasses = vk_subpasses;
	create_info.dependencyCount = p_subpass_dependencies.size();
	create_info.pDependencies = vk_subpass_dependencies;
	create_info.correlatedViewMaskCount = p_view_count == 1 ? 0 : 1;
	create_info.pCorrelatedViewMasks = p_view_count == 1 ? nullptr : &correlation_mask;

	// Multiview.
	if (p_view_count > 1 && device_functions.CreateRenderPass2KHR == nullptr) {
		// This is only required when not using vkCreateRenderPass2.
		// We add it if vkCreateRenderPass2KHR is not supported,
		// so that it gets passed to our vkCreateRenderPass fallback.

		uint32_t *vk_view_masks = ALLOCA_ARRAY(uint32_t, p_subpasses.size());
		for (uint32_t i = 0; i < p_subpasses.size(); i++) {
			vk_view_masks[i] = view_mask;
		}

		VkRenderPassMultiviewCreateInfo *multiview_create_info = ALLOCA_SINGLE(VkRenderPassMultiviewCreateInfo);
		*multiview_create_info = {};
		multiview_create_info->sType = VK_STRUCTURE_TYPE_RENDER_PASS_MULTIVIEW_CREATE_INFO;
		multiview_create_info->subpassCount = p_subpasses.size();
		multiview_create_info->pViewMasks = vk_view_masks;
		multiview_create_info->correlationMaskCount = 1;
		multiview_create_info->pCorrelationMasks = &correlation_mask;

		create_info.pNext = multiview_create_info;
	}

	// Fragment density map.
	bool uses_fragment_density_map = fdm_capabilities.attachment_supported && p_fragment_density_map_attachment.attachment != AttachmentReference::UNUSED;
	if (uses_fragment_density_map) {
		VkRenderPassFragmentDensityMapCreateInfoEXT *vk_fdm_info = ALLOCA_SINGLE(VkRenderPassFragmentDensityMapCreateInfoEXT);
		vk_fdm_info->sType = VK_STRUCTURE_TYPE_RENDER_PASS_FRAGMENT_DENSITY_MAP_CREATE_INFO_EXT;
		vk_fdm_info->fragmentDensityMapAttachment.attachment = p_fragment_density_map_attachment.attachment;
		vk_fdm_info->fragmentDensityMapAttachment.layout = RD_TO_VK_LAYOUT[p_fragment_density_map_attachment.layout];
		vk_fdm_info->pNext = create_info.pNext;
		create_info.pNext = vk_fdm_info;
	}

	VkRenderPass vk_render_pass = VK_NULL_HANDLE;
	VkResult res = _create_render_pass(vk_device, &create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS), &vk_render_pass);
	ERR_FAIL_COND_V_MSG(res, RenderPassID(), "vkCreateRenderPass2KHR failed with error " + itos(res) + ".");

	RenderPassInfo *render_pass = VersatileResource::allocate<RenderPassInfo>(resources_allocator);
	render_pass->vk_render_pass = vk_render_pass;
	render_pass->uses_fragment_density_map = uses_fragment_density_map;
	return RenderPassID(render_pass);
}

void RenderingDeviceDriverVulkan::render_pass_free(RenderPassID p_render_pass) {
	RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id);
	vkDestroyRenderPass(vk_device, render_pass->vk_render_pass, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_RENDER_PASS));
	VersatileResource::free<RenderPassInfo>(resources_allocator, render_pass);
}

// ----- COMMANDS -----

static_assert(ARRAYS_COMPATIBLE_FIELDWISE(RDD::RenderPassClearValue, VkClearValue));

void RenderingDeviceDriverVulkan::command_begin_render_pass(CommandBufferID p_cmd_buffer, RenderPassID p_render_pass, FramebufferID p_framebuffer, CommandBufferType p_cmd_buffer_type, const Rect2i &p_rect, VectorView<RenderPassClearValue> p_clear_values) {
	CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id);
	RenderPassInfo *render_pass = (RenderPassInfo *)(p_render_pass.id);
	Framebuffer *framebuffer = (Framebuffer *)(p_framebuffer.id);

	if (framebuffer->swap_chain_acquired) {
		// Insert a barrier to wait for the acquisition of the framebuffer before the render pass begins.
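		// srcAccessMask stays 0 and oldLayout stays UNDEFINED (zero-initialized): the barrier
		// only performs the layout transition, while availability is expected to be covered by
		// the semaphore the queue waits on after acquiring the swap chain image.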
		VkImageMemoryBarrier image_barrier = {};
		image_barrier.sType = VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER;
		image_barrier.dstAccessMask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
		image_barrier.newLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
		image_barrier.srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_barrier.dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED;
		image_barrier.image = framebuffer->swap_chain_image;
		image_barrier.subresourceRange = framebuffer->swap_chain_image_subresource_range;
		vkCmdPipelineBarrier(command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, 0, 0, nullptr, 0, nullptr, 1, &image_barrier);
		framebuffer->swap_chain_acquired = false;
	}

	VkRenderPassBeginInfo render_pass_begin = {};
	render_pass_begin.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO;
	render_pass_begin.renderPass = render_pass->vk_render_pass;
	render_pass_begin.framebuffer = framebuffer->vk_framebuffer;

	render_pass_begin.renderArea.offset.x = p_rect.position.x;
	render_pass_begin.renderArea.offset.y = p_rect.position.y;
	render_pass_begin.renderArea.extent.width = p_rect.size.x;
	render_pass_begin.renderArea.extent.height = p_rect.size.y;

	render_pass_begin.clearValueCount = p_clear_values.size();
	render_pass_begin.pClearValues = (const VkClearValue *)p_clear_values.ptr();

	VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
	vkCmdBeginRenderPass(command_buffer->vk_command_buffer, &render_pass_begin, vk_subpass_contents);

	command_buffer->active_framebuffer = framebuffer;
	command_buffer->active_render_pass = render_pass;

#if PRINT_NATIVE_COMMANDS
	print_line(vformat("vkCmdBeginRenderPass Pass 0x%uX Framebuffer 0x%uX", p_render_pass.id, p_framebuffer.id));
#endif
}

void RenderingDeviceDriverVulkan::command_end_render_pass(CommandBufferID p_cmd_buffer) {
	CommandBufferInfo *command_buffer = (CommandBufferInfo *)(p_cmd_buffer.id);
	DEV_ASSERT(command_buffer->active_framebuffer != nullptr && "A framebuffer must be active.");
	DEV_ASSERT(command_buffer->active_render_pass != nullptr && "A render pass must be active.");

	if (device_functions.EndRenderPass2KHR != nullptr && fdm_capabilities.offset_supported && command_buffer->active_render_pass->uses_fragment_density_map) {
		LocalVector<VkOffset2D> fragment_density_offsets;
		if (VulkanHooks::get_singleton() != nullptr) {
			VulkanHooks::get_singleton()->get_fragment_density_offsets(fragment_density_offsets, fdm_capabilities.offset_granularity);
		}
		if (fragment_density_offsets.size() > 0) {
			VkSubpassFragmentDensityMapOffsetEndInfoQCOM offset_info = {};
			offset_info.sType = VK_STRUCTURE_TYPE_SUBPASS_FRAGMENT_DENSITY_MAP_OFFSET_END_INFO_QCOM;
			offset_info.pFragmentDensityOffsets = fragment_density_offsets.ptr();
			offset_info.fragmentDensityOffsetCount = fragment_density_offsets.size();

			VkSubpassEndInfo subpass_end_info = {};
			subpass_end_info.sType = VK_STRUCTURE_TYPE_SUBPASS_END_INFO;
			subpass_end_info.pNext = &offset_info;

			device_functions.EndRenderPass2KHR(command_buffer->vk_command_buffer, &subpass_end_info);
		} else {
			vkCmdEndRenderPass(command_buffer->vk_command_buffer);
		}
	} else {
		vkCmdEndRenderPass(command_buffer->vk_command_buffer);
	}

	command_buffer->active_render_pass = nullptr;
	command_buffer->active_framebuffer = nullptr;

#if PRINT_NATIVE_COMMANDS
	print_line("vkCmdEndRenderPass");
#endif
}

void RenderingDeviceDriverVulkan::command_next_render_subpass(CommandBufferID p_cmd_buffer, CommandBufferType p_cmd_buffer_type) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	VkSubpassContents vk_subpass_contents = p_cmd_buffer_type == COMMAND_BUFFER_TYPE_PRIMARY ? VK_SUBPASS_CONTENTS_INLINE : VK_SUBPASS_CONTENTS_SECONDARY_COMMAND_BUFFERS;
	vkCmdNextSubpass(command_buffer->vk_command_buffer, vk_subpass_contents);
}

void RenderingDeviceDriverVulkan::command_render_set_viewport(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_viewports) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	VkViewport *vk_viewports = ALLOCA_ARRAY(VkViewport, p_viewports.size());
	for (uint32_t i = 0; i < p_viewports.size(); i++) {
		vk_viewports[i] = {};
		vk_viewports[i].x = p_viewports[i].position.x;
		vk_viewports[i].y = p_viewports[i].position.y;
		vk_viewports[i].width = p_viewports[i].size.x;
		vk_viewports[i].height = p_viewports[i].size.y;
		vk_viewports[i].minDepth = 0.0f;
		vk_viewports[i].maxDepth = 1.0f;
	}
	vkCmdSetViewport(command_buffer->vk_command_buffer, 0, p_viewports.size(), vk_viewports);
}

void RenderingDeviceDriverVulkan::command_render_set_scissor(CommandBufferID p_cmd_buffer, VectorView<Rect2i> p_scissors) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdSetScissor(command_buffer->vk_command_buffer, 0, p_scissors.size(), (VkRect2D *)p_scissors.ptr());
}

void RenderingDeviceDriverVulkan::command_render_clear_attachments(CommandBufferID p_cmd_buffer, VectorView<AttachmentClear> p_attachment_clears, VectorView<Rect2i> p_rects) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;

	VkClearAttachment *vk_clears = ALLOCA_ARRAY(VkClearAttachment, p_attachment_clears.size());
	for (uint32_t i = 0; i < p_attachment_clears.size(); i++) {
		vk_clears[i] = {};
		memcpy(&vk_clears[i].clearValue, &p_attachment_clears[i].value, sizeof(VkClearValue));
		vk_clears[i].colorAttachment = p_attachment_clears[i].color_attachment;
		vk_clears[i].aspectMask = p_attachment_clears[i].aspect;
	}

	VkClearRect *vk_rects = ALLOCA_ARRAY(VkClearRect, p_rects.size());
	for (uint32_t i = 0; i < p_rects.size(); i++) {
		vk_rects[i] = {};
		vk_rects[i].rect.offset.x = p_rects[i].position.x;
		vk_rects[i].rect.offset.y = p_rects[i].position.y;
		vk_rects[i].rect.extent.width = p_rects[i].size.x;
		vk_rects[i].rect.extent.height = p_rects[i].size.y;
		vk_rects[i].baseArrayLayer = 0;
		vk_rects[i].layerCount = 1;
	}

	vkCmdClearAttachments(command_buffer->vk_command_buffer, p_attachment_clears.size(), vk_clears, p_rects.size(), vk_rects);
}

void RenderingDeviceDriverVulkan::command_bind_render_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, (VkPipeline)p_pipeline.id);
}

void RenderingDeviceDriverVulkan::command_bind_render_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
	if (p_set_count == 0) {
		return;
	}

	thread_local LocalVector<VkDescriptorSet> sets;
	sets.clear();
	sets.resize(p_set_count);

	uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
	uint32_t shift = 0u;
	uint32_t curr_dynamic_offset = 0u;

	for (uint32_t i = 0; i < p_set_count; i++) {
		const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;

		sets[i] = usi->vk_descriptor_set;

		// At this point, this should have already been validated.
		DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);

		const uint32_t dynamic_offset_count = usi->dynamic_buffers.size();
for (uint32_t j = 0u; j < dynamic_offset_count; ++j) {
5554
const uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xFu;
5555
shift += 4u;
5556
dynamic_offsets[curr_dynamic_offset++] = uint32_t(frame_idx * usi->dynamic_buffers[j]->size);
5557
}
5558
}
5559
5560
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
5561
const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
5562
vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_GRAPHICS, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], curr_dynamic_offset, dynamic_offsets);
5563
}
5564
5565
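// Note on the packing used above (illustration, not engine API): p_dynamic_offsets
// stores one 4-bit frame index per dynamic buffer, consumed in set/buffer order.
// For example, assuming two dynamic buffers where the first should read frame 2
// and the second frame 1, the caller would pack:
//
//   uint32_t packed = (2u << 0) | (1u << 4);
//
// Each buffer's byte offset then resolves to frame_idx * buffer->size, which is
// exactly what the loop above computes. With 4 bits per entry, at most 8 offsets
// fit in the 32-bit mask.
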
void RenderingDeviceDriverVulkan::command_render_draw(CommandBufferID p_cmd_buffer, uint32_t p_vertex_count, uint32_t p_instance_count, uint32_t p_base_vertex, uint32_t p_first_instance) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdDraw(command_buffer->vk_command_buffer, p_vertex_count, p_instance_count, p_base_vertex, p_first_instance);
}

void RenderingDeviceDriverVulkan::command_render_draw_indexed(CommandBufferID p_cmd_buffer, uint32_t p_index_count, uint32_t p_instance_count, uint32_t p_first_index, int32_t p_vertex_offset, uint32_t p_first_instance) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdDrawIndexed(command_buffer->vk_command_buffer, p_index_count, p_instance_count, p_first_index, p_vertex_offset, p_first_instance);
}

void RenderingDeviceDriverVulkan::command_render_draw_indexed_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id;
	vkCmdDrawIndexedIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_draw_count, p_stride);
}

void RenderingDeviceDriverVulkan::command_render_draw_indexed_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *indirect_buf_info = (const BufferInfo *)p_indirect_buffer.id;
	const BufferInfo *count_buf_info = (const BufferInfo *)p_count_buffer.id;
	vkCmdDrawIndexedIndirectCount(command_buffer->vk_command_buffer, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
}

void RenderingDeviceDriverVulkan::command_render_draw_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, uint32_t p_draw_count, uint32_t p_stride) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id;
	vkCmdDrawIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_draw_count, p_stride);
}

void RenderingDeviceDriverVulkan::command_render_draw_indirect_count(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset, BufferID p_count_buffer, uint64_t p_count_buffer_offset, uint32_t p_max_draw_count, uint32_t p_stride) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *indirect_buf_info = (const BufferInfo *)p_indirect_buffer.id;
	const BufferInfo *count_buf_info = (const BufferInfo *)p_count_buffer.id;
	vkCmdDrawIndirectCount(command_buffer->vk_command_buffer, indirect_buf_info->vk_buffer, p_offset, count_buf_info->vk_buffer, p_count_buffer_offset, p_max_draw_count, p_stride);
}

void RenderingDeviceDriverVulkan::command_render_bind_vertex_buffers(CommandBufferID p_cmd_buffer, uint32_t p_binding_count, const BufferID *p_buffers, const uint64_t *p_offsets, uint64_t p_dynamic_offsets) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	VkBuffer *vk_buffers = ALLOCA_ARRAY(VkBuffer, p_binding_count);
	uint64_t *vk_offsets = ALLOCA_ARRAY(uint64_t, p_binding_count);
	for (uint32_t i = 0; i < p_binding_count; i++) {
		const BufferInfo *buf_info = (const BufferInfo *)p_buffers[i].id;
		uint64_t offset = p_offsets[i];
		if (buf_info->is_dynamic()) {
			uint64_t frame_idx = p_dynamic_offsets & 0x3; // Assuming max 4 frames.
			p_dynamic_offsets >>= 2;
			offset += frame_idx * buf_info->size;
		}
		vk_buffers[i] = buf_info->vk_buffer;
		vk_offsets[i] = offset;
	}
	vkCmdBindVertexBuffers(command_buffer->vk_command_buffer, 0, p_binding_count, vk_buffers, vk_offsets);
}

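// Illustration of the vertex-buffer variant above: dynamic bindings consume 2 bits
// of p_dynamic_offsets each (frame indices 0-3), and only buffers flagged as
// dynamic advance the cursor. E.g., assuming binding 0 is static and binding 1 is
// dynamic on frame 3, the caller would pass p_dynamic_offsets = 0x3, and binding 1
// ends up bound at p_offsets[1] + 3 * buf_info->size.
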
void RenderingDeviceDriverVulkan::command_render_bind_index_buffer(CommandBufferID p_cmd_buffer, BufferID p_buffer, IndexBufferFormat p_format, uint64_t p_offset) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const BufferInfo *buf_info = (const BufferInfo *)p_buffer.id;
	vkCmdBindIndexBuffer(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset, p_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32);
}

void RenderingDeviceDriverVulkan::command_render_set_blend_constants(CommandBufferID p_cmd_buffer, const Color &p_constants) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdSetBlendConstants(command_buffer->vk_command_buffer, p_constants.components);
}

void RenderingDeviceDriverVulkan::command_render_set_line_width(CommandBufferID p_cmd_buffer, float p_width) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdSetLineWidth(command_buffer->vk_command_buffer, p_width);
}

// ----- PIPELINE -----

static const VkPrimitiveTopology RD_TO_VK_PRIMITIVE[RDD::RENDER_PRIMITIVE_MAX] = {
	VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
	VK_PRIMITIVE_TOPOLOGY_LINE_LIST,
	VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY,
	VK_PRIMITIVE_TOPOLOGY_LINE_STRIP,
	VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY,
	VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
	VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY,
	VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
	VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY,
	VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, // Strips with restart index map to the same topology; restart itself is enabled via primitiveRestartEnable at pipeline creation.
	VK_PRIMITIVE_TOPOLOGY_PATCH_LIST,
};

// RDD::PolygonCullMode == VkCullModeFlagBits.
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_DISABLED, VK_CULL_MODE_NONE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_FRONT, VK_CULL_MODE_FRONT_BIT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::POLYGON_CULL_BACK, VK_CULL_MODE_BACK_BIT));

// RDD::StencilOperation == VkStencilOp.
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_ZERO, VK_STENCIL_OP_ZERO));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_REPLACE, VK_STENCIL_OP_REPLACE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_CLAMP, VK_STENCIL_OP_INCREMENT_AND_CLAMP));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_CLAMP, VK_STENCIL_OP_DECREMENT_AND_CLAMP));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INVERT, VK_STENCIL_OP_INVERT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_INCREMENT_AND_WRAP, VK_STENCIL_OP_INCREMENT_AND_WRAP));
static_assert(ENUM_MEMBERS_EQUAL(RDD::STENCIL_OP_DECREMENT_AND_WRAP, VK_STENCIL_OP_DECREMENT_AND_WRAP));

// RDD::LogicOperation == VkLogicOp.
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_CLEAR, VK_LOGIC_OP_CLEAR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_AND, VK_LOGIC_OP_AND));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_AND_REVERSE, VK_LOGIC_OP_AND_REVERSE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_COPY, VK_LOGIC_OP_COPY));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_AND_INVERTED, VK_LOGIC_OP_AND_INVERTED));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_NO_OP, VK_LOGIC_OP_NO_OP));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_XOR, VK_LOGIC_OP_XOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_OR, VK_LOGIC_OP_OR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_NOR, VK_LOGIC_OP_NOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_EQUIVALENT, VK_LOGIC_OP_EQUIVALENT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_INVERT, VK_LOGIC_OP_INVERT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_OR_REVERSE, VK_LOGIC_OP_OR_REVERSE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_COPY_INVERTED, VK_LOGIC_OP_COPY_INVERTED));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_OR_INVERTED, VK_LOGIC_OP_OR_INVERTED));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_NAND, VK_LOGIC_OP_NAND));
static_assert(ENUM_MEMBERS_EQUAL(RDD::LOGIC_OP_SET, VK_LOGIC_OP_SET));

// RDD::BlendFactor == VkBlendFactor.
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE, VK_BLEND_FACTOR_ONE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_SRC_COLOR, VK_BLEND_FACTOR_SRC_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_SRC_COLOR, VK_BLEND_FACTOR_ONE_MINUS_SRC_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_DST_COLOR, VK_BLEND_FACTOR_DST_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_DST_COLOR, VK_BLEND_FACTOR_ONE_MINUS_DST_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_SRC_ALPHA, VK_BLEND_FACTOR_SRC_ALPHA));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_SRC_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_SRC_ALPHA));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_DST_ALPHA, VK_BLEND_FACTOR_DST_ALPHA));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_DST_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_DST_ALPHA));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_CONSTANT_COLOR, VK_BLEND_FACTOR_CONSTANT_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR, VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_CONSTANT_ALPHA, VK_BLEND_FACTOR_CONSTANT_ALPHA));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_CONSTANT_ALPHA));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_SRC_ALPHA_SATURATE, VK_BLEND_FACTOR_SRC_ALPHA_SATURATE));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_SRC1_COLOR, VK_BLEND_FACTOR_SRC1_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_SRC1_COLOR, VK_BLEND_FACTOR_ONE_MINUS_SRC1_COLOR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_SRC1_ALPHA, VK_BLEND_FACTOR_SRC1_ALPHA));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA, VK_BLEND_FACTOR_ONE_MINUS_SRC1_ALPHA));

// RDD::BlendOperation == VkBlendOp.
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_ADD, VK_BLEND_OP_ADD));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_SUBTRACT, VK_BLEND_OP_SUBTRACT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_REVERSE_SUBTRACT, VK_BLEND_OP_REVERSE_SUBTRACT));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MINIMUM, VK_BLEND_OP_MIN));
static_assert(ENUM_MEMBERS_EQUAL(RDD::BLEND_OP_MAXIMUM, VK_BLEND_OP_MAX));

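// The one-to-one enum layouts verified above are what make the direct casts in
// render_pipeline_create() below safe, e.g. (VkLogicOp)p_blend_state.logic_op or
// (VkBlendFactor)src_color_blend_factor, without any translation tables.
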
RDD::PipelineID RenderingDeviceDriverVulkan::render_pipeline_create(
		ShaderID p_shader,
		VertexFormatID p_vertex_format,
		RenderPrimitive p_render_primitive,
		PipelineRasterizationState p_rasterization_state,
		PipelineMultisampleState p_multisample_state,
		PipelineDepthStencilState p_depth_stencil_state,
		PipelineColorBlendState p_blend_state,
		VectorView<int32_t> p_color_attachments,
		BitField<PipelineDynamicStateFlags> p_dynamic_state,
		RenderPassID p_render_pass,
		uint32_t p_render_subpass,
		VectorView<PipelineSpecializationConstant> p_specialization_constants) {
	// Vertex.
	const VkPipelineVertexInputStateCreateInfo *vertex_input_state_create_info = nullptr;
	if (p_vertex_format.id) {
		const VertexFormatInfo *vf_info = (const VertexFormatInfo *)p_vertex_format.id;
		vertex_input_state_create_info = &vf_info->vk_create_info;
	} else {
		VkPipelineVertexInputStateCreateInfo *null_vertex_input_state = ALLOCA_SINGLE(VkPipelineVertexInputStateCreateInfo);
		*null_vertex_input_state = {};
		null_vertex_input_state->sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO;
		vertex_input_state_create_info = null_vertex_input_state;
	}

	// Input assembly.
	VkPipelineInputAssemblyStateCreateInfo input_assembly_create_info = {};
	input_assembly_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO;
	input_assembly_create_info.topology = RD_TO_VK_PRIMITIVE[p_render_primitive];
	input_assembly_create_info.primitiveRestartEnable = (p_render_primitive == RENDER_PRIMITIVE_TRIANGLE_STRIPS_WITH_RESTART_INDEX);

	// Tessellation.
	VkPipelineTessellationStateCreateInfo tessellation_create_info = {};
	tessellation_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO;
	ERR_FAIL_COND_V(physical_device_properties.limits.maxTessellationPatchSize > 0 && (p_rasterization_state.patch_control_points < 1 || p_rasterization_state.patch_control_points > physical_device_properties.limits.maxTessellationPatchSize), PipelineID());
	tessellation_create_info.patchControlPoints = p_rasterization_state.patch_control_points;

	// Viewport.
	VkPipelineViewportStateCreateInfo viewport_state_create_info = {};
	viewport_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;
	viewport_state_create_info.viewportCount = 1; // If VR extensions are supported at some point, this will have to be customizable in the framebuffer format.
	viewport_state_create_info.scissorCount = 1;

	// Rasterization.
	VkPipelineRasterizationStateCreateInfo rasterization_state_create_info = {};
	rasterization_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO;
	rasterization_state_create_info.depthClampEnable = p_rasterization_state.enable_depth_clamp;
	rasterization_state_create_info.rasterizerDiscardEnable = p_rasterization_state.discard_primitives;
	rasterization_state_create_info.polygonMode = p_rasterization_state.wireframe ? VK_POLYGON_MODE_LINE : VK_POLYGON_MODE_FILL;
	rasterization_state_create_info.cullMode = (PolygonCullMode)p_rasterization_state.cull_mode;
	rasterization_state_create_info.frontFace = (p_rasterization_state.front_face == POLYGON_FRONT_FACE_CLOCKWISE ? VK_FRONT_FACE_CLOCKWISE : VK_FRONT_FACE_COUNTER_CLOCKWISE);
	rasterization_state_create_info.depthBiasEnable = p_rasterization_state.depth_bias_enabled;
	rasterization_state_create_info.depthBiasConstantFactor = p_rasterization_state.depth_bias_constant_factor;
	rasterization_state_create_info.depthBiasClamp = p_rasterization_state.depth_bias_clamp;
	rasterization_state_create_info.depthBiasSlopeFactor = p_rasterization_state.depth_bias_slope_factor;
	rasterization_state_create_info.lineWidth = p_rasterization_state.line_width;

	// Multisample.
	VkPipelineMultisampleStateCreateInfo multisample_state_create_info = {};
	multisample_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO;
	multisample_state_create_info.rasterizationSamples = _ensure_supported_sample_count(p_multisample_state.sample_count);
	multisample_state_create_info.sampleShadingEnable = p_multisample_state.enable_sample_shading;
	multisample_state_create_info.minSampleShading = p_multisample_state.min_sample_shading;
	if (p_multisample_state.sample_mask.size()) {
		static_assert(ARRAYS_COMPATIBLE(uint32_t, VkSampleMask));
		multisample_state_create_info.pSampleMask = p_multisample_state.sample_mask.ptr();
	} else {
		multisample_state_create_info.pSampleMask = nullptr;
	}
	multisample_state_create_info.alphaToCoverageEnable = p_multisample_state.enable_alpha_to_coverage;
	multisample_state_create_info.alphaToOneEnable = p_multisample_state.enable_alpha_to_one;

	// Depth stencil.

	VkPipelineDepthStencilStateCreateInfo depth_stencil_state_create_info = {};
	depth_stencil_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO;
	depth_stencil_state_create_info.depthTestEnable = p_depth_stencil_state.enable_depth_test;
	depth_stencil_state_create_info.depthWriteEnable = p_depth_stencil_state.enable_depth_write;
	depth_stencil_state_create_info.depthCompareOp = (VkCompareOp)p_depth_stencil_state.depth_compare_operator;
	depth_stencil_state_create_info.depthBoundsTestEnable = p_depth_stencil_state.enable_depth_range;
	depth_stencil_state_create_info.stencilTestEnable = p_depth_stencil_state.enable_stencil;

	depth_stencil_state_create_info.front.failOp = (VkStencilOp)p_depth_stencil_state.front_op.fail;
	depth_stencil_state_create_info.front.passOp = (VkStencilOp)p_depth_stencil_state.front_op.pass;
	depth_stencil_state_create_info.front.depthFailOp = (VkStencilOp)p_depth_stencil_state.front_op.depth_fail;
	depth_stencil_state_create_info.front.compareOp = (VkCompareOp)p_depth_stencil_state.front_op.compare;
	depth_stencil_state_create_info.front.compareMask = p_depth_stencil_state.front_op.compare_mask;
	depth_stencil_state_create_info.front.writeMask = p_depth_stencil_state.front_op.write_mask;
	depth_stencil_state_create_info.front.reference = p_depth_stencil_state.front_op.reference;

	depth_stencil_state_create_info.back.failOp = (VkStencilOp)p_depth_stencil_state.back_op.fail;
	depth_stencil_state_create_info.back.passOp = (VkStencilOp)p_depth_stencil_state.back_op.pass;
	depth_stencil_state_create_info.back.depthFailOp = (VkStencilOp)p_depth_stencil_state.back_op.depth_fail;
	depth_stencil_state_create_info.back.compareOp = (VkCompareOp)p_depth_stencil_state.back_op.compare;
	depth_stencil_state_create_info.back.compareMask = p_depth_stencil_state.back_op.compare_mask;
	depth_stencil_state_create_info.back.writeMask = p_depth_stencil_state.back_op.write_mask;
	depth_stencil_state_create_info.back.reference = p_depth_stencil_state.back_op.reference;

	depth_stencil_state_create_info.minDepthBounds = p_depth_stencil_state.depth_range_min;
	depth_stencil_state_create_info.maxDepthBounds = p_depth_stencil_state.depth_range_max;

	// Blend state.

	VkPipelineColorBlendStateCreateInfo color_blend_state_create_info = {};
	color_blend_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO;
	color_blend_state_create_info.logicOpEnable = p_blend_state.enable_logic_op;
	color_blend_state_create_info.logicOp = (VkLogicOp)p_blend_state.logic_op;

	VkPipelineColorBlendAttachmentState *vk_attachment_states = ALLOCA_ARRAY(VkPipelineColorBlendAttachmentState, p_color_attachments.size());
	{
		for (uint32_t i = 0; i < p_color_attachments.size(); i++) {
			vk_attachment_states[i] = {};
			if (p_color_attachments[i] != ATTACHMENT_UNUSED) {
				vk_attachment_states[i].blendEnable = p_blend_state.attachments[i].enable_blend;

				vk_attachment_states[i].srcColorBlendFactor = (VkBlendFactor)p_blend_state.attachments[i].src_color_blend_factor;
				vk_attachment_states[i].dstColorBlendFactor = (VkBlendFactor)p_blend_state.attachments[i].dst_color_blend_factor;
				vk_attachment_states[i].colorBlendOp = (VkBlendOp)p_blend_state.attachments[i].color_blend_op;

				vk_attachment_states[i].srcAlphaBlendFactor = (VkBlendFactor)p_blend_state.attachments[i].src_alpha_blend_factor;
				vk_attachment_states[i].dstAlphaBlendFactor = (VkBlendFactor)p_blend_state.attachments[i].dst_alpha_blend_factor;
				vk_attachment_states[i].alphaBlendOp = (VkBlendOp)p_blend_state.attachments[i].alpha_blend_op;

				if (p_blend_state.attachments[i].write_r) {
					vk_attachment_states[i].colorWriteMask |= VK_COLOR_COMPONENT_R_BIT;
				}
				if (p_blend_state.attachments[i].write_g) {
					vk_attachment_states[i].colorWriteMask |= VK_COLOR_COMPONENT_G_BIT;
				}
				if (p_blend_state.attachments[i].write_b) {
					vk_attachment_states[i].colorWriteMask |= VK_COLOR_COMPONENT_B_BIT;
				}
				if (p_blend_state.attachments[i].write_a) {
					vk_attachment_states[i].colorWriteMask |= VK_COLOR_COMPONENT_A_BIT;
				}
			}
		}
	}
	color_blend_state_create_info.attachmentCount = p_color_attachments.size();
	color_blend_state_create_info.pAttachments = vk_attachment_states;

	color_blend_state_create_info.blendConstants[0] = p_blend_state.blend_constant.r;
	color_blend_state_create_info.blendConstants[1] = p_blend_state.blend_constant.g;
	color_blend_state_create_info.blendConstants[2] = p_blend_state.blend_constant.b;
	color_blend_state_create_info.blendConstants[3] = p_blend_state.blend_constant.a;

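	// Note: attachments marked ATTACHMENT_UNUSED keep the zero-initialized state
	// from above, so blending stays disabled and colorWriteMask stays 0 (no
	// channels written) for those slots.
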
	// Dynamic state.

	VkPipelineDynamicStateCreateInfo dynamic_state_create_info = {};
	dynamic_state_create_info.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO;

	static const uint32_t MAX_DYN_STATE_COUNT = 9;
	VkDynamicState *vk_dynamic_states = ALLOCA_ARRAY(VkDynamicState, MAX_DYN_STATE_COUNT);
	uint32_t vk_dynamic_states_count = 0;

	vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_VIEWPORT; // Viewport and scissor are always dynamic.
	vk_dynamic_states_count++;
	vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_SCISSOR;
	vk_dynamic_states_count++;
	if (p_dynamic_state.has_flag(DYNAMIC_STATE_LINE_WIDTH)) {
		vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_LINE_WIDTH;
		vk_dynamic_states_count++;
	}
	if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BIAS)) {
		vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_DEPTH_BIAS;
		vk_dynamic_states_count++;
	}
	if (p_dynamic_state.has_flag(DYNAMIC_STATE_BLEND_CONSTANTS)) {
		vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_BLEND_CONSTANTS;
		vk_dynamic_states_count++;
	}
	if (p_dynamic_state.has_flag(DYNAMIC_STATE_DEPTH_BOUNDS)) {
		vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_DEPTH_BOUNDS;
		vk_dynamic_states_count++;
	}
	if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_COMPARE_MASK)) {
		vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK;
		vk_dynamic_states_count++;
	}
	if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_WRITE_MASK)) {
		vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_STENCIL_WRITE_MASK;
		vk_dynamic_states_count++;
	}
	if (p_dynamic_state.has_flag(DYNAMIC_STATE_STENCIL_REFERENCE)) {
		vk_dynamic_states[vk_dynamic_states_count] = VK_DYNAMIC_STATE_STENCIL_REFERENCE;
		vk_dynamic_states_count++;
	}
	DEV_ASSERT(vk_dynamic_states_count <= MAX_DYN_STATE_COUNT);

	dynamic_state_create_info.dynamicStateCount = vk_dynamic_states_count;
	dynamic_state_create_info.pDynamicStates = vk_dynamic_states;

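	// Sanity check on MAX_DYN_STATE_COUNT above: 2 always-on states (viewport,
	// scissor) plus 7 optional flags (line width, depth bias, blend constants,
	// depth bounds, stencil compare mask, stencil write mask, stencil reference)
	// = 9, so the DEV_ASSERT can never fire with the current flag set.
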
	void *graphics_pipeline_nextptr = nullptr;

	if (fsr_capabilities.attachment_supported) {
		// Fragment shading rate.
		// If FSR is used, this defines how the different FSR types are combined.
		// combinerOps[0] decides how we use the output of pipeline and primitive (drawcall) FSR.
		// combinerOps[1] decides how we use the output of combinerOps[0] and our attachment FSR.

		VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_create_info = ALLOCA_SINGLE(VkPipelineFragmentShadingRateStateCreateInfoKHR);
		*fsr_create_info = {};
		fsr_create_info->sType = VK_STRUCTURE_TYPE_PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR;
		fsr_create_info->fragmentSize = { 4, 4 };
		fsr_create_info->combinerOps[0] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR; // We don't use pipeline/primitive FSR so this really doesn't matter.
		fsr_create_info->combinerOps[1] = VK_FRAGMENT_SHADING_RATE_COMBINER_OP_REPLACE_KHR; // Always use the outcome of attachment FSR if enabled.

		graphics_pipeline_nextptr = fsr_create_info;
	}

	// Finally, pipeline create info.

	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;

	VkGraphicsPipelineCreateInfo pipeline_create_info = {};

	pipeline_create_info.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO;
	pipeline_create_info.pNext = graphics_pipeline_nextptr;
	pipeline_create_info.stageCount = shader_info->vk_stages_create_info.size();

	ERR_FAIL_COND_V_MSG(pipeline_create_info.stageCount == 0, PipelineID(),
			"Cannot create pipeline without shader module, please make sure shader modules are destroyed only after all associated pipelines are created.");
	VkPipelineShaderStageCreateInfo *vk_pipeline_stages = ALLOCA_ARRAY(VkPipelineShaderStageCreateInfo, shader_info->vk_stages_create_info.size());

	thread_local std::vector<uint8_t> respv_optimized_data;
	thread_local LocalVector<respv::SpecConstant> respv_spec_constants;
	thread_local LocalVector<VkShaderModule> respv_shader_modules;
	thread_local LocalVector<VkSpecializationMapEntry> specialization_entries;

#if RECORD_PIPELINE_STATISTICS
	thread_local LocalVector<uint64_t> respv_run_time;
	thread_local LocalVector<uint64_t> respv_size;
	uint32_t stage_count = shader_info->vk_stages_create_info.size();
	respv_run_time.clear();
	respv_size.clear();
	respv_run_time.resize_initialized(stage_count);
	respv_size.resize_initialized(stage_count);
#endif

	respv_shader_modules.clear();
	specialization_entries.clear();

	for (uint32_t i = 0; i < shader_info->vk_stages_create_info.size(); i++) {
		vk_pipeline_stages[i] = shader_info->vk_stages_create_info[i];

		if (p_specialization_constants.size()) {
			bool use_pipeline_spec_constants = true;
			if ((i < shader_info->respv_stage_shaders.size()) && !shader_info->respv_stage_shaders[i].empty()) {
#if RECORD_PIPELINE_STATISTICS
				uint64_t respv_start_time = OS::get_singleton()->get_ticks_usec();
#endif
				// Attempt to optimize the shader using re-spirv before relying on the driver.
				respv_spec_constants.resize(p_specialization_constants.size());
				for (uint32_t j = 0; j < p_specialization_constants.size(); j++) {
					respv_spec_constants[j].specId = p_specialization_constants[j].constant_id;
					respv_spec_constants[j].values.resize(1);
					respv_spec_constants[j].values[0] = p_specialization_constants[j].int_value;
				}

				respv::Options respv_options;
#if RESPV_DONT_REMOVE_DEAD_CODE
				respv_options.removeDeadCode = false;
#endif
				if (respv::Optimizer::run(shader_info->respv_stage_shaders[i], respv_spec_constants.ptr(), respv_spec_constants.size(), respv_optimized_data, respv_options)) {
#if RESPV_VERBOSE
					String spec_constants;
					for (uint32_t j = 0; j < p_specialization_constants.size(); j++) {
						spec_constants += vformat("%d: %d", p_specialization_constants[j].constant_id, p_specialization_constants[j].int_value);
						if (j < p_specialization_constants.size() - 1) {
							spec_constants += ", ";
						}
					}

					print_line(vformat("re-spirv transformed the shader from %d bytes to %d bytes with constants %s (%d).", shader_info->respv_stage_shaders[i].inlinedSpirvWords.size() * sizeof(uint32_t), respv_optimized_data.size(), spec_constants, p_shader.id));
#endif

					// Create the shader module with the optimized output.
					VkShaderModule shader_module = VK_NULL_HANDLE;
					VkShaderModuleCreateInfo shader_module_create_info = {};
					shader_module_create_info.sType = VK_STRUCTURE_TYPE_SHADER_MODULE_CREATE_INFO;
					shader_module_create_info.pCode = (const uint32_t *)(respv_optimized_data.data());
					shader_module_create_info.codeSize = respv_optimized_data.size();
					VkResult err = vkCreateShaderModule(vk_device, &shader_module_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE), &shader_module);
					if (err == VK_SUCCESS) {
						// Replace the module used in the creation info.
						vk_pipeline_stages[i].module = shader_module;
						respv_shader_modules.push_back(shader_module);
						use_pipeline_spec_constants = false;
					}

#if RECORD_PIPELINE_STATISTICS
					respv_run_time[i] = OS::get_singleton()->get_ticks_usec() - respv_start_time;
					respv_size[i] = respv_optimized_data.size();
#endif
				} else {
#if RESPV_VERBOSE
					print_line("re-spirv failed to optimize the shader.");
#endif
				}
			}

			if (use_pipeline_spec_constants) {
				// Use specialization constants through the driver.
				if (specialization_entries.is_empty()) {
					specialization_entries.resize(p_specialization_constants.size());
					for (uint32_t j = 0; j < p_specialization_constants.size(); j++) {
						specialization_entries[j] = {};
						specialization_entries[j].constantID = p_specialization_constants[j].constant_id;
						specialization_entries[j].offset = (const char *)&p_specialization_constants[j].int_value - (const char *)p_specialization_constants.ptr();
						specialization_entries[j].size = sizeof(uint32_t);
					}
				}

				VkSpecializationInfo *specialization_info = ALLOCA_SINGLE(VkSpecializationInfo);
				*specialization_info = {};
				specialization_info->dataSize = p_specialization_constants.size() * sizeof(PipelineSpecializationConstant);
				specialization_info->pData = p_specialization_constants.ptr();
				specialization_info->mapEntryCount = specialization_entries.size();
				specialization_info->pMapEntries = specialization_entries.ptr();

				vk_pipeline_stages[i].pSpecializationInfo = specialization_info;
			}
		}
	}

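	// Illustration of the map-entry offsets computed above: pData points at the
	// raw array of PipelineSpecializationConstant, so the pointer difference for
	// entry j is equivalent to:
	//
	//   j * sizeof(PipelineSpecializationConstant) + offsetof(PipelineSpecializationConstant, int_value)
	//
	// i.e. the driver reads each 32-bit int_value in place; the rest of each
	// struct is ignored since only the mapped ranges are consumed.
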
	const RenderPassInfo *render_pass = (const RenderPassInfo *)(p_render_pass.id);
	pipeline_create_info.pStages = vk_pipeline_stages;
	pipeline_create_info.pVertexInputState = vertex_input_state_create_info;
	pipeline_create_info.pInputAssemblyState = &input_assembly_create_info;
	pipeline_create_info.pTessellationState = &tessellation_create_info;
	pipeline_create_info.pViewportState = &viewport_state_create_info;
	pipeline_create_info.pRasterizationState = &rasterization_state_create_info;
	pipeline_create_info.pMultisampleState = &multisample_state_create_info;
	pipeline_create_info.pDepthStencilState = &depth_stencil_state_create_info;
	pipeline_create_info.pColorBlendState = &color_blend_state_create_info;
	pipeline_create_info.pDynamicState = &dynamic_state_create_info;
	pipeline_create_info.layout = shader_info->vk_pipeline_layout;
	pipeline_create_info.renderPass = render_pass->vk_render_pass;
	pipeline_create_info.subpass = p_render_subpass;

#if RECORD_PIPELINE_STATISTICS
	uint64_t pipeline_start_time = OS::get_singleton()->get_ticks_usec();
#endif

	VkPipeline vk_pipeline = VK_NULL_HANDLE;
	VkResult err = vkCreateGraphicsPipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE), &vk_pipeline);
	ERR_FAIL_COND_V_MSG(err, PipelineID(), "vkCreateGraphicsPipelines failed with error " + itos(err) + ".");

#if RECORD_PIPELINE_STATISTICS
	{
		MutexLock lock(pipeline_statistics.file_access_mutex);
		uint64_t pipeline_creation_time = OS::get_singleton()->get_ticks_usec() - pipeline_start_time;
		for (uint32_t i = 0; i < shader_info->vk_stages_create_info.size(); i++) {
			PackedStringArray csv_array = {
				shader_info->name,
				String::num_uint64(hash_murmur3_buffer(shader_info->spirv_stage_bytes[i].ptr(), shader_info->spirv_stage_bytes[i].size())),
				String::num_uint64(i),
				String::num_uint64(respv_size[i] > 0),
				String::num_uint64(shader_info->original_stage_size[i]),
				String::num_uint64(respv_size[i] > 0 ? respv_size[i] : shader_info->spirv_stage_bytes[i].size()),
				String::num_uint64(respv_run_time[i] + pipeline_creation_time)
			};

			pipeline_statistics.file_access->store_csv_line(csv_array);
		}

		pipeline_statistics.file_access->flush();
	}
#endif

	// Destroy any modules created temporarily by re-spirv.
	for (VkShaderModule vk_module : respv_shader_modules) {
		vkDestroyShaderModule(vk_device, vk_module, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_SHADER_MODULE));
	}

	return PipelineID(vk_pipeline);
}

/********************/
/**** RAYTRACING ****/
/********************/

// RDD::AccelerationStructureGeometryBits == VkGeometryFlagsKHR.
static_assert(ENUM_MEMBERS_EQUAL(RDD::ACCELERATION_STRUCTURE_GEOMETRY_OPAQUE, VK_GEOMETRY_OPAQUE_BIT_KHR));
static_assert(ENUM_MEMBERS_EQUAL(RDD::ACCELERATION_STRUCTURE_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION, VK_GEOMETRY_NO_DUPLICATE_ANY_HIT_INVOCATION_BIT_KHR));

RDD::AccelerationStructureID RenderingDeviceDriverVulkan::blas_create(BufferID p_vertex_buffer, uint64_t p_vertex_offset, VertexFormatID p_vertex_format, uint32_t p_vertex_count, uint32_t p_position_attribute_location, BufferID p_index_buffer, IndexBufferFormat p_index_format, uint64_t p_index_offset_bytes, uint32_t p_index_count, BitField<AccelerationStructureGeometryBits> p_geometry_bits) {
#if VULKAN_RAYTRACING_ENABLED
	const VertexFormatInfo *vf_info = (const VertexFormatInfo *)p_vertex_format.id;

	const VkVertexInputAttributeDescription *position_attribute = nullptr;
	for (const VkVertexInputAttributeDescription &attribute : vf_info->vk_attributes) {
		if (attribute.location == p_position_attribute_location) {
			position_attribute = &attribute;
			break;
		}
	}
	ERR_FAIL_NULL_V_MSG(position_attribute, AccelerationStructureID(), "BLAS position attribute location is missing from the vertex format.");

	uint32_t position_binding_index = position_attribute->binding;
	if (position_binding_index == UINT32_MAX) {
		position_binding_index = p_position_attribute_location;
	}

	const VkVertexInputBindingDescription *position_binding = nullptr;
	for (const VkVertexInputBindingDescription &binding : vf_info->vk_bindings) {
		if (binding.binding == position_binding_index) {
			position_binding = &binding;
			break;
		}
	}
	ERR_FAIL_NULL_V_MSG(position_binding, AccelerationStructureID(), "BLAS position attribute binding is missing from the vertex format.");

	VkDeviceSize buffer_offset = position_attribute->offset;

	VkDeviceAddress vertex_address = buffer_get_device_address(p_vertex_buffer) + buffer_offset;
	VkDeviceAddress index_address = buffer_get_device_address(p_index_buffer) + p_index_offset_bytes;

	VkDeviceSize vertex_stride = position_binding->stride;
	VkFormat vertex_format = position_attribute->format;
	uint32_t max_vertex = p_vertex_count ? p_vertex_count - 1 : 0;

	AccelerationStructureInfo *accel_info = VersatileResource::allocate<AccelerationStructureInfo>(resources_allocator);

	accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
	accel_info->geometry.geometryType = VK_GEOMETRY_TYPE_TRIANGLES_KHR;
	accel_info->geometry.flags = p_geometry_bits;

	accel_info->geometry.geometry.triangles.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_TRIANGLES_DATA_KHR;
	accel_info->geometry.geometry.triangles.vertexFormat = vertex_format;
	accel_info->geometry.geometry.triangles.vertexData.deviceAddress = vertex_address;
	accel_info->geometry.geometry.triangles.vertexStride = vertex_stride;
	accel_info->geometry.geometry.triangles.indexType = p_index_format == INDEX_BUFFER_FORMAT_UINT16 ? VK_INDEX_TYPE_UINT16 : VK_INDEX_TYPE_UINT32;
	accel_info->geometry.geometry.triangles.indexData.deviceAddress = index_address;
	accel_info->geometry.geometry.triangles.transformData.deviceAddress = 0;
	// Number of vertices in vertexData minus one, aka the maximum vertex index.
	accel_info->geometry.geometry.triangles.maxVertex = max_vertex;

	// Info for building the BLAS.
	uint32_t primitive_count = p_vertex_count / 3;
	if (p_index_buffer) {
		primitive_count = p_index_count / 3;
	}
	// The vertex offset is expressed in bytes.
	uint32_t first_vertex = p_vertex_offset / vertex_stride;
	accel_info->range_info.firstVertex = first_vertex;
	accel_info->range_info.primitiveCount = primitive_count;
	accel_info->range_info.primitiveOffset = 0;
	accel_info->range_info.transformOffset = 0;
	uint32_t max_primitive_count = accel_info->range_info.primitiveCount;

	accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
	accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR;
	accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
	accel_info->build_info.pGeometries = &accel_info->geometry;
	accel_info->build_info.geometryCount = 1;
	accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;

	VkAccelerationStructureBuildSizesInfoKHR size_info = {};
	size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;

	vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &max_primitive_count, &size_info);
	_acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_BOTTOM_LEVEL_KHR, size_info, accel_info);

	return AccelerationStructureID(accel_info);
#else
	return AccelerationStructureID();
#endif
}

#if VULKAN_RAYTRACING_ENABLED
static _FORCE_INLINE_ void _store_transform_transposed_3x4(const Transform3D &p_mtx, VkTransformMatrixKHR &r_mtx) {
	r_mtx.matrix[0][0] = p_mtx.basis.rows[0][0];
	r_mtx.matrix[0][1] = p_mtx.basis.rows[0][1];
	r_mtx.matrix[0][2] = p_mtx.basis.rows[0][2];
	r_mtx.matrix[0][3] = p_mtx.origin.x;
	r_mtx.matrix[1][0] = p_mtx.basis.rows[1][0];
	r_mtx.matrix[1][1] = p_mtx.basis.rows[1][1];
	r_mtx.matrix[1][2] = p_mtx.basis.rows[1][2];
	r_mtx.matrix[1][3] = p_mtx.origin.y;
	r_mtx.matrix[2][0] = p_mtx.basis.rows[2][0];
	r_mtx.matrix[2][1] = p_mtx.basis.rows[2][1];
	r_mtx.matrix[2][2] = p_mtx.basis.rows[2][2];
	r_mtx.matrix[2][3] = p_mtx.origin.z;
}
#endif

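// VkTransformMatrixKHR is a row-major 3x4 affine matrix, so the helper above
// writes the basis rows into the first three columns of each row and the origin
// into the fourth column; no bottom (0 0 0 1) row is stored.
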
uint32_t RenderingDeviceDriverVulkan::tlas_instances_buffer_get_size_bytes(uint32_t p_instance_count) {
#if VULKAN_RAYTRACING_ENABLED
	return p_instance_count * sizeof(VkAccelerationStructureInstanceKHR);
#else
	return 0;
#endif
}

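// Per the Vulkan spec, VkAccelerationStructureInstanceKHR is a fixed 64-byte
// struct (a 48-byte transform, two packed 32-bit index/mask/flags words, and a
// 64-bit acceleration structure reference), so the size above is simply
// p_instance_count * 64.
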
void RenderingDeviceDriverVulkan::tlas_instances_buffer_fill(BufferID p_instances_buffer, VectorView<AccelerationStructureID> p_blases, VectorView<Transform3D> p_transforms) {
#if VULKAN_RAYTRACING_ENABLED
	uint32_t blases_count = p_blases.size();
	ERR_FAIL_COND_MSG(blases_count != p_transforms.size(), "Blases and transforms vectors must have the same size.");
	ERR_FAIL_COND(blases_count == 0);

	LocalVector<VkAccelerationStructureInstanceKHR> instances;
	instances.resize(blases_count);

	for (uint32_t i = 0; i < blases_count; ++i) {
		const AccelerationStructureID &blas = p_blases[i];
		AccelerationStructureInfo *blas_info = (AccelerationStructureInfo *)blas.id;

		VkAccelerationStructureInstanceKHR &instance = instances[i];
		_store_transform_transposed_3x4(p_transforms[i], instance.transform);
		instance.instanceCustomIndex = i;
		instance.mask = 0xFF;
		instance.accelerationStructureReference = buffer_get_device_address(blas_info->buffer);
		instance.instanceShaderBindingTableRecordOffset = 0;
		instance.flags = VK_GEOMETRY_INSTANCE_TRIANGLE_FACING_CULL_DISABLE_BIT_KHR;
	}

	uint8_t *data_ptr = buffer_map(p_instances_buffer);
	ERR_FAIL_NULL(data_ptr);
	uint32_t instances_size = blases_count * sizeof(instances[0]);
	memcpy(data_ptr, instances.ptr(), instances_size);
	buffer_unmap(p_instances_buffer);
#endif
}

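// Note: every instance above is visible to all ray masks (mask = 0xFF) and tags
// itself with its array index via instanceCustomIndex, which shaders can read
// back (gl_InstanceCustomIndexEXT in GLSL) to look up per-instance data.
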
RDD::AccelerationStructureID RenderingDeviceDriverVulkan::tlas_create(BufferID p_instances_buffer) {
#if VULKAN_RAYTRACING_ENABLED
	ERR_FAIL_COND_V(p_instances_buffer == BufferID(), AccelerationStructureID());

	AccelerationStructureInfo *accel_info = VersatileResource::allocate<AccelerationStructureInfo>(resources_allocator);

	accel_info->geometry.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_KHR;
	accel_info->geometry.geometryType = VK_GEOMETRY_TYPE_INSTANCES_KHR;
	accel_info->geometry.geometry.instances.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_GEOMETRY_INSTANCES_DATA_KHR;
	accel_info->geometry.geometry.instances.data.deviceAddress = buffer_get_device_address(p_instances_buffer);

	accel_info->build_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_GEOMETRY_INFO_KHR;
	accel_info->build_info.flags = VK_BUILD_ACCELERATION_STRUCTURE_PREFER_FAST_TRACE_BIT_KHR;
	accel_info->build_info.geometryCount = 1;
	accel_info->build_info.pGeometries = &accel_info->geometry;
	accel_info->build_info.mode = VK_BUILD_ACCELERATION_STRUCTURE_MODE_BUILD_KHR;
	accel_info->build_info.type = VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR;

	uint32_t instance_count = buffer_get_allocation_size(p_instances_buffer) / sizeof(VkAccelerationStructureInstanceKHR);
	VkAccelerationStructureBuildSizesInfoKHR size_info = {};
	size_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_BUILD_SIZES_INFO_KHR;
	vkGetAccelerationStructureBuildSizesKHR(vk_device, VK_ACCELERATION_STRUCTURE_BUILD_TYPE_DEVICE_KHR, &accel_info->build_info, &instance_count, &size_info);
	accel_info->range_info.primitiveCount = instance_count;

	_acceleration_structure_create(VK_ACCELERATION_STRUCTURE_TYPE_TOP_LEVEL_KHR, size_info, accel_info);
	return AccelerationStructureID(accel_info);
#else
	return AccelerationStructureID();
#endif
}

#if VULKAN_RAYTRACING_ENABLED
static VkDeviceAddress _align_up_address(VkDeviceAddress address, VkDeviceAddress alignment) {
	return (address + (alignment - 1)) & ~(alignment - 1);
}
#endif

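// _align_up_address() assumes a power-of-two alignment, which Vulkan guarantees
// for minAccelerationStructureScratchOffsetAlignment. E.g. aligning 0x1003 up to
// 256 bytes yields (0x1003 + 0xFF) & ~0xFF = 0x1100.
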
void RenderingDeviceDriverVulkan::_acceleration_structure_create(VkAccelerationStructureTypeKHR p_type, VkAccelerationStructureBuildSizesInfoKHR p_size_info, AccelerationStructureInfo *r_accel_info) {
#if VULKAN_RAYTRACING_ENABLED
	RDD::BufferID buffer = buffer_create(p_size_info.accelerationStructureSize, RDD::BUFFER_USAGE_ACCELERATION_STRUCTURE_STORAGE_BIT | RDD::BUFFER_USAGE_STORAGE_BIT | RDD::BUFFER_USAGE_DEVICE_ADDRESS_BIT, RDD::MEMORY_ALLOCATION_TYPE_GPU, UINT64_MAX);
	r_accel_info->buffer = buffer;

	// Scratch address must be a multiple of minAccelerationStructureScratchOffsetAlignment.
	r_accel_info->scratch_alignment = acceleration_structure_capabilities.min_acceleration_structure_scratch_offset_alignment;
	r_accel_info->scratch_size = p_size_info.buildScratchSize + r_accel_info->scratch_alignment;

	VkAccelerationStructureCreateInfoKHR accel_create_info = {};
	accel_create_info.sType = VK_STRUCTURE_TYPE_ACCELERATION_STRUCTURE_CREATE_INFO_KHR;
	accel_create_info.type = p_type;
	accel_create_info.size = p_size_info.accelerationStructureSize;
	accel_create_info.buffer = ((const BufferInfo *)buffer.id)->vk_buffer;
	VkResult err = vkCreateAccelerationStructureKHR(vk_device, &accel_create_info, nullptr, &r_accel_info->vk_acceleration_structure);
	ERR_FAIL_COND_MSG(err, "vkCreateAccelerationStructureKHR failed with error " + itos(err) + ".");
	r_accel_info->build_info.dstAccelerationStructure = r_accel_info->vk_acceleration_structure;
#endif
}

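// Note: scratch_size above intentionally over-allocates by one alignment unit so
// that command_build_acceleration_structure() can round the scratch buffer's
// device address up to scratch_alignment and still have buildScratchSize usable
// bytes behind it.
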
void RenderingDeviceDriverVulkan::acceleration_structure_free(AccelerationStructureID p_acceleration_structure) {
#if VULKAN_RAYTRACING_ENABLED
	AccelerationStructureInfo *accel_info = (AccelerationStructureInfo *)p_acceleration_structure.id;
	ERR_FAIL_NULL_MSG(accel_info, "Acceleration structure input parameter is not valid.");
	if (accel_info->instances_buffer) {
		buffer_free(accel_info->instances_buffer);
	}
	if (accel_info->buffer) {
		buffer_free(accel_info->buffer);
	}
	if (accel_info->vk_acceleration_structure) {
		vkDestroyAccelerationStructureKHR(vk_device, accel_info->vk_acceleration_structure, nullptr);
	}
	VersatileResource::free(resources_allocator, accel_info);
#endif
}

uint32_t RenderingDeviceDriverVulkan::acceleration_structure_get_scratch_size_bytes(AccelerationStructureID p_acceleration_structure) {
	AccelerationStructureInfo *accel_info = (AccelerationStructureInfo *)p_acceleration_structure.id;
	ERR_FAIL_NULL_V_MSG(accel_info, 0, "Acceleration structure input parameter is not valid.");
	return accel_info->scratch_size;
}

// ----- COMMANDS -----

void RenderingDeviceDriverVulkan::command_build_acceleration_structure(CommandBufferID p_cmd_buffer, AccelerationStructureID p_acceleration_structure, BufferID p_scratch_buffer) {
#if VULKAN_RAYTRACING_ENABLED
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	AccelerationStructureInfo *accel_info = (AccelerationStructureInfo *)p_acceleration_structure.id;

	VkAccelerationStructureBuildGeometryInfoKHR *build_info = &accel_info->build_info;
	VkDeviceAddress scratch_address = buffer_get_device_address(p_scratch_buffer);
	build_info->scratchData.deviceAddress = _align_up_address(scratch_address, accel_info->scratch_alignment);

	const VkAccelerationStructureBuildRangeInfoKHR *range_info_ptr = &accel_info->range_info;

	vkCmdBuildAccelerationStructuresKHR(command_buffer->vk_command_buffer, 1, build_info, &range_info_ptr);
#endif
}

void RenderingDeviceDriverVulkan::command_bind_raytracing_pipeline(CommandBufferID p_cmd_buffer, RaytracingPipelineID p_pipeline) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	bound_raytracing_pipeline_id = p_pipeline;
	const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)p_pipeline.id;
	vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, rpi->vk_pipeline);
}

void RenderingDeviceDriverVulkan::command_bind_raytracing_uniform_set(CommandBufferID p_cmd_buffer, UniformSetID p_uniform_set, ShaderID p_shader, uint32_t p_set_index) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
	const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_set.id;
	vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_RAY_TRACING_KHR, shader_info->vk_pipeline_layout, p_set_index, 1, &usi->vk_descriptor_set, 0, nullptr);
}

void RenderingDeviceDriverVulkan::command_trace_rays(CommandBufferID p_cmd_buffer, uint32_t p_width, uint32_t p_height) {
#if VULKAN_RAYTRACING_ENABLED
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	ERR_FAIL_COND_MSG(bound_raytracing_pipeline_id == RaytracingPipelineID(), "A raytracing pipeline must have been bound with `command_bind_raytracing_pipeline()`.");
	const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)bound_raytracing_pipeline_id.id;
	vkCmdTraceRaysKHR(command_buffer->vk_command_buffer, &rpi->regions.raygen, &rpi->regions.miss, &rpi->regions.hit, &rpi->regions.call, p_width, p_height, 1);
#endif
}

// --- PIPELINE ---

RDD::RaytracingPipelineID RenderingDeviceDriverVulkan::raytracing_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {
#if VULKAN_RAYTRACING_ENABLED
	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;

	VkRayTracingPipelineCreateInfoKHR pipeline_create_info = {};
	pipeline_create_info.sType = VK_STRUCTURE_TYPE_RAY_TRACING_PIPELINE_CREATE_INFO_KHR;

	// Stages.
	pipeline_create_info.stageCount = shader_info->vk_stages_create_info.size();

	VkPipelineShaderStageCreateInfo *vk_pipeline_stages = ALLOCA_ARRAY(VkPipelineShaderStageCreateInfo, pipeline_create_info.stageCount);

	for (uint32_t i = 0; i < pipeline_create_info.stageCount; i++) {
		vk_pipeline_stages[i] = shader_info->vk_stages_create_info[i];

		if (p_specialization_constants.size()) {
			VkSpecializationMapEntry *specialization_map_entries = ALLOCA_ARRAY(VkSpecializationMapEntry, p_specialization_constants.size());
			for (uint32_t j = 0; j < p_specialization_constants.size(); j++) {
				specialization_map_entries[j] = {};
				specialization_map_entries[j].constantID = p_specialization_constants[j].constant_id;
				specialization_map_entries[j].offset = (const char *)&p_specialization_constants[j].int_value - (const char *)p_specialization_constants.ptr();
				specialization_map_entries[j].size = sizeof(uint32_t);
			}

			VkSpecializationInfo *specialization_info = ALLOCA_SINGLE(VkSpecializationInfo);
			*specialization_info = {};
			specialization_info->dataSize = p_specialization_constants.size() * sizeof(PipelineSpecializationConstant);
			specialization_info->pData = p_specialization_constants.ptr();
			specialization_info->mapEntryCount = p_specialization_constants.size();
			specialization_info->pMapEntries = specialization_map_entries;

			vk_pipeline_stages[i].pSpecializationInfo = specialization_info;
		}
	}

	// Groups.
	pipeline_create_info.groupCount = pipeline_create_info.stageCount;
	VkRayTracingShaderGroupCreateInfoKHR *vk_pipeline_groups = ALLOCA_ARRAY(VkRayTracingShaderGroupCreateInfoKHR, pipeline_create_info.groupCount);
	for (uint32_t i = 0; i < pipeline_create_info.stageCount; i++) {
		vk_pipeline_groups[i] = shader_info->vk_groups_create_info[i];
	}

	// Pipeline.
	pipeline_create_info.layout = shader_info->vk_pipeline_layout;
	pipeline_create_info.pStages = vk_pipeline_stages;
	pipeline_create_info.pGroups = vk_pipeline_groups;
	pipeline_create_info.maxPipelineRayRecursionDepth = 1;

	RaytracingPipelineInfo *rpi = VersatileResource::allocate<RaytracingPipelineInfo>(resources_allocator);

	VkResult err = vkCreateRayTracingPipelinesKHR(vk_device, VK_NULL_HANDLE, pipelines_cache.vk_cache, 1, &pipeline_create_info, nullptr, &rpi->vk_pipeline);
	ERR_FAIL_COND_V_MSG(err, RaytracingPipelineID(), "vkCreateRayTracingPipelinesKHR failed with error " + itos(err) + ".");

	RaytracingPipelineID raytracing_pipeline = RaytracingPipelineID(rpi);
	err = _raytracing_pipeline_stb_create(raytracing_pipeline, p_shader);
	ERR_FAIL_COND_V_MSG(err, RaytracingPipelineID(), "_raytracing_pipeline_stb_create failed with error " + itos(err) + ".");

	return raytracing_pipeline;
#else
	return RaytracingPipelineID();
#endif
}

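// Note: the pipeline above assumes one shader group per stage (groupCount ==
// stageCount), with the group records themselves prepared ahead of time in
// shader_info->vk_groups_create_info.
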
VkResult RenderingDeviceDriverVulkan::_raytracing_pipeline_stb_create(RaytracingPipelineID p_pipeline, ShaderID p_shader) {
#if VULKAN_RAYTRACING_ENABLED
	RaytracingPipelineInfo *rpi = (RaytracingPipelineInfo *)p_pipeline.id;
	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;

	// Shader group handles.
	uint32_t handle_size_aligned = raytracing_capabilities.shader_group_handle_size_aligned;
	uint32_t base_alignment = raytracing_capabilities.shader_group_base_alignment;

	rpi->regions.raygen.stride = _align_up(handle_size_aligned * shader_info->region_count.raygen_count, base_alignment);
	rpi->regions.raygen.size = rpi->regions.raygen.stride; // The Vulkan spec requires the raygen region's size to equal its stride.

	rpi->regions.hit.stride = handle_size_aligned;
	rpi->regions.hit.size = _align_up(handle_size_aligned * shader_info->region_count.hit_count, base_alignment);

	rpi->regions.miss.stride = handle_size_aligned;
	rpi->regions.miss.size = _align_up(handle_size_aligned * shader_info->region_count.miss_count, base_alignment);

	rpi->regions.call.stride = 0;
	rpi->regions.call.size = 0;

	// Shader binding table.
	uint32_t sbt_size = rpi->regions.raygen.size + rpi->regions.hit.size + rpi->regions.miss.size + rpi->regions.call.size;
	rpi->sbt_buffer = buffer_create(sbt_size, BUFFER_USAGE_TRANSFER_FROM_BIT | BUFFER_USAGE_DEVICE_ADDRESS_BIT | BUFFER_USAGE_SHADER_BINDING_TABLE_BIT, MEMORY_ALLOCATION_TYPE_CPU, UINT64_MAX);

	// Update regions addresses.
	rpi->regions.raygen.deviceAddress = buffer_get_device_address(rpi->sbt_buffer);
	rpi->regions.hit.deviceAddress = rpi->regions.raygen.deviceAddress + rpi->regions.raygen.size;
	rpi->regions.miss.deviceAddress = rpi->regions.hit.deviceAddress + rpi->regions.hit.size;
	rpi->regions.call.deviceAddress = 0;

	// Update shader binding table buffer.
	uint32_t handle_size = raytracing_capabilities.shader_group_handle_size;
	uint32_t handles_size = shader_info->region_count.group_count * handle_size;
	LocalVector<uint8_t> handles_data;
	handles_data.resize(handles_size);
	uint8_t *handles_ptr = handles_data.ptr();

	VkResult err = vkGetRayTracingShaderGroupHandlesKHR(vk_device, rpi->vk_pipeline, 0, shader_info->region_count.group_count, handles_size, handles_ptr);
	ERR_FAIL_COND_V_MSG(err, err, "vkGetRayTracingShaderGroupHandlesKHR failed with error " + itos(err) + ".");

	uint8_t *sbt_ptr = buffer_map(rpi->sbt_buffer);
	uint8_t *sbt_data = sbt_ptr;
	uint32_t handle_index = 0;

	// Raygen.
	memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size);
	++handle_index;

	// Hit.
	sbt_data = sbt_ptr + rpi->regions.raygen.size;
	for (uint32_t i = 0; i < shader_info->region_count.hit_count; ++i) {
		memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size);
		sbt_data += rpi->regions.hit.stride;
		++handle_index;
	}

	// Miss.
	sbt_data = sbt_ptr + rpi->regions.raygen.size + rpi->regions.hit.size;
	for (uint32_t i = 0; i < shader_info->region_count.miss_count; ++i) {
		memcpy(sbt_data, handles_ptr + handle_index * handle_size, handle_size);
		sbt_data += rpi->regions.miss.stride;
		++handle_index;
	}

	buffer_unmap(rpi->sbt_buffer);

	return err;
#else
	return VK_ERROR_UNKNOWN;
#endif
}

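// Resulting SBT layout in sbt_buffer (illustration):
//
//   | raygen (size == stride) | hit records, hit.stride apart | miss records, miss.stride apart |
//   ^ raygen.deviceAddress    ^ hit.deviceAddress             ^ miss.deviceAddress
//
// The callable region is left empty (size 0), matching the regions passed to
// vkCmdTraceRaysKHR() in command_trace_rays().
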
void RenderingDeviceDriverVulkan::raytracing_pipeline_free(RaytracingPipelineID p_pipeline) {
	const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)p_pipeline.id;
	vkDestroyPipeline(vk_device, rpi->vk_pipeline, nullptr);
	if (rpi->sbt_buffer) {
		buffer_free(rpi->sbt_buffer);
	}
	VersatileResource::free(resources_allocator, rpi);
}

/*****************/
6505
/**** COMPUTE ****/
6506
/*****************/
6507
6508
// ----- COMMANDS -----
6509
6510
void RenderingDeviceDriverVulkan::command_bind_compute_pipeline(CommandBufferID p_cmd_buffer, PipelineID p_pipeline) {
6511
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
6512
vkCmdBindPipeline(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, (VkPipeline)p_pipeline.id);
6513
}
6514
6515
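// p_dynamic_offsets packs one 4-bit frame index per dynamic buffer, consumed in the
// order the sets (and their dynamic buffers) are traversed below. For example, with
// two dynamic buffers and frame indices 3 and 1, the packed value is 0x13: the first
// buffer reads bits 0-3 (3) and the second reads bits 4-7 (1). Each index is turned
// into a byte offset by multiplying it with that buffer's per-frame size.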
void RenderingDeviceDriverVulkan::command_bind_compute_uniform_sets(CommandBufferID p_cmd_buffer, VectorView<UniformSetID> p_uniform_sets, ShaderID p_shader, uint32_t p_first_set_index, uint32_t p_set_count, uint32_t p_dynamic_offsets) {
	if (p_set_count == 0) {
		return;
	}

	thread_local LocalVector<VkDescriptorSet> sets;
	sets.clear();
	sets.resize(p_set_count);

	uint32_t dynamic_offsets[MAX_DYNAMIC_BUFFERS];
	uint32_t shift = 0u;
	uint32_t curr_dynamic_offset = 0u;

	for (uint32_t i = 0; i < p_set_count; i++) {
		const UniformSetInfo *usi = (const UniformSetInfo *)p_uniform_sets[i].id;

		sets[i] = usi->vk_descriptor_set;

		// This condition should already have been validated by this point.
		DEV_ASSERT(curr_dynamic_offset + usi->dynamic_buffers.size() <= MAX_DYNAMIC_BUFFERS);

		const uint32_t dynamic_offset_count = usi->dynamic_buffers.size();
		for (uint32_t j = 0u; j < dynamic_offset_count; ++j) {
			const uint32_t frame_idx = (p_dynamic_offsets >> shift) & 0xFu;
			shift += 4u;
			dynamic_offsets[curr_dynamic_offset++] = uint32_t(frame_idx * usi->dynamic_buffers[j]->size);
		}
	}

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;
	vkCmdBindDescriptorSets(command_buffer->vk_command_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, shader_info->vk_pipeline_layout, p_first_set_index, p_set_count, &sets[0], curr_dynamic_offset, dynamic_offsets);
}
6548
6549
void RenderingDeviceDriverVulkan::command_compute_dispatch(CommandBufferID p_cmd_buffer, uint32_t p_x_groups, uint32_t p_y_groups, uint32_t p_z_groups) {
6550
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
6551
vkCmdDispatch(command_buffer->vk_command_buffer, p_x_groups, p_y_groups, p_z_groups);
6552
}
6553
6554
void RenderingDeviceDriverVulkan::command_compute_dispatch_indirect(CommandBufferID p_cmd_buffer, BufferID p_indirect_buffer, uint64_t p_offset) {
6555
const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
6556
const BufferInfo *buf_info = (const BufferInfo *)p_indirect_buffer.id;
6557
vkCmdDispatchIndirect(command_buffer->vk_command_buffer, buf_info->vk_buffer, p_offset);
6558
}
6559
6560
// ----- PIPELINE -----
6561
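// Specialization constants are passed straight from the caller's array: pData points at
// the PipelineSpecializationConstant array itself, and each map entry's offset is the
// byte offset of that element's int_value field within the array, so no repacking is
// needed before pipeline creation.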
RDD::PipelineID RenderingDeviceDriverVulkan::compute_pipeline_create(ShaderID p_shader, VectorView<PipelineSpecializationConstant> p_specialization_constants) {
	const ShaderInfo *shader_info = (const ShaderInfo *)p_shader.id;

	VkComputePipelineCreateInfo pipeline_create_info = {};
	pipeline_create_info.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO;
	pipeline_create_info.stage = shader_info->vk_stages_create_info[0];
	pipeline_create_info.layout = shader_info->vk_pipeline_layout;

	if (p_specialization_constants.size()) {
		VkSpecializationMapEntry *specialization_map_entries = ALLOCA_ARRAY(VkSpecializationMapEntry, p_specialization_constants.size());
		for (uint32_t i = 0; i < p_specialization_constants.size(); i++) {
			specialization_map_entries[i] = {};
			specialization_map_entries[i].constantID = p_specialization_constants[i].constant_id;
			specialization_map_entries[i].offset = (const char *)&p_specialization_constants[i].int_value - (const char *)p_specialization_constants.ptr();
			specialization_map_entries[i].size = sizeof(uint32_t);
		}

		VkSpecializationInfo *specialization_info = ALLOCA_SINGLE(VkSpecializationInfo);
		*specialization_info = {};
		specialization_info->dataSize = p_specialization_constants.size() * sizeof(PipelineSpecializationConstant);
		specialization_info->pData = p_specialization_constants.ptr();
		specialization_info->mapEntryCount = p_specialization_constants.size();
		specialization_info->pMapEntries = specialization_map_entries;

		pipeline_create_info.stage.pSpecializationInfo = specialization_info;
	}

	VkPipeline vk_pipeline = VK_NULL_HANDLE;
	VkResult err = vkCreateComputePipelines(vk_device, pipelines_cache.vk_cache, 1, &pipeline_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_PIPELINE), &vk_pipeline);
	ERR_FAIL_COND_V_MSG(err, PipelineID(), "vkCreateComputePipelines failed with error " + itos(err) + ".");

	return PipelineID(vk_pipeline);
}

/*****************/
/**** QUERIES ****/
/*****************/

// ----- TIMESTAMP -----

RDD::QueryPoolID RenderingDeviceDriverVulkan::timestamp_query_pool_create(uint32_t p_query_count) {
	VkQueryPoolCreateInfo query_pool_create_info = {};
	query_pool_create_info.sType = VK_STRUCTURE_TYPE_QUERY_POOL_CREATE_INFO;
	query_pool_create_info.queryType = VK_QUERY_TYPE_TIMESTAMP;
	query_pool_create_info.queryCount = p_query_count;

	VkQueryPool vk_query_pool = VK_NULL_HANDLE;
	vkCreateQueryPool(vk_device, &query_pool_create_info, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_QUERY_POOL), &vk_query_pool);
	return RDD::QueryPoolID(vk_query_pool);
}

void RenderingDeviceDriverVulkan::timestamp_query_pool_free(QueryPoolID p_pool_id) {
	vkDestroyQueryPool(vk_device, (VkQueryPool)p_pool_id.id, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_QUERY_POOL));
}

void RenderingDeviceDriverVulkan::timestamp_query_pool_get_results(QueryPoolID p_pool_id, uint32_t p_query_count, uint64_t *r_results) {
	vkGetQueryPoolResults(vk_device, (VkQueryPool)p_pool_id.id, 0, p_query_count, sizeof(uint64_t) * p_query_count, r_results, sizeof(uint64_t), VK_QUERY_RESULT_64_BIT);
}

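// Converts a raw timestamp tick count to nanoseconds (ticks * timestampPeriod) in 16.16
// fixed point. E.g., with timestampPeriod = 1.0 the multiplier becomes 65536, so the
// 128-bit product shifted right by 16 returns the tick count unchanged, with no float
// rounding even for very large tick values.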
uint64_t RenderingDeviceDriverVulkan::timestamp_query_result_to_time(uint64_t p_result) {
	// The timestampPeriod multiplier is a float, while the timestamp is a 64-bit tick
	// count. On drivers like NVIDIA's, which return enormous tick values with 1 as the
	// multiplier, a plain float multiplication loses precision, so we do a 128-bit
	// fixed-point multiplication to get the right value.

	auto mult64to128 = [](uint64_t u, uint64_t v, uint64_t &h, uint64_t &l) {
		uint64_t u1 = (u & 0xffffffff);
		uint64_t v1 = (v & 0xffffffff);
		uint64_t t = (u1 * v1);
		uint64_t w3 = (t & 0xffffffff);
		uint64_t k = (t >> 32);

		u >>= 32;
		t = (u * v1) + k;
		k = (t & 0xffffffff);
		uint64_t w1 = (t >> 32);

		v >>= 32;
		t = (u1 * v) + k;
		k = (t >> 32);

		h = (u * v) + w1 + k;
		l = (t << 32) + w3;
	};

	uint64_t shift_bits = 16;
	uint64_t h = 0, l = 0;
	mult64to128(p_result, uint64_t(double(physical_device_properties.limits.timestampPeriod) * double(1 << shift_bits)), h, l);
	l >>= shift_bits;
	l |= h << (64 - shift_bits);

	return l;
}

void RenderingDeviceDriverVulkan::command_timestamp_query_pool_reset(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_query_count) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdResetQueryPool(command_buffer->vk_command_buffer, (VkQueryPool)p_pool_id.id, 0, p_query_count);
}

void RenderingDeviceDriverVulkan::command_timestamp_write(CommandBufferID p_cmd_buffer, QueryPoolID p_pool_id, uint32_t p_index) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	vkCmdWriteTimestamp(command_buffer->vk_command_buffer, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, (VkQueryPool)p_pool_id.id, p_index);
}

/****************/
/**** LABELS ****/
/****************/

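// Prefer VK_EXT_debug_utils labels; fall back to the older VK_EXT_debug_marker commands
// when only those are available, and silently no-op when neither extension is present.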
void RenderingDeviceDriverVulkan::command_begin_label(CommandBufferID p_cmd_buffer, const char *p_label_name, const Color &p_color) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
	if (!functions.CmdBeginDebugUtilsLabelEXT) {
		if (functions.CmdDebugMarkerBeginEXT) {
			// Debug marker extensions.
			VkDebugMarkerMarkerInfoEXT marker;
			marker.sType = VK_STRUCTURE_TYPE_DEBUG_MARKER_MARKER_INFO_EXT;
			marker.pNext = nullptr;
			marker.pMarkerName = p_label_name;
			marker.color[0] = p_color[0];
			marker.color[1] = p_color[1];
			marker.color[2] = p_color[2];
			marker.color[3] = p_color[3];
			functions.CmdDebugMarkerBeginEXT(command_buffer->vk_command_buffer, &marker);
		}
		return;
	}
	VkDebugUtilsLabelEXT label;
	label.sType = VK_STRUCTURE_TYPE_DEBUG_UTILS_LABEL_EXT;
	label.pNext = nullptr;
	label.pLabelName = p_label_name;
	label.color[0] = p_color[0];
	label.color[1] = p_color[1];
	label.color[2] = p_color[2];
	label.color[3] = p_color[3];
	functions.CmdBeginDebugUtilsLabelEXT(command_buffer->vk_command_buffer, &label);
}

void RenderingDeviceDriverVulkan::command_end_label(CommandBufferID p_cmd_buffer) {
	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	const RenderingContextDriverVulkan::Functions &functions = context_driver->functions_get();
	if (!functions.CmdEndDebugUtilsLabelEXT) {
		if (functions.CmdDebugMarkerEndEXT) {
			// Debug marker extensions.
			functions.CmdDebugMarkerEndEXT(command_buffer->vk_command_buffer);
		}
		return;
	}
	functions.CmdEndDebugUtilsLabelEXT(command_buffer->vk_command_buffer);
}

/****************/
/**** DEBUG *****/
/****************/
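// Each breadcrumb entry is a pair of 32-bit values: a monotonically increasing ID
// followed by the user-supplied marker data, written into a circular buffer via
// vkCmdFillBuffer so the GPU records how far it got before a crash.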
void RenderingDeviceDriverVulkan::command_insert_breadcrumb(CommandBufferID p_cmd_buffer, uint32_t p_data) {
#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
	if (p_data == BreadcrumbMarker::NONE) {
		return;
	}

	const CommandBufferInfo *command_buffer = (const CommandBufferInfo *)p_cmd_buffer.id;
	if (Engine::get_singleton()->is_accurate_breadcrumbs_enabled()) {
		// Force a full barrier so commands are not executed in parallel.
		// This means the last breadcrumb we read back was actually written by the
		// last (group of) commands to execute (hence, the ones causing the crash).
		VkMemoryBarrier memoryBarrier;
		memoryBarrier.sType = VK_STRUCTURE_TYPE_MEMORY_BARRIER;
		memoryBarrier.pNext = nullptr;
		memoryBarrier.srcAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
				VK_ACCESS_INDEX_READ_BIT |
				VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
				VK_ACCESS_UNIFORM_READ_BIT |
				VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
				VK_ACCESS_SHADER_READ_BIT |
				VK_ACCESS_SHADER_WRITE_BIT |
				VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
				VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
				VK_ACCESS_TRANSFER_READ_BIT |
				VK_ACCESS_TRANSFER_WRITE_BIT |
				VK_ACCESS_HOST_READ_BIT |
				VK_ACCESS_HOST_WRITE_BIT;
		memoryBarrier.dstAccessMask = VK_ACCESS_INDIRECT_COMMAND_READ_BIT |
				VK_ACCESS_INDEX_READ_BIT |
				VK_ACCESS_VERTEX_ATTRIBUTE_READ_BIT |
				VK_ACCESS_UNIFORM_READ_BIT |
				VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
				VK_ACCESS_SHADER_READ_BIT |
				VK_ACCESS_SHADER_WRITE_BIT |
				VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
				VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
				VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT |
				VK_ACCESS_TRANSFER_READ_BIT |
				VK_ACCESS_TRANSFER_WRITE_BIT |
				VK_ACCESS_HOST_READ_BIT |
				VK_ACCESS_HOST_WRITE_BIT;

		vkCmdPipelineBarrier(
				command_buffer->vk_command_buffer,
				VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
				VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
				0, 1u, &memoryBarrier, 0u, nullptr, 0u, nullptr);
	}

	// We write to a circular buffer. If you're getting barrier sync errors here,
	// increase the value of BREADCRUMB_BUFFER_ENTRIES.
	vkCmdFillBuffer(command_buffer->vk_command_buffer, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset, sizeof(uint32_t), breadcrumb_id++);
	vkCmdFillBuffer(command_buffer->vk_command_buffer, ((BufferInfo *)breadcrumb_buffer.id)->vk_buffer, breadcrumb_offset + sizeof(uint32_t), sizeof(uint32_t), p_data);
	breadcrumb_offset += sizeof(uint32_t) * 2u;
	if (breadcrumb_offset >= BREADCRUMB_BUFFER_ENTRIES * sizeof(uint32_t) * 2u) {
		breadcrumb_offset = 0u;
	}
#endif
}

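// Queries VK_EXT_device_fault after a device loss and logs what the driver reports.
// The extension uses the usual Vulkan two-call pattern: fetch the counts first, then
// allocate and fetch the vendor and address fault records themselves.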
void RenderingDeviceDriverVulkan::on_device_lost() const {
	if (device_functions.GetDeviceFaultInfoEXT == nullptr) {
		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "VK_EXT_device_fault not available.");
		return;
	}

	VkDeviceFaultCountsEXT fault_counts = {};
	fault_counts.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_COUNTS_EXT;
	VkResult vkres = device_functions.GetDeviceFaultInfoEXT(vk_device, &fault_counts, nullptr);

	if (vkres != VK_SUCCESS) {
		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "vkGetDeviceFaultInfoEXT returned " + itos(vkres) + " when getting fault count, skipping VK_EXT_device_fault report...");
		return;
	}

	String err_msg;
	VkDeviceFaultInfoEXT fault_info = {};
	fault_info.sType = VK_STRUCTURE_TYPE_DEVICE_FAULT_INFO_EXT;
	fault_info.pVendorInfos = fault_counts.vendorInfoCount
			? (VkDeviceFaultVendorInfoEXT *)memalloc(fault_counts.vendorInfoCount * sizeof(VkDeviceFaultVendorInfoEXT))
			: nullptr;
	fault_info.pAddressInfos =
			fault_counts.addressInfoCount
			? (VkDeviceFaultAddressInfoEXT *)memalloc(fault_counts.addressInfoCount * sizeof(VkDeviceFaultAddressInfoEXT))
			: nullptr;
	fault_counts.vendorBinarySize = 0;
	vkres = device_functions.GetDeviceFaultInfoEXT(vk_device, &fault_counts, &fault_info);
	if (vkres != VK_SUCCESS) {
		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "vkGetDeviceFaultInfoEXT returned " + itos(vkres) + " when getting fault info, skipping VK_EXT_device_fault report...");
	} else {
		err_msg += "** Report from VK_EXT_device_fault **";
		err_msg += "\nDescription: " + String(fault_info.description);
		err_msg += "\nVendor infos:";
		for (uint32_t vd = 0; vd < fault_counts.vendorInfoCount; ++vd) {
			const VkDeviceFaultVendorInfoEXT *vendor_info = &fault_info.pVendorInfos[vd];
			err_msg += "\nInfo " + itos(vd);
			err_msg += "\n Description: " + String(vendor_info->description);
			err_msg += "\n Fault code : " + itos(vendor_info->vendorFaultCode);
			err_msg += "\n Fault data : " + itos(vendor_info->vendorFaultData);
		}

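		// addressPrecision is a power of two, so masking the reported address with
		// ~(precision - 1) and (precision - 1) yields the lower and upper bounds of
		// the range the fault actually occurred in.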
		static constexpr const char *addressTypeNames[] = {
			"NONE",
			"READ_INVALID",
			"WRITE_INVALID",
			"EXECUTE_INVALID",
			"INSTRUCTION_POINTER_UNKNOWN",
			"INSTRUCTION_POINTER_INVALID",
			"INSTRUCTION_POINTER_FAULT",
		};
		err_msg += "\nAddresses info:";
		for (uint32_t ad = 0; ad < fault_counts.addressInfoCount; ++ad) {
			const VkDeviceFaultAddressInfoEXT *addr_info = &fault_info.pAddressInfos[ad];
			// From https://registry.khronos.org/vulkan/specs/1.3-extensions/man/html/VkDeviceFaultAddressInfoEXT.html
			const VkDeviceAddress lower = (addr_info->reportedAddress & ~(addr_info->addressPrecision - 1));
			const VkDeviceAddress upper = (addr_info->reportedAddress | (addr_info->addressPrecision - 1));
			err_msg += "\nInfo " + itos(ad);
			err_msg += "\n Type : " + String(addressTypeNames[addr_info->addressType]);
			err_msg += "\n Reported address: " + itos(addr_info->reportedAddress);
			err_msg += "\n Lower address : " + itos(lower);
			err_msg += "\n Upper address : " + itos(upper);
			err_msg += "\n Precision : " + itos(addr_info->addressPrecision);
		}
	}

	_err_print_error(FUNCTION_STR, __FILE__, __LINE__, err_msg);

	if (fault_info.pVendorInfos) {
		memfree(fault_info.pVendorInfos);
	}
	if (fault_info.pAddressInfos) {
		memfree(fault_info.pAddressInfos);
	}

	_err_print_error(FUNCTION_STR, __FILE__, __LINE__, context_driver->get_driver_and_device_memory_report());
}

void RenderingDeviceDriverVulkan::print_lost_device_info() {
#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
	{
		String error_msg = "Printing last known breadcrumbs in reverse order (last executed first).";
		if (!Engine::get_singleton()->is_accurate_breadcrumbs_enabled()) {
			error_msg += "\nSome of them might be inaccurate. Try running with --accurate-breadcrumbs for precise information.";
		}
		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, error_msg);
	}

	uint8_t *breadcrumb_ptr = nullptr;
	VkResult map_result = VK_SUCCESS;

	vmaFlushAllocation(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle, 0, BREADCRUMB_BUFFER_ENTRIES * sizeof(uint32_t) * 2u);
	vmaInvalidateAllocation(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle, 0, BREADCRUMB_BUFFER_ENTRIES * sizeof(uint32_t) * 2u);
	{
		void *ptr = nullptr;
		map_result = vmaMapMemory(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle, &ptr);
		breadcrumb_ptr = reinterpret_cast<uint8_t *>(ptr);
	}

	if (breadcrumb_ptr && map_result == VK_SUCCESS) {
		uint32_t last_breadcrumb_offset = 0;
		{
			_err_print_error_asap("Searching last breadcrumb. We've sent up to ID: " + itos(breadcrumb_id - 1u));

			// Scan the whole buffer to find the offset with the highest ID.
			// That means it was the last one to be written.
			//
			// We use "breadcrumb_id - id" to account for wraparound.
			// e.g. breadcrumb_id = 2 and id = 4294967294; then 2 - 4294967294 = 4.
			// The one with the smallest difference is the closest to breadcrumb_id, which means it's
			// the last written command.
			uint32_t biggest_id = 0u;
			uint32_t smallest_id_diff = std::numeric_limits<uint32_t>::max();
			const uint32_t *breadcrumb_ptr32 = reinterpret_cast<const uint32_t *>(breadcrumb_ptr);
			for (size_t i = 0u; i < BREADCRUMB_BUFFER_ENTRIES; ++i) {
				const uint32_t id = breadcrumb_ptr32[i * 2u];
				const uint32_t id_diff = breadcrumb_id - id;
				if (id_diff < smallest_id_diff) {
					biggest_id = i;
					smallest_id_diff = id_diff;
				}
			}

			_err_print_error_asap("Last breadcrumb ID found: " + itos(breadcrumb_ptr32[biggest_id * 2u]));

			last_breadcrumb_offset = biggest_id * sizeof(uint32_t) * 2u;
		}

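		// Walk backwards from the most recent entry. The marker's top 16 bits encode
		// the pass (BreadcrumbMarker); the bottom 16 bits carry optional user data.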
		const size_t entries_to_print = 8u; // Note: The value is arbitrary.
		for (size_t i = 0u; i < entries_to_print; ++i) {
			const uint32_t last_breadcrumb = *reinterpret_cast<uint32_t *>(breadcrumb_ptr + last_breadcrumb_offset + sizeof(uint32_t));
			const uint32_t phase = last_breadcrumb & uint32_t(~((1 << 16) - 1));
			const uint32_t user_data = last_breadcrumb & ((1 << 16) - 1);
			String error_msg = "Last known breadcrumb: ";

			switch (phase) {
				case BreadcrumbMarker::ALPHA_PASS:
					error_msg += "ALPHA_PASS";
					break;
				case BreadcrumbMarker::BLIT_PASS:
					error_msg += "BLIT_PASS";
					break;
				case BreadcrumbMarker::DEBUG_PASS:
					error_msg += "DEBUG_PASS";
					break;
				case BreadcrumbMarker::LIGHTMAPPER_PASS:
					error_msg += "LIGHTMAPPER_PASS";
					break;
				case BreadcrumbMarker::OPAQUE_PASS:
					error_msg += "OPAQUE_PASS";
					break;
				case BreadcrumbMarker::POST_PROCESSING_PASS:
					error_msg += "POST_PROCESSING_PASS";
					break;
				case BreadcrumbMarker::REFLECTION_PROBES:
					error_msg += "REFLECTION_PROBES";
					break;
				case BreadcrumbMarker::SHADOW_PASS_CUBE:
					error_msg += "SHADOW_PASS_CUBE";
					break;
				case BreadcrumbMarker::SHADOW_PASS_DIRECTIONAL:
					error_msg += "SHADOW_PASS_DIRECTIONAL";
					break;
				case BreadcrumbMarker::SKY_PASS:
					error_msg += "SKY_PASS";
					break;
				case BreadcrumbMarker::TRANSPARENT_PASS:
					error_msg += "TRANSPARENT_PASS";
					break;
				case BreadcrumbMarker::UI_PASS:
					error_msg += "UI_PASS";
					break;
				default:
					error_msg += "UNKNOWN_BREADCRUMB(" + itos((uint32_t)phase) + ')';
					break;
			}

			if (user_data != 0) {
				error_msg += " | User data: " + itos(user_data);
			}

			_err_print_error_asap(error_msg);

			if (last_breadcrumb_offset == 0u) {
				// Decrement last_breadcrumb_offset, wrapping around the circular buffer.
				last_breadcrumb_offset = BREADCRUMB_BUFFER_ENTRIES * sizeof(uint32_t) * 2u;
			}
			last_breadcrumb_offset -= sizeof(uint32_t) * 2u;
		}

		vmaUnmapMemory(allocator, ((BufferInfo *)breadcrumb_buffer.id)->allocation.handle);
		breadcrumb_ptr = nullptr;
	} else {
		_err_print_error(FUNCTION_STR, __FILE__, __LINE__, "Couldn't map breadcrumb buffer. VkResult = " + itos(map_result));
	}
#endif
	on_device_lost();
}

inline String RenderingDeviceDriverVulkan::get_vulkan_result(VkResult err) {
#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
	if (err == VK_ERROR_OUT_OF_HOST_MEMORY) {
		return "VK_ERROR_OUT_OF_HOST_MEMORY";
	} else if (err == VK_ERROR_OUT_OF_DEVICE_MEMORY) {
		return "VK_ERROR_OUT_OF_DEVICE_MEMORY";
	} else if (err == VK_ERROR_DEVICE_LOST) {
		return "VK_ERROR_DEVICE_LOST";
	} else if (err == VK_ERROR_SURFACE_LOST_KHR) {
		return "VK_ERROR_SURFACE_LOST_KHR";
	} else if (err == VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT) {
		return "VK_ERROR_FULL_SCREEN_EXCLUSIVE_MODE_LOST_EXT";
	}
#endif
	return itos(err);
}

/********************/
/**** SUBMISSION ****/
/********************/

void RenderingDeviceDriverVulkan::begin_segment(uint32_t p_frame_index, uint32_t p_frames_drawn) {
	// Per-frame segments are not required in Vulkan.
}

void RenderingDeviceDriverVulkan::end_segment() {
	// Per-frame segments are not required in Vulkan.
}

/**************/
/**** MISC ****/
/**************/

void RenderingDeviceDriverVulkan::set_object_name(ObjectType p_type, ID p_driver_id, const String &p_name) {
	switch (p_type) {
		case OBJECT_TYPE_TEXTURE: {
			const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;
			if (tex_info->allocation.handle) {
				_set_object_name(VK_OBJECT_TYPE_IMAGE, (uint64_t)tex_info->vk_view_create_info.image, p_name);
			}
			_set_object_name(VK_OBJECT_TYPE_IMAGE_VIEW, (uint64_t)tex_info->vk_view, p_name + " View");
		} break;
		case OBJECT_TYPE_SAMPLER: {
			_set_object_name(VK_OBJECT_TYPE_SAMPLER, p_driver_id.id, p_name);
		} break;
		case OBJECT_TYPE_BUFFER: {
			const BufferInfo *buf_info = (const BufferInfo *)p_driver_id.id;
			_set_object_name(VK_OBJECT_TYPE_BUFFER, (uint64_t)buf_info->vk_buffer, p_name);
			if (buf_info->vk_view) {
				_set_object_name(VK_OBJECT_TYPE_BUFFER_VIEW, (uint64_t)buf_info->vk_view, p_name + " View");
			}
		} break;
		case OBJECT_TYPE_SHADER: {
			const ShaderInfo *shader_info = (const ShaderInfo *)p_driver_id.id;
			for (uint32_t i = 0; i < shader_info->vk_descriptor_set_layouts.size(); i++) {
				_set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET_LAYOUT, (uint64_t)shader_info->vk_descriptor_set_layouts[i], p_name);
			}
			_set_object_name(VK_OBJECT_TYPE_PIPELINE_LAYOUT, (uint64_t)shader_info->vk_pipeline_layout, p_name + " Pipeline Layout");
		} break;
		case OBJECT_TYPE_UNIFORM_SET: {
			const UniformSetInfo *usi = (const UniformSetInfo *)p_driver_id.id;
			_set_object_name(VK_OBJECT_TYPE_DESCRIPTOR_SET, (uint64_t)usi->vk_descriptor_set, p_name);
		} break;
		case OBJECT_TYPE_PIPELINE: {
			_set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)p_driver_id.id, p_name);
		} break;
		case OBJECT_TYPE_ACCELERATION_STRUCTURE: {
			const AccelerationStructureInfo *asi = (const AccelerationStructureInfo *)p_driver_id.id;
			_set_object_name(VK_OBJECT_TYPE_ACCELERATION_STRUCTURE_KHR, (uint64_t)asi->vk_acceleration_structure, p_name);
		} break;
		case OBJECT_TYPE_RAYTRACING_PIPELINE: {
			const RaytracingPipelineInfo *rpi = (const RaytracingPipelineInfo *)p_driver_id.id;
			_set_object_name(VK_OBJECT_TYPE_PIPELINE, (uint64_t)rpi->vk_pipeline, p_name);
		} break;
		default: {
			DEV_ASSERT(false);
		}
	}
}

uint64_t RenderingDeviceDriverVulkan::get_resource_native_handle(DriverResource p_type, ID p_driver_id) {
	switch (p_type) {
		case DRIVER_RESOURCE_LOGICAL_DEVICE: {
			return (uint64_t)vk_device;
		}
		case DRIVER_RESOURCE_PHYSICAL_DEVICE: {
			return (uint64_t)physical_device;
		}
		case DRIVER_RESOURCE_TOPMOST_OBJECT: {
			return (uint64_t)context_driver->instance_get();
		}
		case DRIVER_RESOURCE_COMMAND_QUEUE: {
			const CommandQueue *queue_info = (const CommandQueue *)p_driver_id.id;
			return (uint64_t)queue_families[queue_info->queue_family][queue_info->queue_index].queue;
		}
		case DRIVER_RESOURCE_QUEUE_FAMILY: {
			return uint32_t(p_driver_id.id) - 1;
		}
		case DRIVER_RESOURCE_TEXTURE: {
			const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;
			return (uint64_t)tex_info->vk_view_create_info.image;
		}
		case DRIVER_RESOURCE_TEXTURE_VIEW: {
			const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;
			return (uint64_t)tex_info->vk_view;
		}
		case DRIVER_RESOURCE_TEXTURE_DATA_FORMAT: {
			const TextureInfo *tex_info = (const TextureInfo *)p_driver_id.id;
			return (uint64_t)tex_info->vk_view_create_info.format;
		}
		case DRIVER_RESOURCE_SAMPLER:
		case DRIVER_RESOURCE_UNIFORM_SET:
		case DRIVER_RESOURCE_BUFFER:
		case DRIVER_RESOURCE_COMPUTE_PIPELINE:
		case DRIVER_RESOURCE_RENDER_PIPELINE: {
			return p_driver_id.id;
		}
		default: {
			return 0;
		}
	}
}

uint64_t RenderingDeviceDriverVulkan::get_total_memory_used() {
	VmaTotalStatistics stats = {};
	vmaCalculateStatistics(allocator, &stats);
	return stats.total.statistics.allocationBytes;
}

uint64_t RenderingDeviceDriverVulkan::get_lazily_memory_used() {
	return vmaCalculateLazilyAllocatedBytes(allocator);
}

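// Maps RenderingDevice limits onto VkPhysicalDeviceLimits. Unknown limits fall through
// to a large "safe unbounded" value (2^30) instead of failing, warning in dev builds.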
uint64_t RenderingDeviceDriverVulkan::limit_get(Limit p_limit) {
	const VkPhysicalDeviceLimits &limits = physical_device_properties.limits;
	uint64_t safe_unbounded = ((uint64_t)1 << 30);
	switch (p_limit) {
		case LIMIT_MAX_BOUND_UNIFORM_SETS:
			return limits.maxBoundDescriptorSets;
		case LIMIT_MAX_FRAMEBUFFER_COLOR_ATTACHMENTS:
			return limits.maxColorAttachments;
		case LIMIT_MAX_TEXTURES_PER_UNIFORM_SET:
			return limits.maxDescriptorSetSampledImages;
		case LIMIT_MAX_SAMPLERS_PER_UNIFORM_SET:
			return limits.maxDescriptorSetSamplers;
		case LIMIT_MAX_STORAGE_BUFFERS_PER_UNIFORM_SET:
			return limits.maxDescriptorSetStorageBuffers;
		case LIMIT_MAX_STORAGE_IMAGES_PER_UNIFORM_SET:
			return limits.maxDescriptorSetStorageImages;
		case LIMIT_MAX_UNIFORM_BUFFERS_PER_UNIFORM_SET:
			return limits.maxDescriptorSetUniformBuffers;
		case LIMIT_MAX_DRAW_INDEXED_INDEX:
			return limits.maxDrawIndexedIndexValue;
		case LIMIT_MAX_FRAMEBUFFER_HEIGHT:
			return limits.maxFramebufferHeight;
		case LIMIT_MAX_FRAMEBUFFER_WIDTH:
			return limits.maxFramebufferWidth;
		case LIMIT_MAX_TEXTURE_ARRAY_LAYERS:
			return limits.maxImageArrayLayers;
		case LIMIT_MAX_TEXTURE_SIZE_1D:
			return limits.maxImageDimension1D;
		case LIMIT_MAX_TEXTURE_SIZE_2D:
			return limits.maxImageDimension2D;
		case LIMIT_MAX_TEXTURE_SIZE_3D:
			return limits.maxImageDimension3D;
		case LIMIT_MAX_TEXTURE_SIZE_CUBE:
			return limits.maxImageDimensionCube;
		case LIMIT_MAX_TEXTURES_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorSampledImages;
		case LIMIT_MAX_SAMPLERS_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorSamplers;
		case LIMIT_MAX_STORAGE_BUFFERS_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorStorageBuffers;
		case LIMIT_MAX_STORAGE_IMAGES_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorStorageImages;
		case LIMIT_MAX_UNIFORM_BUFFERS_PER_SHADER_STAGE:
			return limits.maxPerStageDescriptorUniformBuffers;
		case LIMIT_MAX_PUSH_CONSTANT_SIZE:
			return limits.maxPushConstantsSize;
		case LIMIT_MAX_UNIFORM_BUFFER_SIZE:
			return limits.maxUniformBufferRange;
		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTE_OFFSET:
			return limits.maxVertexInputAttributeOffset;
		case LIMIT_MAX_VERTEX_INPUT_ATTRIBUTES:
			return limits.maxVertexInputAttributes;
		case LIMIT_MAX_VERTEX_INPUT_BINDINGS:
			return limits.maxVertexInputBindings;
		case LIMIT_MAX_VERTEX_INPUT_BINDING_STRIDE:
			return limits.maxVertexInputBindingStride;
		case LIMIT_MIN_UNIFORM_BUFFER_OFFSET_ALIGNMENT:
			return limits.minUniformBufferOffsetAlignment;
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_X:
			return limits.maxComputeWorkGroupCount[0];
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Y:
			return limits.maxComputeWorkGroupCount[1];
		case LIMIT_MAX_COMPUTE_WORKGROUP_COUNT_Z:
			return limits.maxComputeWorkGroupCount[2];
		case LIMIT_MAX_COMPUTE_WORKGROUP_INVOCATIONS:
			return limits.maxComputeWorkGroupInvocations;
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_X:
			return limits.maxComputeWorkGroupSize[0];
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Y:
			return limits.maxComputeWorkGroupSize[1];
		case LIMIT_MAX_COMPUTE_WORKGROUP_SIZE_Z:
			return limits.maxComputeWorkGroupSize[2];
		case LIMIT_MAX_COMPUTE_SHARED_MEMORY_SIZE:
			return limits.maxComputeSharedMemorySize;
		case LIMIT_MAX_VIEWPORT_DIMENSIONS_X:
			return limits.maxViewportDimensions[0];
		case LIMIT_MAX_VIEWPORT_DIMENSIONS_Y:
			return limits.maxViewportDimensions[1];
		case LIMIT_SUBGROUP_SIZE:
			return subgroup_capabilities.size;
		case LIMIT_SUBGROUP_MIN_SIZE:
			return subgroup_capabilities.min_size;
		case LIMIT_SUBGROUP_MAX_SIZE:
			return subgroup_capabilities.max_size;
		case LIMIT_SUBGROUP_IN_SHADERS:
			return subgroup_capabilities.supported_stages_flags_rd();
		case LIMIT_SUBGROUP_OPERATIONS:
			return subgroup_capabilities.supported_operations_flags_rd();
		case LIMIT_MAX_SHADER_VARYINGS:
			// The Vulkan spec states that built-in varyings like gl_FragCoord should count against this, but in
			// practice, that doesn't seem to be the case. The validation layers don't even complain.
			return MIN(limits.maxVertexOutputComponents / 4, limits.maxFragmentInputComponents / 4);
		default: {
#ifdef DEV_ENABLED
			WARN_PRINT("Returning maximum value for unknown limit " + itos(p_limit) + ".");
#endif
			return safe_unbounded;
		}
	}
}

uint64_t RenderingDeviceDriverVulkan::api_trait_get(ApiTrait p_trait) {
	switch (p_trait) {
		case API_TRAIT_TEXTURE_TRANSFER_ALIGNMENT:
			return (uint64_t)MAX((uint64_t)16, physical_device_properties.limits.optimalBufferCopyOffsetAlignment);
		case API_TRAIT_SHADER_CHANGE_INVALIDATION:
			return (uint64_t)SHADER_CHANGE_INVALIDATION_INCOMPATIBLE_SETS_PLUS_CASCADE;
		default:
			return RenderingDeviceDriver::api_trait_get(p_trait);
	}
}

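// Feature checks are the conjunction of every capability a feature depends on; e.g.
// half-float support requires float16 shaders, shaderInt16, and 16-bit storage access.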
bool RenderingDeviceDriverVulkan::has_feature(Features p_feature) {
	switch (p_feature) {
		case SUPPORTS_HALF_FLOAT:
			return shader_capabilities.shader_float16_is_supported && physical_device_features.shaderInt16 && storage_buffer_capabilities.storage_buffer_16_bit_access_is_supported;
		case SUPPORTS_FRAGMENT_SHADER_WITH_ONLY_SIDE_EFFECTS:
			return true;
		case SUPPORTS_BUFFER_DEVICE_ADDRESS:
			return buffer_device_address_support;
		case SUPPORTS_IMAGE_ATOMIC_32_BIT:
#if (defined(MACOS_ENABLED) || defined(APPLE_EMBEDDED_ENABLED))
			// MoltenVK has previously had issues with 32-bit atomics on images.
			return false;
#else
			return true;
#endif
		case SUPPORTS_VULKAN_MEMORY_MODEL:
			return vulkan_memory_model_support && vulkan_memory_model_device_scope_support;
		case SUPPORTS_FRAMEBUFFER_DEPTH_RESOLVE:
			return framebuffer_depth_resolve;
		case SUPPORTS_POINT_SIZE:
			return true;
		case SUPPORTS_RAY_QUERY:
			return acceleration_structure_capabilities.acceleration_structure_support && ray_query_support;
		case SUPPORTS_RAYTRACING_PIPELINE:
			return acceleration_structure_capabilities.acceleration_structure_support && raytracing_capabilities.raytracing_pipeline_support;
		default:
			return false;
	}
}

const RDD::MultiviewCapabilities &RenderingDeviceDriverVulkan::get_multiview_capabilities() {
	return multiview_capabilities;
}

const RDD::FragmentShadingRateCapabilities &RenderingDeviceDriverVulkan::get_fragment_shading_rate_capabilities() {
	return fsr_capabilities;
}

const RDD::FragmentDensityMapCapabilities &RenderingDeviceDriverVulkan::get_fragment_density_map_capabilities() {
	return fdm_capabilities;
}

String RenderingDeviceDriverVulkan::get_api_name() const {
	return "Vulkan";
}

String RenderingDeviceDriverVulkan::get_api_version() const {
	uint32_t api_version = physical_device_properties.apiVersion;
	return vformat("%d.%d.%d", VK_API_VERSION_MAJOR(api_version), VK_API_VERSION_MINOR(api_version), VK_API_VERSION_PATCH(api_version));
}

String RenderingDeviceDriverVulkan::get_pipeline_cache_uuid() const {
	return pipeline_cache_id;
}

const RDD::Capabilities &RenderingDeviceDriverVulkan::get_capabilities() const {
	return device_capabilities;
}

const RenderingShaderContainerFormat &RenderingDeviceDriverVulkan::get_shader_container_format() const {
	return shader_container_format;
}

bool RenderingDeviceDriverVulkan::is_composite_alpha_supported(CommandQueueID p_queue) const {
	if (has_comp_alpha.has((uint64_t)p_queue.id)) {
		return has_comp_alpha[(uint64_t)p_queue.id];
	}
	return false;
}

/******************/

RenderingDeviceDriverVulkan::RenderingDeviceDriverVulkan(RenderingContextDriverVulkan *p_context_driver) {
	DEV_ASSERT(p_context_driver != nullptr);

	context_driver = p_context_driver;
	max_descriptor_sets_per_pool = GLOBAL_GET("rendering/rendering_device/vulkan/max_descriptors_per_pool");
}

RenderingDeviceDriverVulkan::~RenderingDeviceDriverVulkan() {
#if defined(DEBUG_ENABLED) || defined(DEV_ENABLED)
	if (breadcrumb_buffer != BufferID()) {
		buffer_free(breadcrumb_buffer);
	}
#endif

	while (small_allocs_pools.size()) {
		HashMap<uint32_t, VmaPool>::Iterator E = small_allocs_pools.begin();
		vmaDestroyPool(allocator, E->value);
		small_allocs_pools.remove(E);
	}
	vmaDestroyAllocator(allocator);

	// Destroy linearly allocated descriptor pools.
	for (KeyValue<int, DescriptorSetPools> &pool_map : linear_descriptor_set_pools) {
		for (KeyValue<DescriptorSetPoolKey, HashMap<VkDescriptorPool, uint32_t>> pools : pool_map.value) {
			for (KeyValue<VkDescriptorPool, uint32_t> descriptor_pool : pools.value) {
				vkDestroyDescriptorPool(vk_device, descriptor_pool.key, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DESCRIPTOR_POOL));
			}
		}
	}

	if (vk_device != VK_NULL_HANDLE) {
		vkDestroyDevice(vk_device, VKC::get_allocation_callbacks(VK_OBJECT_TYPE_DEVICE));
	}
}