GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/vulkan/radv_device.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <fcntl.h>
#include <stdbool.h>
#include <string.h>

#ifdef __FreeBSD__
#include <sys/types.h>
#elif !defined(_WIN32)
#include <sys/sysmacros.h>
#endif

#include "util/debug.h"
#include "util/disk_cache.h"
#include "radv_cs.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_shader.h"
#include "vk_util.h"
#ifdef _WIN32
typedef void *drmDevicePtr;
#include <io.h>
#else
#include <amdgpu.h>
#include <xf86drm.h>
#include "drm-uapi/amdgpu_drm.h"
#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"
#endif
#include "util/build_id.h"
#include "util/debug.h"
#include "util/driconf.h"
#include "util/mesa-sha1.h"
#include "util/timespec.h"
#include "util/u_atomic.h"
#include "winsys/null/radv_null_winsys_public.h"
#include "ac_llvm_util.h"
#include "git_sha1.h"
#include "sid.h"
#include "vk_format.h"
#include "vulkan/vk_icd.h"

/* The number of IBs per submit isn't infinite, it depends on the ring type
 * (i.e. some initial setup needed for a submit) and the number of IBs (4 DW).
 * This limit is arbitrary but should be safe for now. Ideally, we should get
 * this limit from the KMD.
 */
#define RADV_MAX_IBS_PER_SUBMIT 192

/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */
#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)
#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST
#endif

static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
                                         uint64_t p);

static struct radv_timeline_point *radv_timeline_add_point_locked(struct radv_device *device,
                                                                  struct radv_timeline *timeline,
                                                                  uint64_t p);

static void radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                                 struct list_head *processing_list);

static void radv_destroy_semaphore_part(struct radv_device *device,
                                        struct radv_semaphore_part *part);

uint64_t
radv_get_current_time(void)
{
   return os_time_get_nano();
}

static uint64_t
radv_get_absolute_timeout(uint64_t timeout)
{
   uint64_t current_time = radv_get_current_time();

   timeout = MIN2(UINT64_MAX - current_time, timeout);

   return current_time + timeout;
}
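
/* Worked example for the clamp above: if current_time were UINT64_MAX - 5 and
 * the requested timeout were 100, MIN2 reduces the timeout to 5 so the sum
 * saturates at UINT64_MAX instead of wrapping around to a tiny value.
 */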

static int
radv_device_get_cache_uuid(enum radeon_family family, void *uuid)
{
   struct mesa_sha1 ctx;
   unsigned char sha1[20];
   unsigned ptr_size = sizeof(void *);

   memset(uuid, 0, VK_UUID_SIZE);
   _mesa_sha1_init(&ctx);

   if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||
       !disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))
      return -1;

   _mesa_sha1_update(&ctx, &family, sizeof(family));
   _mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));
   _mesa_sha1_final(&ctx, sha1);

   memcpy(uuid, sha1, VK_UUID_SIZE);
   return 0;
}

static void
radv_get_driver_uuid(void *uuid)
{
   ac_compute_driver_uuid(uuid, VK_UUID_SIZE);
}

static void
radv_get_device_uuid(struct radeon_info *info, void *uuid)
{
   ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);
}

static uint64_t
radv_get_adjusted_vram_size(struct radv_physical_device *device)
{
   int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");
   if (ov >= 0)
      return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);
   return device->rad_info.vram_size;
}
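
/* Illustrative numbers (assuming the driconf option is given in MiB, which is
 * what the << 20 conversion to bytes implies): override_vram_size=1024 makes
 * the function advertise min(real VRAM size, 1 GiB).
 */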

static uint64_t
radv_get_visible_vram_size(struct radv_physical_device *device)
{
   return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);
}

static uint64_t
radv_get_vram_size(struct radv_physical_device *device)
{
   uint64_t total_size = radv_get_adjusted_vram_size(device);
   return total_size - MIN2(total_size, device->rad_info.vram_vis_size);
}

enum radv_heap {
   RADV_HEAP_VRAM = 1 << 0,
   RADV_HEAP_GTT = 1 << 1,
   RADV_HEAP_VRAM_VIS = 1 << 2,
   RADV_HEAP_MAX = 1 << 3,
};

static void
radv_physical_device_init_mem_types(struct radv_physical_device *device)
{
   uint64_t visible_vram_size = radv_get_visible_vram_size(device);
   uint64_t vram_size = radv_get_vram_size(device);
   uint64_t gtt_size = device->rad_info.gart_size;
   int vram_index = -1, visible_vram_index = -1, gart_index = -1;

   device->memory_properties.memoryHeapCount = 0;
   device->heaps = 0;

   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the carveout is usually too small for games that request a minimum VRAM size
       * greater than it. To work around this, we compute the total available memory size (GTT +
       * visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.
       */
      const uint64_t total_size = gtt_size + visible_vram_size;
      visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);
      gtt_size = total_size - visible_vram_size;
      vram_size = 0;
   }
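
   /* Worked example (illustrative numbers): with a 512 MiB carveout and
    * 3584 MiB of GTT, total_size is 4096 MiB; roughly 2731 MiB is then
    * advertised as visible "VRAM" (rounded to the GART page size) and the
    * remaining ~1365 MiB as GTT.
    */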

   /* Only get a VRAM heap if it is significant, not if it is a 16 MiB
    * remainder above visible VRAM. */
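   /* Put differently, the check below keeps the heap only when the invisible
    * remainder is at least one ninth of the visible size: a 16 MiB remainder
    * over 4 GiB of visible VRAM gives 16 MiB * 9 = 144 MiB < 4096 MiB, so no
    * separate VRAM heap is reported in that case.
    */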
   if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {
      vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM;
      device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){
         .size = vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   if (gtt_size > 0) {
      gart_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_GTT;
      device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){
         .size = gtt_size,
         .flags = 0,
      };
   }

   if (visible_vram_size) {
      visible_vram_index = device->memory_properties.memoryHeapCount++;
      device->heaps |= RADV_HEAP_VRAM_VIS;
      device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){
         .size = visible_vram_size,
         .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
      };
   }

   unsigned type_count = 0;

   if (vram_index >= 0 || visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,
         .heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags =
            VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = gart_index,
      };
   }
   if (visible_vram_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = visible_vram_index,
      };
   }

   if (gart_index >= 0) {
      device->memory_domains[type_count] = RADEON_DOMAIN_GTT;
      device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;
      device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
         .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = gart_index,
      };
   }
   device->memory_properties.memoryTypeCount = type_count;

   if (device->rad_info.has_l2_uncached) {
      for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {
         VkMemoryType mem_type = device->memory_properties.memoryTypes[i];

         if ((mem_type.propertyFlags &
              (VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||
             mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {

            VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |
                                                   VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |
                                                   VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;

            device->memory_domains[type_count] = device->memory_domains[i];
            device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;
            device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){
               .propertyFlags = property_flags,
               .heapIndex = mem_type.heapIndex,
            };
         }
      }
      device->memory_properties.memoryTypeCount = type_count;
   }
}

static const char *
radv_get_compiler_string(struct radv_physical_device *pdevice)
{
   if (!pdevice->use_llvm) {
      /* Some games like SotTR apply shader workarounds if the LLVM
       * version is too old or if the LLVM version string is
       * missing. This gives 2-5% performance with SotTR and ACO.
       */
      if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {
         return " (LLVM 9.0.1)";
      }

      return "";
   }

   return " (LLVM " MESA_LLVM_VERSION_STRING ")";
}

int
radv_get_int_debug_option(const char *name, int default_value)
{
   const char *str;
   int result;

   str = getenv(name);
   if (!str) {
      result = default_value;
   } else {
      char *endptr;

      result = strtol(str, &endptr, 0);
      if (str == endptr) {
         /* No digits found. */
         result = default_value;
      }
   }

   return result;
}
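
/* Usage sketch: because strtol() runs with base 0, the environment value may
 * be written in decimal, hexadecimal or octal, e.g. RADV_THREAD_TRACE=4096 or
 * RADV_THREAD_TRACE=0x1000 (the same value). If no digits parse at all, the
 * caller's default_value is returned instead.
 */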

static bool
radv_thread_trace_enabled()
{
   return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||
          getenv("RADV_THREAD_TRACE_TRIGGER");
}
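
/* As the checks above show, thread tracing counts as enabled when
 * RADV_THREAD_TRACE is set to any value >= 0 or when RADV_THREAD_TRACE_TRIGGER
 * is set at all; the precise semantics of those variables (capture frame vs.
 * trigger file) live in the SQTT code elsewhere in the driver.
 */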

#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \
   defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)
#define RADV_USE_WSI_PLATFORM
#endif

#ifdef ANDROID
#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)
#else
#define RADV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)
#endif

VkResult
radv_EnumerateInstanceVersion(uint32_t *pApiVersion)
{
   *pApiVersion = RADV_API_VERSION;
   return VK_SUCCESS;
}

static const struct vk_instance_extension_table radv_instance_extensions_supported = {
   .KHR_device_group_creation = true,
   .KHR_external_fence_capabilities = true,
   .KHR_external_memory_capabilities = true,
   .KHR_external_semaphore_capabilities = true,
   .KHR_get_physical_device_properties2 = true,
   .EXT_debug_report = true,

#ifdef RADV_USE_WSI_PLATFORM
   .KHR_get_surface_capabilities2 = true,
   .KHR_surface = true,
   .KHR_surface_protected_capabilities = true,
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   .KHR_wayland_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XCB_KHR
   .KHR_xcb_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   .KHR_xlib_surface = true,
#endif
#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT
   .EXT_acquire_xlib_display = true,
#endif
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
   .KHR_display = true,
   .KHR_get_display_properties2 = true,
   .EXT_direct_mode_display = true,
   .EXT_display_surface_counter = true,
   .EXT_acquire_drm_display = true,
#endif
};

static void
radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,
                                              struct vk_device_extension_table *ext)
{
   *ext = (struct vk_device_extension_table){
      .KHR_8bit_storage = true,
      .KHR_16bit_storage = true,
      .KHR_acceleration_structure = (device->instance->perftest_flags & RADV_PERFTEST_RT) &&
                                    device->rad_info.chip_class >= GFX10_3,
      .KHR_bind_memory2 = true,
      .KHR_buffer_device_address = true,
      .KHR_copy_commands2 = true,
      .KHR_create_renderpass2 = true,
      .KHR_dedicated_allocation = true,
      .KHR_deferred_host_operations = true,
      .KHR_depth_stencil_resolve = true,
      .KHR_descriptor_update_template = true,
      .KHR_device_group = true,
      .KHR_draw_indirect_count = true,
      .KHR_driver_properties = true,
      .KHR_external_fence = true,
      .KHR_external_fence_fd = true,
      .KHR_external_memory = true,
      .KHR_external_memory_fd = true,
      .KHR_external_semaphore = true,
      .KHR_external_semaphore_fd = true,
      .KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,
      .KHR_get_memory_requirements2 = true,
      .KHR_image_format_list = true,
      .KHR_imageless_framebuffer = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_incremental_present = true,
#endif
      .KHR_maintenance1 = true,
      .KHR_maintenance2 = true,
      .KHR_maintenance3 = true,
      .KHR_multiview = true,
      .KHR_pipeline_executable_properties = true,
      .KHR_push_descriptor = true,
      .KHR_relaxed_block_layout = true,
      .KHR_sampler_mirror_clamp_to_edge = true,
      .KHR_sampler_ycbcr_conversion = true,
      .KHR_separate_depth_stencil_layouts = true,
      .KHR_shader_atomic_int64 = true,
      .KHR_shader_clock = true,
      .KHR_shader_draw_parameters = true,
      .KHR_shader_float16_int8 = true,
      .KHR_shader_float_controls = true,
      .KHR_shader_non_semantic_info = true,
      .KHR_shader_subgroup_extended_types = true,
      .KHR_shader_subgroup_uniform_control_flow = true,
      .KHR_shader_terminate_invocation = true,
      .KHR_spirv_1_4 = true,
      .KHR_storage_buffer_storage_class = true,
#ifdef RADV_USE_WSI_PLATFORM
      .KHR_swapchain = true,
      .KHR_swapchain_mutable_format = true,
#endif
      .KHR_timeline_semaphore = true,
      .KHR_uniform_buffer_standard_layout = true,
      .KHR_variable_pointers = true,
      .KHR_vulkan_memory_model = true,
      .KHR_workgroup_memory_explicit_layout = true,
      .KHR_zero_initialize_workgroup_memory = true,
      .EXT_4444_formats = true,
      .EXT_buffer_device_address = true,
      .EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,
      .EXT_color_write_enable = true,
      .EXT_conditional_rendering = true,
      .EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,
      .EXT_custom_border_color = true,
      .EXT_debug_marker = radv_thread_trace_enabled(),
      .EXT_depth_clip_enable = true,
      .EXT_depth_range_unrestricted = true,
      .EXT_descriptor_indexing = true,
      .EXT_discard_rectangles = true,
#ifdef VK_USE_PLATFORM_DISPLAY_KHR
      .EXT_display_control = true,
#endif
      .EXT_extended_dynamic_state = true,
      .EXT_extended_dynamic_state2 = true,
      .EXT_external_memory_dma_buf = true,
      .EXT_external_memory_host = device->rad_info.has_userptr,
      .EXT_global_priority = true,
      .EXT_global_priority_query = true,
      .EXT_host_query_reset = true,
      .EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,
      .EXT_image_robustness = true,
      .EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,
      .EXT_inline_uniform_block = true,
      .EXT_line_rasterization = true,
      .EXT_memory_budget = true,
      .EXT_memory_priority = true,
      .EXT_multi_draw = true,
      .EXT_pci_bus_info = true,
#ifndef _WIN32
      .EXT_physical_device_drm = true,
#endif
      .EXT_pipeline_creation_cache_control = true,
      .EXT_pipeline_creation_feedback = true,
      .EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,
      .EXT_private_data = true,
      .EXT_provoking_vertex = true,
      .EXT_queue_family_foreign = true,
      .EXT_robustness2 = true,
      .EXT_sample_locations = device->rad_info.chip_class < GFX10,
      .EXT_sampler_filter_minmax = true,
      .EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,
      .EXT_shader_atomic_float = true,
      .EXT_shader_demote_to_helper_invocation = true,
      .EXT_shader_image_atomic_int64 = true,
      .EXT_shader_stencil_export = true,
      .EXT_shader_subgroup_ballot = true,
      .EXT_shader_subgroup_vote = true,
      .EXT_shader_viewport_index_layer = true,
      .EXT_subgroup_size_control = true,
      .EXT_texel_buffer_alignment = true,
      .EXT_transform_feedback = true,
      .EXT_vertex_attribute_divisor = true,
      .EXT_ycbcr_image_arrays = true,
      .AMD_buffer_marker = true,
      .AMD_device_coherent_memory = true,
      .AMD_draw_indirect_count = true,
      .AMD_gcn_shader = true,
      .AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,
      .AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,
      .AMD_memory_overallocation_behavior = true,
      .AMD_mixed_attachment_samples = true,
      .AMD_rasterization_order = device->rad_info.has_out_of_order_rast,
      .AMD_shader_ballot = true,
      .AMD_shader_core_properties = true,
      .AMD_shader_core_properties2 = true,
      .AMD_shader_explicit_vertex_parameter = true,
      .AMD_shader_fragment_mask = true,
      .AMD_shader_image_load_store_lod = true,
      .AMD_shader_info = true,
      .AMD_shader_trinary_minmax = true,
      .AMD_texture_gather_bias_lod = true,
#ifdef ANDROID
      .ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,
      .ANDROID_native_buffer = true,
#endif
      .GOOGLE_decorate_string = true,
      .GOOGLE_hlsl_functionality1 = true,
      .GOOGLE_user_type = true,
      .NV_compute_shader_derivatives = true,
      .VALVE_mutable_descriptor_type = true,
   };
}

static VkResult
radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,
                                struct radv_physical_device **device_out)
{
   VkResult result;
   int fd = -1;
   int master_fd = -1;

#ifdef _WIN32
   assert(drm_device == NULL);
#else
   if (drm_device) {
      const char *path = drm_device->nodes[DRM_NODE_RENDER];
      drmVersionPtr version;

      fd = open(path, O_RDWR | O_CLOEXEC);
      if (fd < 0) {
         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not open device '%s'", path);

         return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);
      }

      version = drmGetVersion(fd);
      if (!version) {
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Could not get the kernel driver version for device '%s'", path);

         return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to get version %s: %m",
                          path);
      }

      if (strcmp(version->name, "amdgpu")) {
         drmFreeVersion(version);
         close(fd);

         if (instance->debug_flags & RADV_DEBUG_STARTUP)
            radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);

         return VK_ERROR_INCOMPATIBLE_DRIVER;
      }
      drmFreeVersion(version);

      if (instance->debug_flags & RADV_DEBUG_STARTUP)
         radv_logi("Found compatible device '%s'.", path);
   }
#endif

   struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,
                                                    VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!device) {
      result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail_fd;
   }

   struct vk_physical_device_dispatch_table dispatch_table;
   vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,
                                                      &radv_physical_device_entrypoints, true);

   result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);
   if (result != VK_SUCCESS) {
      goto fail_alloc;
   }

   device->instance = instance;

#ifdef _WIN32
   device->ws = radv_null_winsys_create();
#else
   if (drm_device) {
      device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, false);
   } else {
      device->ws = radv_null_winsys_create();
   }
#endif

   if (!device->ws) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");
      goto fail_base;
   }

#ifndef _WIN32
   if (drm_device && instance->vk.enabled_extensions.KHR_display) {
      master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);
      if (master_fd >= 0) {
         uint32_t accel_working = 0;
         struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,
                                           .return_size = sizeof(accel_working),
                                           .query = AMDGPU_INFO_ACCEL_WORKING};

         if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) < 0 ||
             !accel_working) {
            close(master_fd);
            master_fd = -1;
         }
      }
   }
#endif

   device->master_fd = master_fd;
   device->local_fd = fd;
   device->ws->query_info(device->ws, &device->rad_info);

   device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;

   snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,
            radv_get_compiler_string(device));

#ifdef ENABLE_SHADER_CACHE
   if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {
      result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");
      goto fail_wsi;
   }

   /* These flags affect shader compilation. */
   uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);

   /* The gpu id is already embedded in the uuid so we just pass "radv"
    * when creating the cache.
    */
   char buf[VK_UUID_SIZE * 2 + 1];
   disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);
   device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);
#endif
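
   /* Size check on the buffer above: VK_UUID_SIZE is 16, so buf holds the
    * 32 hex characters written by disk_cache_format_hex_id() plus a
    * terminating NUL (16 * 2 + 1 bytes).
    */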

   if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX10)
      vk_warn_non_conformant_implementation("radv");

   radv_get_driver_uuid(&device->driver_uuid);
   radv_get_device_uuid(&device->rad_info, &device->device_uuid);

   device->out_of_order_rast_allowed =
      device->rad_info.has_out_of_order_rast &&
      !(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);

   device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);

   device->use_ngg = device->rad_info.chip_class >= GFX10 &&
                     device->rad_info.family != CHIP_NAVI14 &&
                     !(device->instance->debug_flags & RADV_DEBUG_NO_NGG);

   device->use_ngg_streamout = false;

   /* Determine the number of threads per wave for all stages. */
   device->cs_wave_size = 64;
   device->ps_wave_size = 64;
   device->ge_wave_size = 64;

   if (device->rad_info.chip_class >= GFX10) {
      if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)
         device->cs_wave_size = 32;

      /* For pixel shaders, wave64 is recommended. */
      if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)
         device->ps_wave_size = 32;

      if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)
         device->ge_wave_size = 32;
   }

   radv_physical_device_init_mem_types(device);

   radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);

#ifndef _WIN32
   if (drm_device) {
      struct stat primary_stat = {0}, render_stat = {0};

      device->available_nodes = drm_device->available_nodes;
      device->bus_info = *drm_device->businfo.pci;

      if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&
          stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM primary node %s",
                            drm_device->nodes[DRM_NODE_PRIMARY]);
         goto fail_disk_cache;
      }
      device->primary_devid = primary_stat.st_rdev;

      if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&
          stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {
         result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,
                            "failed to stat DRM render node %s",
                            drm_device->nodes[DRM_NODE_RENDER]);
         goto fail_disk_cache;
      }
      device->render_devid = render_stat.st_rdev;
   }
#endif

   if ((device->instance->debug_flags & RADV_DEBUG_INFO))
      ac_print_gpu_info(&device->rad_info, stdout);

   /* The WSI is structured as a layer on top of the driver, so this has
    * to be the last part of initialization (at least until we get other
    * semi-layers).
    */
   result = radv_init_wsi(device);
   if (result != VK_SUCCESS) {
      vk_error(instance, result);
      goto fail_disk_cache;
   }

   *device_out = device;

   return VK_SUCCESS;

fail_disk_cache:
   disk_cache_destroy(device->disk_cache);
#ifdef ENABLE_SHADER_CACHE
fail_wsi:
#endif
   device->ws->destroy(device->ws);
fail_base:
   vk_physical_device_finish(&device->vk);
fail_alloc:
   vk_free(&instance->vk.alloc, device);
fail_fd:
   if (fd != -1)
      close(fd);
   if (master_fd != -1)
      close(master_fd);
   return result;
}

static void
radv_physical_device_destroy(struct radv_physical_device *device)
{
   radv_finish_wsi(device);
   device->ws->destroy(device->ws);
   disk_cache_destroy(device->disk_cache);
   if (device->local_fd != -1)
      close(device->local_fd);
   if (device->master_fd != -1)
      close(device->master_fd);
   vk_physical_device_finish(&device->vk);
   vk_free(&device->instance->vk.alloc, device);
}

static const struct debug_control radv_debug_options[] = {
   {"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},
   {"nodcc", RADV_DEBUG_NO_DCC},
   {"shaders", RADV_DEBUG_DUMP_SHADERS},
   {"nocache", RADV_DEBUG_NO_CACHE},
   {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},
   {"nohiz", RADV_DEBUG_NO_HIZ},
   {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},
   {"allbos", RADV_DEBUG_ALL_BOS},
   {"noibs", RADV_DEBUG_NO_IBS},
   {"spirv", RADV_DEBUG_DUMP_SPIRV},
   {"vmfaults", RADV_DEBUG_VM_FAULTS},
   {"zerovram", RADV_DEBUG_ZERO_VRAM},
   {"syncshaders", RADV_DEBUG_SYNC_SHADERS},
   {"preoptir", RADV_DEBUG_PREOPTIR},
   {"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},
   {"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},
   {"info", RADV_DEBUG_INFO},
   {"errors", RADV_DEBUG_ERRORS},
   {"startup", RADV_DEBUG_STARTUP},
   {"checkir", RADV_DEBUG_CHECKIR},
   {"nobinning", RADV_DEBUG_NOBINNING},
   {"nongg", RADV_DEBUG_NO_NGG},
   {"metashaders", RADV_DEBUG_DUMP_META_SHADERS},
   {"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},
   {"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},
   {"llvm", RADV_DEBUG_LLVM},
   {"forcecompress", RADV_DEBUG_FORCE_COMPRESS},
   {"hang", RADV_DEBUG_HANG},
   {"img", RADV_DEBUG_IMG},
   {"noumr", RADV_DEBUG_NO_UMR},
   {"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},
   {"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},
   {"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},
   {"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},
   {NULL, 0}};

const char *
radv_get_debug_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_debug_options) - 1);
   return radv_debug_options[id].string;
}

static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_PERFTEST_LOCAL_BOS},
                                                             {"dccmsaa", RADV_PERFTEST_DCC_MSAA},
                                                             {"bolist", RADV_PERFTEST_BO_LIST},
                                                             {"cswave32", RADV_PERFTEST_CS_WAVE_32},
                                                             {"pswave32", RADV_PERFTEST_PS_WAVE_32},
                                                             {"gewave32", RADV_PERFTEST_GE_WAVE_32},
                                                             {"nosam", RADV_PERFTEST_NO_SAM},
                                                             {"sam", RADV_PERFTEST_SAM},
                                                             {"rt", RADV_PERFTEST_RT},
                                                             {"nggc", RADV_PERFTEST_NGGC},
                                                             {NULL, 0}};

const char *
radv_get_perftest_option_name(int id)
{
   assert(id < ARRAY_SIZE(radv_perftest_options) - 1);
   return radv_perftest_options[id].string;
}
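
/* Usage sketch: both tables feed parse_debug_string() in radv_CreateInstance()
 * below, which takes a comma-separated option list from the environment, e.g.
 * (values illustrative):
 *
 *    RADV_DEBUG=nodcc,zerovram RADV_PERFTEST=cswave32 ./app
 */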

// clang-format off
static const driOptionDescription radv_dri_options[] = {
   DRI_CONF_SECTION_PERFORMANCE
      DRI_CONF_ADAPTIVE_SYNC(true)
      DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)
      DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)
      DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)
      DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)
      DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)
      DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)
      DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)
      DRI_CONF_RADV_ABSOLUTE_DEPTH_BIAS(false)
      DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)
   DRI_CONF_SECTION_END

   DRI_CONF_SECTION_DEBUG
      DRI_CONF_OVERRIDE_VRAM_SIZE()
      DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)
      DRI_CONF_RADV_ZERO_VRAM(false)
      DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)
      DRI_CONF_RADV_INVARIANT_GEOM(false)
      DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)
      DRI_CONF_RADV_DISABLE_DCC(false)
      DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)
   DRI_CONF_SECTION_END
};
// clang-format on

static void
radv_init_dri_options(struct radv_instance *instance)
{
   driParseOptionInfo(&instance->available_dri_options, radv_dri_options,
                      ARRAY_SIZE(radv_dri_options));
   driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL,
                       instance->vk.app_info.app_name, instance->vk.app_info.app_version,
                       instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);

   instance->enable_mrt_output_nan_fixup =
      driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");

   instance->disable_shrink_image_store =
      driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");

   instance->absolute_depth_bias =
      driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");

   instance->disable_tc_compat_htile_in_general =
      driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");

   if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))
      instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;

   if (driQueryOptionb(&instance->dri_options, "radv_zero_vram"))
      instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;

   if (driQueryOptionb(&instance->dri_options, "radv_lower_discard_to_demote"))
      instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;

   if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))
      instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;

   if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))
      instance->debug_flags |= RADV_DEBUG_NO_DCC;

   instance->report_apu_as_dgpu =
      driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu");
}

VkResult
radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,
                    const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)
{
   struct radv_instance *instance;
   VkResult result;

   if (!pAllocator)
      pAllocator = vk_default_allocator();

   instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_instance_dispatch_table dispatch_table;
   vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);
   result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table,
                             pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(pAllocator, instance);
      return vk_error(instance, result);
   }

   instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);
   instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Created an instance");

   instance->physical_devices_enumerated = false;
   list_inithead(&instance->physical_devices);

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   radv_init_dri_options(instance);

   *pInstance = radv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void
radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   if (!instance)
      return;

   list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      radv_physical_device_destroy(pdevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   driDestroyOptionCache(&instance->dri_options);
   driDestroyOptionInfo(&instance->available_dri_options);

   vk_instance_finish(&instance->vk);
   vk_free(&instance->vk.alloc, instance);
}

static VkResult
radv_enumerate_physical_devices(struct radv_instance *instance)
{
   if (instance->physical_devices_enumerated)
      return VK_SUCCESS;

   instance->physical_devices_enumerated = true;

   VkResult result = VK_SUCCESS;

   if (getenv("RADV_FORCE_FAMILY")) {
      /* When RADV_FORCE_FAMILY is set, the driver creates a null
       * device that allows testing the compiler without having an
       * AMDGPU instance.
       */
      struct radv_physical_device *pdevice;

      result = radv_physical_device_try_create(instance, NULL, &pdevice);
      if (result != VK_SUCCESS)
         return result;

      list_addtail(&pdevice->link, &instance->physical_devices);
      return VK_SUCCESS;
   }

#ifndef _WIN32
   /* TODO: Check for more devices? */
   drmDevicePtr devices[8];
   int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));

   if (instance->debug_flags & RADV_DEBUG_STARTUP)
      radv_logi("Found %d drm nodes", max_devices);

   if (max_devices < 1)
      return vk_error(instance, VK_SUCCESS);

   for (unsigned i = 0; i < (unsigned)max_devices; i++) {
      if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&
          devices[i]->bustype == DRM_BUS_PCI &&
          devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {

         struct radv_physical_device *pdevice;
         result = radv_physical_device_try_create(instance, devices[i], &pdevice);
         /* Incompatible DRM device, skip. */
         if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
            result = VK_SUCCESS;
            continue;
         }

         /* Error creating the physical device, report the error. */
         if (result != VK_SUCCESS)
            break;

         list_addtail(&pdevice->link, &instance->physical_devices);
      }
   }
   drmFreeDevices(devices, max_devices);
#endif

   /* If we successfully enumerated any devices, call it success */
   return result;
}

VkResult
radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,
                              VkPhysicalDevice *pPhysicalDevices)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDevice, &out, i)
      {
         *i = radv_physical_device_to_handle(pdevice);
      }
   }

   return vk_outarray_status(&out);
}

VkResult
radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,
                                   VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,
                          pPhysicalDeviceGroupCount);

   VkResult result = radv_enumerate_physical_devices(instance);
   if (result != VK_SUCCESS)
      return result;

   list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)
   {
      vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)
      {
         p->physicalDeviceCount = 1;
         memset(p->physicalDevices, 0, sizeof(p->physicalDevices));
         p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);
         p->subsetAllocation = false;
      }
   }

   return vk_outarray_status(&out);
}

void
radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   memset(pFeatures, 0, sizeof(*pFeatures));

   *pFeatures = (VkPhysicalDeviceFeatures){
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = true,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = true,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = false,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = radv_device_supports_etc(pdevice),
      .textureCompressionASTC_LDR = false,
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = true,
      .vertexPipelineStoresAndAtomics = true,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = true,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderStorageImageReadWithoutFormat = true,
      .shaderStorageImageWriteWithoutFormat = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = true,
      .shaderInt64 = true,
      .shaderInt16 = true,
      .sparseBinding = true,
      .sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,
      .sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,
      .variableMultisampleRate = true,
      .shaderResourceMinLod = true,
      .shaderResourceResidency = true,
      .inheritedQueries = true,
   };
}

static void
radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan11Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);

   f->storageBuffer16BitAccess = true;
   f->uniformAndStorageBuffer16BitAccess = true;
   f->storagePushConstant16 = true;
   f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit;
   f->multiview = true;
   f->multiviewGeometryShader = true;
   f->multiviewTessellationShader = true;
   f->variablePointersStorageBuffer = true;
   f->variablePointers = true;
   f->protectedMemory = false;
   f->samplerYcbcrConversion = true;
   f->shaderDrawParameters = true;
}

static void
radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,
                                      VkPhysicalDeviceVulkan12Features *f)
{
   assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);

   f->samplerMirrorClampToEdge = true;
   f->drawIndirectCount = true;
   f->storageBuffer8BitAccess = true;
   f->uniformAndStorageBuffer8BitAccess = true;
   f->storagePushConstant8 = true;
   f->shaderBufferInt64Atomics = true;
   f->shaderSharedInt64Atomics = true;
   f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;
   f->shaderInt8 = true;

   f->descriptorIndexing = true;
   f->shaderInputAttachmentArrayDynamicIndexing = true;
   f->shaderUniformTexelBufferArrayDynamicIndexing = true;
   f->shaderStorageTexelBufferArrayDynamicIndexing = true;
   f->shaderUniformBufferArrayNonUniformIndexing = true;
   f->shaderSampledImageArrayNonUniformIndexing = true;
   f->shaderStorageBufferArrayNonUniformIndexing = true;
   f->shaderStorageImageArrayNonUniformIndexing = true;
   f->shaderInputAttachmentArrayNonUniformIndexing = true;
   f->shaderUniformTexelBufferArrayNonUniformIndexing = true;
   f->shaderStorageTexelBufferArrayNonUniformIndexing = true;
   f->descriptorBindingUniformBufferUpdateAfterBind = true;
   f->descriptorBindingSampledImageUpdateAfterBind = true;
   f->descriptorBindingStorageImageUpdateAfterBind = true;
   f->descriptorBindingStorageBufferUpdateAfterBind = true;
   f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;
   f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;
   f->descriptorBindingUpdateUnusedWhilePending = true;
   f->descriptorBindingPartiallyBound = true;
   f->descriptorBindingVariableDescriptorCount = true;
   f->runtimeDescriptorArray = true;

   f->samplerFilterMinmax = true;
   f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;
   f->imagelessFramebuffer = true;
   f->uniformBufferStandardLayout = true;
   f->shaderSubgroupExtendedTypes = true;
   f->separateDepthStencilLayouts = true;
   f->hostQueryReset = true;
   f->timelineSemaphore = true;
   f->bufferDeviceAddress = true;
   f->bufferDeviceAddressCaptureReplay = true;
   f->bufferDeviceAddressMultiDevice = false;
   f->vulkanMemoryModel = true;
   f->vulkanMemoryModelDeviceScope = true;
   f->vulkanMemoryModelAvailabilityVisibilityChains = false;
   f->shaderOutputViewportIndex = true;
   f->shaderOutputLayer = true;
   f->subgroupBroadcastDynamicId = true;
}

void
radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,
                                VkPhysicalDeviceFeatures2 *pFeatures)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);

   VkPhysicalDeviceVulkan11Features core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,
   };
   radv_get_physical_device_features_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Features core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,
   };
   radv_get_physical_device_features_1_2(pdevice, &core_1_2);

#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature
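
/* The macro relies on token pasting: for example,
 *
 *    CORE_FEATURE(1, 2, hostQueryReset)
 *       => features->hostQueryReset = core_1_2.hostQueryReset;
 *
 * so each per-extension struct in the switch below can be filled from the
 * core feature structs queried above.
 */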

   vk_foreach_struct(ext, pFeatures->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {
         VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;
         CORE_FEATURE(1, 1, variablePointersStorageBuffer);
         CORE_FEATURE(1, 1, variablePointers);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {
         VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures *)ext;
         CORE_FEATURE(1, 1, multiview);
         CORE_FEATURE(1, 1, multiviewGeometryShader);
         CORE_FEATURE(1, 1, multiviewTessellationShader);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {
         VkPhysicalDeviceShaderDrawParametersFeatures *features =
            (VkPhysicalDeviceShaderDrawParametersFeatures *)ext;
         CORE_FEATURE(1, 1, shaderDrawParameters);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {
         VkPhysicalDeviceProtectedMemoryFeatures *features =
            (VkPhysicalDeviceProtectedMemoryFeatures *)ext;
         CORE_FEATURE(1, 1, protectedMemory);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {
         VkPhysicalDevice16BitStorageFeatures *features =
            (VkPhysicalDevice16BitStorageFeatures *)ext;
         CORE_FEATURE(1, 1, storageBuffer16BitAccess);
         CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);
         CORE_FEATURE(1, 1, storagePushConstant16);
         CORE_FEATURE(1, 1, storageInputOutput16);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {
         VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =
            (VkPhysicalDeviceSamplerYcbcrConversionFeatures *)ext;
         CORE_FEATURE(1, 1, samplerYcbcrConversion);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {
         VkPhysicalDeviceDescriptorIndexingFeatures *features =
            (VkPhysicalDeviceDescriptorIndexingFeatures *)ext;
         CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);
         CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);
         CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);
         CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);
         CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);
         CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);
         CORE_FEATURE(1, 2, runtimeDescriptorArray);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {
         VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =
            (VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;
         features->conditionalRendering = true;
         features->inheritedConditionalRendering = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =
            (VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;
         features->vertexAttributeInstanceRateDivisor = true;
         features->vertexAttributeInstanceRateZeroDivisor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {
         VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =
            (VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;
         features->transformFeedback = true;
         features->geometryStreams = !pdevice->use_ngg_streamout;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {
         VkPhysicalDeviceScalarBlockLayoutFeatures *features =
            (VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, scalarBlockLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {
         VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =
            (VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;
         features->memoryPriority = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {
         VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =
            (VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {
         VkPhysicalDeviceBufferDeviceAddressFeatures *features =
            (VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;
         CORE_FEATURE(1, 2, bufferDeviceAddress);
         CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);
         CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =
            (VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;
         features->depthClipEnable = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {
         VkPhysicalDeviceHostQueryResetFeatures *features =
            (VkPhysicalDeviceHostQueryResetFeatures *)ext;
         CORE_FEATURE(1, 2, hostQueryReset);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {
         VkPhysicalDevice8BitStorageFeatures *features = (VkPhysicalDevice8BitStorageFeatures *)ext;
         CORE_FEATURE(1, 2, storageBuffer8BitAccess);
         CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);
         CORE_FEATURE(1, 2, storagePushConstant8);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {
         VkPhysicalDeviceShaderFloat16Int8Features *features =
            (VkPhysicalDeviceShaderFloat16Int8Features *)ext;
         CORE_FEATURE(1, 2, shaderFloat16);
         CORE_FEATURE(1, 2, shaderInt8);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {
         VkPhysicalDeviceShaderAtomicInt64Features *features =
            (VkPhysicalDeviceShaderAtomicInt64Features *)ext;
         CORE_FEATURE(1, 2, shaderBufferInt64Atomics);
         CORE_FEATURE(1, 2, shaderSharedInt64Atomics);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {
         VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =
            (VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;
         features->shaderDemoteToHelperInvocation = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {
         VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =
            (VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;

         features->inlineUniformBlock = true;
         features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {
         VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =
            (VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;
         features->computeDerivativeGroupQuads = false;
         features->computeDerivativeGroupLinear = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {
         VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =
            (VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;
         features->ycbcrImageArrays = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {
         VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =
            (VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;
         CORE_FEATURE(1, 2, uniformBufferStandardLayout);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {
         VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =
            (VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;
         features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {
         VkPhysicalDeviceImagelessFramebufferFeatures *features =
            (VkPhysicalDeviceImagelessFramebufferFeatures *)ext;
         CORE_FEATURE(1, 2, imagelessFramebuffer);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {
         VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =
            (VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;
         features->pipelineExecutableInfo = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {
         VkPhysicalDeviceShaderClockFeaturesKHR *features =
            (VkPhysicalDeviceShaderClockFeaturesKHR *)ext;
         features->shaderSubgroupClock = true;
         features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =
            (VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;
         features->texelBufferAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {
         VkPhysicalDeviceTimelineSemaphoreFeatures *features =
            (VkPhysicalDeviceTimelineSemaphoreFeatures *)ext;
         CORE_FEATURE(1, 2, timelineSemaphore);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =
            (VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;
         features->subgroupSizeControl = true;
         features->computeFullSubgroups = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {
         VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =
            (VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;
         features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {
         VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =
            (VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;
         CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {
         VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =
            (VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;
         CORE_FEATURE(1, 2, separateDepthStencilLayouts);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {
         radv_get_physical_device_features_1_1(pdevice, (void *)ext);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {
         radv_get_physical_device_features_1_2(pdevice, (void *)ext);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {
         VkPhysicalDeviceLineRasterizationFeaturesEXT *features =
            (VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;
         features->rectangularLines = false;
         features->bresenhamLines = true;
         features->smoothLines = false;
         features->stippledRectangularLines = false;
         /* FIXME: Some stippled Bresenham CTS tests fail on Vega10
          * but work on Raven.
          */
1504
features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;
1505
         features->stippledSmoothLines = false;
         break;
      }
      case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
         VkDeviceMemoryOverallocationCreateInfoAMD *features =
            (VkDeviceMemoryOverallocationCreateInfoAMD *)ext;
         features->overallocationBehavior = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         VkPhysicalDeviceRobustness2FeaturesEXT *features =
            (VkPhysicalDeviceRobustness2FeaturesEXT *)ext;
         features->robustBufferAccess2 = true;
         features->robustImageAccess2 = true;
         features->nullDescriptor = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =
            (VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;
         features->customBorderColors = true;
         features->customBorderColorWithoutFormat = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {
         VkPhysicalDevicePrivateDataFeaturesEXT *features =
            (VkPhysicalDevicePrivateDataFeaturesEXT *)ext;
         features->privateData = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {
         VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =
            (VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;
         features->pipelineCreationCacheControl = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {
         VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =
            (VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;
         CORE_FEATURE(1, 2, vulkanMemoryModel);
         CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);
         CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;
         features->extendedDynamicState = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {
         VkPhysicalDeviceImageRobustnessFeaturesEXT *features =
            (VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;
         features->robustImageAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {
         VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =
            (VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;
         features->shaderBufferFloat32Atomics = true;
         features->shaderBufferFloat32AtomicAdd = false;
         features->shaderBufferFloat64Atomics = true;
         features->shaderBufferFloat64AtomicAdd = false;
         features->shaderSharedFloat32Atomics = true;
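         /* We assume the LDS float-add instruction (ds_add_f32) is GFX8+,
          * which is why the shared-memory add is gated on GFX8 below.
          */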
         features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8;
         features->shaderSharedFloat64Atomics = true;
         features->shaderSharedFloat64AtomicAdd = false;
         features->shaderImageFloat32Atomics = true;
         features->shaderImageFloat32AtomicAdd = false;
         features->sparseImageFloat32Atomics = true;
         features->sparseImageFloat32AtomicAdd = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {
         VkPhysicalDevice4444FormatsFeaturesEXT *features =
            (VkPhysicalDevice4444FormatsFeaturesEXT *)ext;
         features->formatA4R4G4B4 = true;
         features->formatA4B4G4R4 = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {
         VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =
            (VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;
         features->shaderTerminateInvocation = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {
         VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =
            (VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;
         features->shaderImageInt64Atomics = true;
         features->sparseImageInt64Atomics = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {
         VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =
            (VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;
         features->mutableDescriptorType = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
         VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =
            (VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;
         features->pipelineFragmentShadingRate = true;
         features->primitiveFragmentShadingRate = true;
         features->attachmentFragmentShadingRate = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {
         VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =
            (VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;
         features->workgroupMemoryExplicitLayout = true;
         features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;
         features->workgroupMemoryExplicitLayout8BitAccess = true;
         features->workgroupMemoryExplicitLayout16BitAccess = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {
         VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =
            (VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;
         features->shaderZeroInitializeWorkgroupMemory = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {
         VkPhysicalDeviceProvokingVertexFeaturesEXT *features =
            (VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;
         features->provokingVertexLast = true;
         features->transformFeedbackPreservesProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {
         VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =
            (VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;
         features->extendedDynamicState2 = true;
         features->extendedDynamicState2LogicOp = true;
         features->extendedDynamicState2PatchControlPoints = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {
         VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =
            (VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;
         features->globalPriorityQuery = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {
         VkPhysicalDeviceAccelerationStructureFeaturesKHR *features =
            (VkPhysicalDeviceAccelerationStructureFeaturesKHR *)ext;
         features->accelerationStructure = true;
         features->accelerationStructureCaptureReplay = false;
         features->accelerationStructureIndirectBuild = false;
         features->accelerationStructureHostCommands = true;
         features->descriptorBindingAccelerationStructureUpdateAfterBind = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {
         VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =
            (VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;
         features->shaderSubgroupUniformControlFlow = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {
         VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;
         features->multiDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {
         VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =
            (VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;
         features->colorWriteEnable = true;
         break;
      }
      default:
         break;
      }
   }
#undef CORE_FEATURE
}

static size_t
radv_max_descriptor_set_size()
{
   /* Make sure that the entire descriptor set is addressable with a signed
    * 32-bit int, i.e. the sum of all limits scaled by descriptor size must
    * be at most 2 GiB. A combined image & sampler descriptor counts as one
    * of each. This limit is for the pipeline layout, not for the set
    * layout, but there is no set limit, so we just set a pipeline limit.
    * I don't think any app is going to hit this soon. */
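   /* The worst-case per-slot sizes below sum to 32 + 32 + 32 + 64 + 64 = 224
    * bytes, so this evaluates to roughly 2^31 / 224, on the order of 9.5
    * million descriptors (slightly less after the subtracted overheads).
    */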
   return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
           MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
          (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
           32 /* storage buffer, 32 due to potential space wasted on alignment */ +
           32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
           64 /* storage image */);
}

static uint32_t
radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)
{
   uint32_t uniform_offset_alignment =
      driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");
   if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {
      fprintf(stderr,
              "ERROR: invalid radv_override_uniform_offset_alignment setting %d: "
              "not a power of two\n",
              uniform_offset_alignment);
      uniform_offset_alignment = 0;
   }

   /* Take at least the hardware limit. */
   return MAX2(uniform_offset_alignment, 4);
}

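/* Usage sketch (assumed drirc snippet, not from this file): the override can
 * be set from a dri config file, e.g.
 *    <option name="radv_override_uniform_offset_alignment" value="256" />
 * which would raise minUniformBufferOffsetAlignment below to 256 bytes.
 */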
void
radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,
                                 VkPhysicalDeviceProperties *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
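   /* 0xf = VK_SAMPLE_COUNT_{1,2,4,8}_BIT, i.e. every sample-count limit
    * below advertises 1x through 8x MSAA.
    */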
   VkSampleCountFlags sample_counts = 0xf;

   size_t max_descriptor_set_size = radv_max_descriptor_set_size();

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = UINT32_MAX,
      .maxUniformBufferRange = UINT32_MAX,
      .maxStorageBufferRange = UINT32_MAX,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 64, /* A cache line */
      .sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = max_descriptor_set_size,
      .maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,
      .maxPerStageDescriptorSampledImages = max_descriptor_set_size,
      .maxPerStageDescriptorStorageImages = max_descriptor_set_size,
      .maxPerStageDescriptorInputAttachments = max_descriptor_set_size,
      .maxPerStageResources = max_descriptor_set_size,
      .maxDescriptorSetSamplers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffers = max_descriptor_set_size,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,
      .maxDescriptorSetStorageBuffers = max_descriptor_set_size,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,
      .maxDescriptorSetSampledImages = max_descriptor_set_size,
      .maxDescriptorSetStorageImages = max_descriptor_set_size,
      .maxDescriptorSetInputAttachments = max_descriptor_set_size,
      .maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,
      .maxVertexInputBindings = MAX_VBS,
      .maxVertexInputAttributeOffset = UINT32_MAX,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 120,
      .maxTessellationControlTotalOutputComponents = 4096,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 127,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 128,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = 8,
      .maxComputeSharedMemorySize = pdevice->rad_info.chip_class >= GFX7 ? 65536 : 32768,
      .maxComputeWorkGroupCount = {65535, 65535, 65535},
      .maxComputeWorkGroupInvocations = 1024,
      .maxComputeWorkGroupSize = {1024, 1024, 1024},
      .subPixelPrecisionBits = 8,
      .subTexelPrecisionBits = 8,
      .mipmapPrecisionBits = 8,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = {(1 << 14), (1 << 14)},
      .viewportBoundsRange = {INT16_MIN, INT16_MAX},
      .viewportSubPixelBits = 8,
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 4,
      .minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),
      .minStorageBufferOffsetAlignment = 4,
      .minTexelOffset = -32,
      .maxTexelOffset = 31,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -2,
      .maxInterpolationOffset = 2,
      .subPixelInterpolationOffsetBits = 8,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 10),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = sample_counts,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = sample_counts,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = true,
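      /* Assuming clock_crystal_freq is reported in kHz (the amdgpu unit),
       * 1e6 / kHz yields the timestamp period in nanoseconds per tick, the
       * unit Vulkan requires here.
       */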
      .timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 2,
      .pointSizeRange = {0.0, 8191.875},
      .lineWidthRange = {0.0, 8191.875},
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 8.0),
      .strictLines = false, /* FINISHME */
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,
   };

   VkPhysicalDeviceType device_type;

   if (pdevice->rad_info.has_dedicated_vram || pdevice->instance->report_apu_as_dgpu) {
      device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;
   } else {
      device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;
   }

   *pProperties = (VkPhysicalDeviceProperties){
      .apiVersion = RADV_API_VERSION,
      .driverVersion = vk_get_driver_version(),
      .vendorID = ATI_VENDOR_ID,
      .deviceID = pdevice->rad_info.pci_id,
      .deviceType = device_type,
      .limits = limits,
      .sparseProperties =
         {
            .residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,
            .residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,
         },
   };

   strcpy(pProperties->deviceName, pdevice->name);
   memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);
}

static void
radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan11Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);

   memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);
   memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);
   memset(p->deviceLUID, 0, VK_LUID_SIZE);
   /* The LUID is for Windows. */
   p->deviceLUIDValid = false;
   p->deviceNodeMask = 0;

   p->subgroupSize = RADV_SUBGROUP_SIZE;
   p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;
   p->subgroupSupportedOperations =
      VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |
      VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |
      VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |
      VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;
   p->subgroupQuadOperationsInAllStages = true;

   p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;
   p->maxMultiviewViewCount = MAX_VIEWS;
   p->maxMultiviewInstanceIndex = INT_MAX;
   p->protectedNoFault = false;
   p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;
   p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;
}

static void
radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,
                                        VkPhysicalDeviceVulkan12Properties *p)
{
   assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);

   p->driverID = VK_DRIVER_ID_MESA_RADV;
   snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");
   snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",
            radv_get_compiler_string(pdevice));
   p->conformanceVersion = (VkConformanceVersion){
      .major = 1,
      .minor = 2,
      .subminor = 3,
      .patch = 0,
   };

   /* On AMD hardware, denormals and rounding modes for fp16/fp64 are
    * controlled by the same config register.
    */
   if (pdevice->rad_info.has_packed_math_16bit) {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;
   } else {
      p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
      p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;
   }

   /* With LLVM, do not allow both preserving and flushing denorms because
    * different shaders in the same pipeline can have different settings and
    * this won't work for merged shaders. To make it work, this requires LLVM
    * support for changing the register. The same logic applies for the
    * rounding modes because they are configured with the same config
    * register.
    */
   p->shaderDenormFlushToZeroFloat32 = true;
   p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;
   p->shaderRoundingModeRTEFloat32 = true;
   p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat32 = true;

   p->shaderDenormFlushToZeroFloat16 =
      pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;
   p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;

   p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
   p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;
   p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;
   p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;
   p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;

   p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;
   p->shaderUniformBufferArrayNonUniformIndexingNative = false;
   p->shaderSampledImageArrayNonUniformIndexingNative = false;
   p->shaderStorageBufferArrayNonUniformIndexingNative = false;
   p->shaderStorageImageArrayNonUniformIndexingNative = false;
   p->shaderInputAttachmentArrayNonUniformIndexingNative = false;
   p->robustBufferAccessUpdateAfterBind = true;
   p->quadDivergentImplicitLod = false;

   size_t max_descriptor_set_size =
      ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -
       MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /
      (32 /* uniform buffer, 32 due to potential space wasted on alignment */ +
       32 /* storage buffer, 32 due to potential space wasted on alignment */ +
       32 /* sampler, largest when combined with image */ + 64 /* sampled image */ +
       64 /* storage image */);
   p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;
   p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;
   p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;
   p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;

   /* We support all of the depth resolve modes */
   p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                                   VK_RESOLVE_MODE_AVERAGE_BIT_KHR | VK_RESOLVE_MODE_MIN_BIT_KHR |
                                   VK_RESOLVE_MODE_MAX_BIT_KHR;

   /* Average doesn't make sense for stencil so we don't support that */
   p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |
                                     VK_RESOLVE_MODE_MIN_BIT_KHR | VK_RESOLVE_MODE_MAX_BIT_KHR;

   p->independentResolveNone = true;
   p->independentResolve = true;

   /* GFX6-8 only support single channel min/max filter. */
   p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;
   p->filterMinmaxSingleComponentFormats = true;

   p->maxTimelineSemaphoreValueDifference = UINT64_MAX;

   p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;
}

void
radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceProperties2 *pProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);

   VkPhysicalDeviceVulkan11Properties core_1_1 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,
   };
   radv_get_physical_device_properties_1_1(pdevice, &core_1_1);

   VkPhysicalDeviceVulkan12Properties core_1_2 = {
      .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,
   };
   radv_get_physical_device_properties_1_2(pdevice, &core_1_2);

#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \
   memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \
          sizeof(core_##major##_##minor.core_property))

#define CORE_PROPERTY(major, minor, property) \
   CORE_RENAMED_PROPERTY(major, minor, property, property)

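   /* For example, CORE_PROPERTY(1, 1, subgroupSize) expands to
    *    memcpy(&properties->subgroupSize, &core_1_1.subgroupSize,
    *           sizeof(core_1_1.subgroupSize));
    * i.e. it copies the already-computed core value into the matching field
    * of the extension struct being filled in each case below.
    */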
   vk_foreach_struct(ext, pProperties->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {
         VkPhysicalDevicePushDescriptorPropertiesKHR *properties =
            (VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;
         properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {
         VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties *)ext;
         CORE_PROPERTY(1, 1, deviceUUID);
         CORE_PROPERTY(1, 1, driverUUID);
         CORE_PROPERTY(1, 1, deviceLUID);
         CORE_PROPERTY(1, 1, deviceLUIDValid);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {
         VkPhysicalDeviceMultiviewProperties *properties =
            (VkPhysicalDeviceMultiviewProperties *)ext;
         CORE_PROPERTY(1, 1, maxMultiviewViewCount);
         CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {
         VkPhysicalDevicePointClippingProperties *properties =
            (VkPhysicalDevicePointClippingProperties *)ext;
         CORE_PROPERTY(1, 1, pointClippingBehavior);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {
         VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =
            (VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;
         properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {
         VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =
            (VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;
         properties->minImportedHostPointerAlignment = 4096;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {
         VkPhysicalDeviceSubgroupProperties *properties = (VkPhysicalDeviceSubgroupProperties *)ext;
         CORE_PROPERTY(1, 1, subgroupSize);
         CORE_RENAMED_PROPERTY(1, 1, supportedStages, subgroupSupportedStages);
         CORE_RENAMED_PROPERTY(1, 1, supportedOperations, subgroupSupportedOperations);
         CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages, subgroupQuadOperationsInAllStages);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {
         VkPhysicalDeviceMaintenance3Properties *properties =
            (VkPhysicalDeviceMaintenance3Properties *)ext;
         CORE_PROPERTY(1, 1, maxPerSetDescriptors);
         CORE_PROPERTY(1, 1, maxMemoryAllocationSize);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {
         VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =
            (VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;
         CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);
         CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {
         VkPhysicalDeviceShaderCorePropertiesAMD *properties =
            (VkPhysicalDeviceShaderCorePropertiesAMD *)ext;

         /* Shader engines. */
         properties->shaderEngineCount = pdevice->rad_info.max_se;
         properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;
         properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;
         properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;
         properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;
         properties->wavefrontSize = 64;

         /* SGPR. */
         properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;
         properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;
         properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;
         properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;

         /* VGPR. */
         properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;
         properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;
         properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;
         properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {
         VkPhysicalDeviceShaderCoreProperties2AMD *properties =
            (VkPhysicalDeviceShaderCoreProperties2AMD *)ext;

         properties->shaderCoreFeatures = 0;
         properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {
         VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =
            (VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;
         properties->maxVertexAttribDivisor = UINT32_MAX;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {
         VkPhysicalDeviceDescriptorIndexingProperties *properties =
            (VkPhysicalDeviceDescriptorIndexingProperties *)ext;
         CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);
         CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);
         CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);
         CORE_PROPERTY(1, 2, quadDivergentImplicitLod);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);
         CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);
         CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);
         CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {
         VkPhysicalDeviceProtectedMemoryProperties *properties =
            (VkPhysicalDeviceProtectedMemoryProperties *)ext;
         CORE_PROPERTY(1, 1, protectedNoFault);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =
            (VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;
         properties->primitiveOverestimationSize = 0;
         properties->maxExtraPrimitiveOverestimationSize = 0;
         properties->extraPrimitiveOverestimationSizeGranularity = 0;
         properties->primitiveUnderestimation = false;
         properties->conservativePointAndLineRasterization = false;
         properties->degenerateTrianglesRasterized = true;
         properties->degenerateLinesRasterized = false;
         properties->fullyCoveredFragmentShaderInputVariable = false;
         properties->conservativeRasterizationPostDepthCoverage = false;
         break;
      }
#ifndef _WIN32
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {
         VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =
            (VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;
         properties->pciDomain = pdevice->bus_info.domain;
         properties->pciBus = pdevice->bus_info.bus;
         properties->pciDevice = pdevice->bus_info.dev;
         properties->pciFunction = pdevice->bus_info.func;
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {
         VkPhysicalDeviceDriverProperties *properties = (VkPhysicalDeviceDriverProperties *)ext;
         CORE_PROPERTY(1, 2, driverID);
         CORE_PROPERTY(1, 2, driverName);
         CORE_PROPERTY(1, 2, driverInfo);
         CORE_PROPERTY(1, 2, conformanceVersion);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {
         VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =
            (VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;
         properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;
         properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;
         properties->maxTransformFeedbackBufferSize = UINT32_MAX;
         properties->maxTransformFeedbackStreamDataSize = 512;
         properties->maxTransformFeedbackBufferDataSize = 512;
         properties->maxTransformFeedbackBufferDataStride = 512;
         properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;
         properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;
         properties->transformFeedbackRasterizationStreamSelect = false;
         properties->transformFeedbackDraw = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {
         VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =
            (VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;

         props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;
         props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
         props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =
            MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;
         props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
         props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {
         VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =
            (VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;

         VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;
         if (pdevice->rad_info.chip_class < GFX10) {
            /* FIXME: Some MSAA8x tests fail for weird
             * reasons on GFX10+ when the same pattern is
             * used inside the same render pass.
             */
            supported_samples |= VK_SAMPLE_COUNT_8_BIT;
         }

         properties->sampleLocationSampleCounts = supported_samples;
         properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
         properties->sampleLocationCoordinateRange[0] = 0.0f;
         properties->sampleLocationCoordinateRange[1] = 0.9375f;
         properties->sampleLocationSubPixelBits = 4;
         properties->variableSampleLocations = false;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {
         VkPhysicalDeviceDepthStencilResolveProperties *properties =
            (VkPhysicalDeviceDepthStencilResolveProperties *)ext;
         CORE_PROPERTY(1, 2, supportedDepthResolveModes);
         CORE_PROPERTY(1, 2, supportedStencilResolveModes);
         CORE_PROPERTY(1, 2, independentResolveNone);
         CORE_PROPERTY(1, 2, independentResolve);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {
         VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =
            (VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;
         properties->storageTexelBufferOffsetAlignmentBytes = 4;
         properties->storageTexelBufferOffsetSingleTexelAlignment = true;
         properties->uniformTexelBufferOffsetAlignmentBytes = 4;
         properties->uniformTexelBufferOffsetSingleTexelAlignment = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES: {
         VkPhysicalDeviceFloatControlsProperties *properties =
            (VkPhysicalDeviceFloatControlsProperties *)ext;
         CORE_PROPERTY(1, 2, denormBehaviorIndependence);
         CORE_PROPERTY(1, 2, roundingModeIndependence);
         CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);
         CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);
         CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);
         CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);
         CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);
         CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);
         CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);
         CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);
         CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);
         CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {
         VkPhysicalDeviceTimelineSemaphoreProperties *properties =
            (VkPhysicalDeviceTimelineSemaphoreProperties *)ext;
         CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {
         VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =
            (VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;
         props->minSubgroupSize = 64;
         props->maxSubgroupSize = 64;
         props->maxComputeWorkgroupSubgroups = UINT32_MAX;
         props->requiredSubgroupSizeStages = 0;

         if (pdevice->rad_info.chip_class >= GFX10) {
            /* Only GFX10+ supports wave32. */
            props->minSubgroupSize = 32;
            props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;
         }
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:
         radv_get_physical_device_properties_1_1(pdevice, (void *)ext);
         break;
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:
         radv_get_physical_device_properties_1_2(pdevice, (void *)ext);
         break;
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {
         VkPhysicalDeviceLineRasterizationPropertiesEXT *props =
            (VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;
         props->lineSubPixelPrecisionBits = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {
         VkPhysicalDeviceRobustness2PropertiesEXT *properties =
            (VkPhysicalDeviceRobustness2PropertiesEXT *)ext;
         properties->robustStorageBufferAccessSizeAlignment = 4;
         properties->robustUniformBufferAccessSizeAlignment = 4;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {
         VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =
            (VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;
         props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {
         VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =
            (VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;
         props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
         props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};
         props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;
         props->primitiveFragmentShadingRateWithMultipleViewports = true;
         props->layeredShadingRateAttachments = false; /* TODO */
         props->fragmentShadingRateNonTrivialCombinerOps = true;
         props->maxFragmentSize = (VkExtent2D){2, 2};
         props->maxFragmentSizeAspectRatio = 1;
         props->maxFragmentShadingRateCoverageSamples = 2 * 2;
         props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;
         props->fragmentShadingRateWithShaderDepthStencilWrites = false;
         props->fragmentShadingRateWithSampleMask = true;
         props->fragmentShadingRateWithShaderSampleMask = false;
         props->fragmentShadingRateWithConservativeRasterization = true;
         props->fragmentShadingRateWithFragmentShaderInterlock = false;
         props->fragmentShadingRateWithCustomSampleLocations = true;
         props->fragmentShadingRateStrictMultiplyCombiner = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {
         VkPhysicalDeviceProvokingVertexPropertiesEXT *props =
            (VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;
         props->provokingVertexModePerPipeline = true;
         props->transformFeedbackPreservesTriangleFanProvokingVertex = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {
         VkPhysicalDeviceAccelerationStructurePropertiesKHR *props =
            (VkPhysicalDeviceAccelerationStructurePropertiesKHR *)ext;
         props->maxGeometryCount = (1 << 24) - 1;
         props->maxInstanceCount = (1 << 24) - 1;
         props->maxPrimitiveCount = (1 << 29) - 1;
         props->maxPerStageDescriptorAccelerationStructures =
            pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
         props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =
            pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;
         props->maxDescriptorSetAccelerationStructures =
            pProperties->properties.limits.maxDescriptorSetStorageBuffers;
         props->maxDescriptorSetUpdateAfterBindAccelerationStructures =
            pProperties->properties.limits.maxDescriptorSetStorageBuffers;
         props->minAccelerationStructureScratchOffsetAlignment = 128;
         break;
      }
#ifndef _WIN32
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {
         VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext;
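         /* major()/minor() (sys/sysmacros.h on Linux) split the cached dev_t
          * into the DRM node numbers this extension reports.
          */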
         if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {
            props->hasPrimary = true;
            props->primaryMajor = (int64_t)major(pdevice->primary_devid);
            props->primaryMinor = (int64_t)minor(pdevice->primary_devid);
         } else {
            props->hasPrimary = false;
         }
         if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {
            props->hasRender = true;
            props->renderMajor = (int64_t)major(pdevice->render_devid);
            props->renderMinor = (int64_t)minor(pdevice->render_devid);
         } else {
            props->hasRender = false;
         }
         break;
      }
#endif
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {
         VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;
         props->maxMultiDrawCount = 2048;
         break;
      }
      default:
         break;
      }
   }
}

static void
radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,
                                                 uint32_t *pCount,
                                                 VkQueueFamilyProperties **pQueueFamilyProperties)
{
   int num_queue_families = 1;
   int idx;
   if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))
      num_queue_families++;

   if (pQueueFamilyProperties == NULL) {
      *pCount = num_queue_families;
      return;
   }

   if (!*pCount)
      return;

   idx = 0;
   if (*pCount >= 1) {
      *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
         .queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |
                       VK_QUEUE_SPARSE_BINDING_BIT,
         .queueCount = 1,
         .timestampValidBits = 64,
         .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
      };
      idx++;
   }

   if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&
       !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {
      if (*pCount > idx) {
         *pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){
            .queueFlags =
               VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,
            .queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],
            .timestampValidBits = 64,
            .minImageTransferGranularity = (VkExtent3D){1, 1, 1},
         };
         idx++;
      }
   }
   *pCount = idx;
}

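/* Note: only up to two families (graphics+compute, plus the async compute
 * family) are ever reported above, so the three-element scratch arrays in
 * the wrappers below appear to be sized defensively.
 */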
void
radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t *pCount,
                                            VkQueueFamilyProperties *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   VkQueueFamilyProperties *properties[] = {
      pQueueFamilyProperties + 0,
      pQueueFamilyProperties + 1,
      pQueueFamilyProperties + 2,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 3);
}

static const VkQueueGlobalPriorityEXT radv_global_queue_priorities[] = {
   VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,
   VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,
   VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
   VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,
};

void
radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,
                                             VkQueueFamilyProperties2 *pQueueFamilyProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   if (!pQueueFamilyProperties) {
      radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);
      return;
   }
   VkQueueFamilyProperties *properties[] = {
      &pQueueFamilyProperties[0].queueFamilyProperties,
      &pQueueFamilyProperties[1].queueFamilyProperties,
      &pQueueFamilyProperties[2].queueFamilyProperties,
   };
   radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);
   assert(*pCount <= 3);

   for (uint32_t i = 0; i < *pCount; i++) {
      vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)
      {
         switch (ext->sType) {
         case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {
            VkQueueFamilyGlobalPriorityPropertiesEXT *prop =
               (VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;
            STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_EXT);
            prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);
            memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));
            break;
         }
         default:
            break;
         }
      }
   }
}

void
radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,
                                       VkPhysicalDeviceMemoryProperties *pMemoryProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);

   *pMemoryProperties = physical_device->memory_properties;
}

static void
radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,
                                  VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)
{
   RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);
   VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;

   /* For all memory heaps, the computation of the budget is as follows:
    *    heap_budget = heap_size - global_heap_usage + app_heap_usage
    *
    * The Vulkan spec 1.1.97 says that the budget should include any
    * currently allocated device memory.
    *
    * Note that the application heap usages are not really accurate (e.g.
    * in the presence of shared buffers).
    */
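   /* Illustrative numbers only: with an 8 GiB heap, 3 GiB used globally, and
    * 1 GiB of that allocated by this app, the app's budget is
    * 8 - 3 + 1 = 6 GiB.
    */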
   if (!device->rad_info.has_dedicated_vram) {
      /* On APUs, the driver exposes fake heaps to the application because usually the carveout is
       * too small for games but the budgets need to be redistributed accordingly.
       */

      assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));
      assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */
      assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);
      uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;

      /* Get the visible VRAM/GTT heap sizes and internal usages. */
      uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;
      uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;

      uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +
                                         device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
      uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);

      /* Compute the total heap size, internal and system usage. */
      uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;
      uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;
      uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +
                                    device->ws->query_value(device->ws, RADEON_GTT_USAGE);

      uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);

      /* Compute the total free space that can be allocated for this process across all heaps. */
      uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);

      /* Compute the remaining visible VRAM size for this process. */
      uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);

      /* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap sizes,
       * and align down to the page size to be conservative.
       */
      vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),
                                          device->rad_info.gart_page_size);
      uint64_t gtt_free_space = total_free_space - vram_vis_free_space;

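      /* Per the spec language cited above, the advertised budget includes
       * what this process has already allocated, hence free space plus our
       * own internal usage.
       */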
      memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;
      memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;
      memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;
      memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;
   } else {
      unsigned mask = device->heaps;
      unsigned heap = 0;
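      /* Walk the enabled heap mask in bit order; u_bit_scan() pops the lowest
       * set bit each iteration, which is assumed to match the order the
       * heaps were advertised in memoryHeaps[].
       */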
      while (mask) {
         uint64_t internal_usage = 0, system_usage = 0;
         unsigned type = 1u << u_bit_scan(&mask);

         switch (type) {
         case RADV_HEAP_VRAM:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
            system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);
            break;
         case RADV_HEAP_VRAM_VIS:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);
            if (!(device->heaps & RADV_HEAP_VRAM))
               internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);
            system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);
            break;
         case RADV_HEAP_GTT:
            internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);
            system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);
            break;
         }

         uint64_t total_usage = MAX2(internal_usage, system_usage);

         uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -
                               MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);
         memoryBudget->heapBudget[heap] = free_space + internal_usage;
         memoryBudget->heapUsage[heap] = internal_usage;
         ++heap;
      }

      assert(heap == memory_properties->memoryHeapCount);
   }

   /* The heapBudget and heapUsage values must be zero for array elements
    * greater than or equal to
    * VkPhysicalDeviceMemoryProperties::memoryHeapCount.
    */
   for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {
      memoryBudget->heapBudget[i] = 0;
      memoryBudget->heapUsage[i] = 0;
   }
}

void
radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,
                                        VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)
{
   radv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties);

   VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =
      vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);
   if (memory_budget)
      radv_get_memory_budget_properties(physicalDevice, memory_budget);
}

VkResult
radv_GetMemoryHostPointerPropertiesEXT(
   VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,
   VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

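   /* Imported host allocations can only live in GTT and must stay CPU-cached,
    * so the loop below picks the first GTT memory type without the
    * write-combine flag.
    */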
   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {
      const struct radv_physical_device *physical_device = device->physical_device;
      uint32_t memoryTypeBits = 0;
      for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {
         if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&
             !(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {
            memoryTypeBits = (1 << i);
            break;
         }
      }
      pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;
      return VK_SUCCESS;
   }
   default:
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   }
}

static enum radeon_ctx_priority
radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)
{
   /* Default to MEDIUM when a specific global priority isn't requested */
   if (!pObj)
      return RADEON_CTX_PRIORITY_MEDIUM;

   switch (pObj->globalPriority) {
   case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:
      return RADEON_CTX_PRIORITY_REALTIME;
   case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:
      return RADEON_CTX_PRIORITY_HIGH;
   case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:
      return RADEON_CTX_PRIORITY_MEDIUM;
   case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:
      return RADEON_CTX_PRIORITY_LOW;
   default:
      unreachable("Illegal global priority value");
      return RADEON_CTX_PRIORITY_INVALID;
   }
}

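/* Usage sketch (application side, assuming VK_EXT_global_priority is
 * enabled; not code from this driver):
 *
 *    VkDeviceQueueGlobalPriorityCreateInfoEXT prio_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT,
 *       .globalPriority = VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,
 *    };
 *    VkDeviceQueueCreateInfo queue_info = {
 *       .sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_CREATE_INFO,
 *       .pNext = &prio_info,
 *       ...
 *    };
 *
 * radv_queue_init() below maps that request onto a kernel context priority
 * via radv_get_queue_global_priority() above.
 */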
static int
radv_queue_init(struct radv_device *device, struct radv_queue *queue, uint32_t queue_family_index,
                int idx, VkDeviceQueueCreateFlags flags,
                const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)
{
   queue->device = device;
   queue->queue_family_index = queue_family_index;
   queue->queue_idx = idx;
   queue->priority = radv_get_queue_global_priority(global_priority);
   queue->flags = flags;
   queue->hw_ctx = device->hw_ctx[queue->priority];

   vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);

   list_inithead(&queue->pending_submissions);
   mtx_init(&queue->pending_mutex, mtx_plain);

   mtx_init(&queue->thread_mutex, mtx_plain);
   if (u_cnd_monotonic_init(&queue->thread_cond)) {
      vk_object_base_finish(&queue->base);
      return vk_error(device->instance, VK_ERROR_INITIALIZATION_FAILED);
   }
   queue->cond_created = true;

   return VK_SUCCESS;
}

static void
radv_queue_finish(struct radv_queue *queue)
{
   if (queue->hw_ctx) {
      if (queue->cond_created) {
         if (queue->thread_running) {
            p_atomic_set(&queue->thread_exit, true);
            u_cnd_monotonic_broadcast(&queue->thread_cond);
            thrd_join(queue->submission_thread, NULL);
         }

         u_cnd_monotonic_destroy(&queue->thread_cond);
      }

      mtx_destroy(&queue->pending_mutex);
      mtx_destroy(&queue->thread_mutex);
   }

   if (queue->initial_full_flush_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);
   if (queue->initial_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_preamble_cs);
   if (queue->continue_preamble_cs)
      queue->device->ws->cs_destroy(queue->continue_preamble_cs);
   if (queue->descriptor_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);
   if (queue->scratch_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
   if (queue->esgs_ring_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
   if (queue->gsvs_ring_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
   if (queue->tess_rings_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);
   if (queue->gds_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);
   if (queue->gds_oa_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);
   if (queue->compute_scratch_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);

   vk_object_base_finish(&queue->base);
}

static void
radv_device_init_gs_info(struct radv_device *device)
{
   device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,
                                                  device->physical_device->rad_info.family);
}

static VkResult
check_physical_device_features(VkPhysicalDevice physicalDevice,
                               const VkPhysicalDeviceFeatures *features)
{
   RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
   VkPhysicalDeviceFeatures supported_features;
   radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);
VkBool32 *supported_feature = (VkBool32 *)&supported_features;
2770
VkBool32 *enabled_feature = (VkBool32 *)features;
2771
unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);
2772
for (uint32_t i = 0; i < num_features; i++) {
2773
if (enabled_feature[i] && !supported_feature[i])
2774
return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);
2775
}
2776
2777
return VK_SUCCESS;
2778
}
2779
2780
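/* VK_EXT_custom_border_color support: allocate a small CPU-visible VRAM
 * buffer that holds the palette of custom border colors; samplers reference
 * entries in this buffer by index. */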
static VkResult
radv_device_init_border_color(struct radv_device *device)
{
   VkResult result;

   result = device->ws->buffer_create(
      device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,
      RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,
      RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);

   if (result != VK_SUCCESS)
      return vk_error(device->physical_device->instance, result);

   result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);
   if (result != VK_SUCCESS)
      return vk_error(device->physical_device->instance, result);

   device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);
   if (!device->border_color_data.colors_gpu_ptr)
      return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   mtx_init(&device->border_color_data.mutex, mtx_plain);

   return VK_SUCCESS;
}

static void
radv_device_finish_border_color(struct radv_device *device)
{
   if (device->border_color_data.bo) {
      device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);
      device->ws->buffer_destroy(device->ws, device->border_color_data.bo);

      mtx_destroy(&device->border_color_data.mutex);
   }
}

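/* Lazily-created image backing the forced-VRS path: a 4096x4096 D16 depth
 * image bound to dedicated memory, whose metadata carries the per-region
 * shading rates (see the FIXME below about switching to S8_UINT once HTILE
 * supports it). */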
VkResult
radv_device_init_vrs_image(struct radv_device *device)
{
   /* FIXME: 4k depth buffers should be large enough for now but we might want to adjust this
    * dynamically at some point. Also, it's probably better to use S8_UINT but no HTILE support yet.
    */
   uint32_t width = 4096, height = 4096;
   VkMemoryRequirements mem_req;
   VkDeviceMemory mem;
   VkResult result;
   VkImage image;

   VkImageCreateInfo image_create_info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = VK_IMAGE_TYPE_2D,
      .format = VK_FORMAT_D16_UNORM,
      .extent = {width, height, 1},
      .mipLevels = 1,
      .arrayLayers = 1,
      .samples = VK_SAMPLE_COUNT_1_BIT,
      .tiling = VK_IMAGE_TILING_OPTIMAL,
      .usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,
      .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
      .queueFamilyIndexCount = 0,
      .pQueueFamilyIndices = NULL,
      .initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,
   };

   result = radv_CreateImage(radv_device_to_handle(device), &image_create_info,
                             &device->meta_state.alloc, &image);
   if (result != VK_SUCCESS)
      return result;

   radv_GetImageMemoryRequirements(radv_device_to_handle(device), image, &mem_req);

   VkMemoryAllocateInfo alloc_info = {
      .sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
      .allocationSize = mem_req.size,
   };

   result = radv_AllocateMemory(radv_device_to_handle(device), &alloc_info,
                                &device->meta_state.alloc, &mem);
   if (result != VK_SUCCESS)
      goto fail_alloc;

   result = radv_BindImageMemory(radv_device_to_handle(device), image, mem, 0);
   if (result != VK_SUCCESS)
      goto fail_bind;

   device->vrs.image = radv_image_from_handle(image);
   device->vrs.mem = radv_device_memory_from_handle(mem);

   return VK_SUCCESS;

fail_bind:
   radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);
fail_alloc:
   radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);

   return result;
}

static void
radv_device_finish_vrs_image(struct radv_device *device)
{
   radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),
                   &device->meta_state.alloc);
   radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),
                     &device->meta_state.alloc);
}

VkResult
_radv_device_set_lost(struct radv_device *device, const char *file, int line, const char *msg, ...)
{
   VkResult err;
   va_list ap;

   p_atomic_inc(&device->lost);

   va_start(ap, msg);
   err =
      __vk_errorv(device->physical_device->instance, device, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,
                  VK_ERROR_DEVICE_LOST, file, line, msg, ap);
   va_end(ap);

   return err;
}

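/* Create the logical device: validate every requested feature against what
 * the physical device supports, build the dispatch table, create one
 * hardware context per requested global priority plus the per-family queue
 * arrays, then initialize meta shaders, the in-memory pipeline cache and the
 * optional debug facilities (traces, trap handler, forced VRS). */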
VkResult
radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)
{
   RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
   VkResult result;
   struct radv_device *device;

   bool keep_shader_info = false;
   bool robust_buffer_access = false;
   bool robust_buffer_access2 = false;
   bool overallocation_disallowed = false;
   bool custom_border_colors = false;
   bool vrs_enabled = false;
   bool attachment_vrs_enabled = false;

   /* Check enabled features */
   if (pCreateInfo->pEnabledFeatures) {
      result = check_physical_device_features(physicalDevice, pCreateInfo->pEnabledFeatures);
      if (result != VK_SUCCESS)
         return result;

      if (pCreateInfo->pEnabledFeatures->robustBufferAccess)
         robust_buffer_access = true;
   }

   vk_foreach_struct_const(ext, pCreateInfo->pNext)
   {
      switch (ext->sType) {
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {
         const VkPhysicalDeviceFeatures2 *features = (const void *)ext;
         result = check_physical_device_features(physicalDevice, &features->features);
         if (result != VK_SUCCESS)
            return result;

         if (features->features.robustBufferAccess)
            robust_buffer_access = true;
         break;
      }
      case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {
         const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;
         if (overallocation->overallocationBehavior ==
             VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)
            overallocation_disallowed = true;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {
         const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =
            (const void *)ext;
         custom_border_colors = border_color_features->customBorderColors;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {
         const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;
         attachment_vrs_enabled = vrs->attachmentFragmentShadingRate;
         vrs_enabled = vrs->pipelineFragmentShadingRate || vrs->primitiveFragmentShadingRate ||
                       attachment_vrs_enabled;
         break;
      }
      case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {
         const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;
         if (features->robustBufferAccess2)
            robust_buffer_access2 = true;
         break;
      }
      default:
         break;
      }
   }

   device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   struct vk_device_dispatch_table dispatch_table;

   if (radv_thread_trace_enabled()) {
      vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);
      vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);
   } else {
      vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);
   }

   result =
      vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);
   if (result != VK_SUCCESS) {
      vk_free(&device->vk.alloc, device);
      return result;
   }

   device->instance = physical_device->instance;
   device->physical_device = physical_device;

   device->ws = physical_device->ws;

   keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;

   /* With update after bind we can't attach bo's to the command buffer
    * from the descriptor set anymore, so we have to use a global BO list.
    */
   device->use_global_bo_list = (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||
                                device->vk.enabled_extensions.EXT_descriptor_indexing ||
                                device->vk.enabled_extensions.EXT_buffer_device_address ||
                                device->vk.enabled_extensions.KHR_buffer_device_address ||
                                device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||
                                device->vk.enabled_extensions.KHR_acceleration_structure;

   device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;
   device->robust_buffer_access2 = robust_buffer_access2;

   device->adjust_frag_coord_z =
      (vrs_enabled || device->vk.enabled_extensions.KHR_fragment_shading_rate ||
       device->force_vrs != RADV_FORCE_VRS_NONE) &&
      (device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||
       device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||
       device->physical_device->rad_info.family == CHIP_VANGOGH);
   device->attachment_vrs_enabled = attachment_vrs_enabled;

   mtx_init(&device->shader_slab_mutex, mtx_plain);
   list_inithead(&device->shader_slabs);

   device->overallocation_disallowed = overallocation_disallowed;
   mtx_init(&device->overallocation_mutex, mtx_plain);

   /* Create one context per queue priority. */
   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
      const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
         vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);
      enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);

      if (device->hw_ctx[priority])
         continue;

      result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);
      if (result != VK_SUCCESS)
         goto fail;
   }

   for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {
      const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];
      uint32_t qfi = queue_create->queueFamilyIndex;
      const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =
         vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);

      device->queues[qfi] =
         vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,
                  VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
      if (!device->queues[qfi]) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));

      device->queue_count[qfi] = queue_create->queueCount;

      for (unsigned q = 0; q < queue_create->queueCount; q++) {
         result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, queue_create->flags,
                                  global_priority);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&
                         !(device->instance->debug_flags & RADV_DEBUG_NOBINNING);

   /* The maximum number of scratch waves. Scratch space isn't divided
    * evenly between CUs. The number is only a function of the number of CUs.
    * We can decrease the constant to decrease the scratch buffer size.
    *
    * sctx->scratch_waves must be >= the maximum possible size of
    * 1 threadgroup, so that the hw doesn't hang from being unable
    * to start any.
    *
    * The recommended value is 4 per CU at most. Higher numbers don't
    * bring much benefit, but they still occupy chip resources (think
    * async compute). I've seen ~2% performance difference between 4 and 32.
    */
   uint32_t max_threads_per_block = 2048;
   device->scratch_waves =
      MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);

   device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);

   if (device->physical_device->rad_info.chip_class >= GFX7) {
      /* If the KMD allows it (there is a KMD hw register for it),
       * allow launching waves out-of-order.
       */
      device->dispatch_initiator |= S_00B800_ORDER_MODE(1);
   }

   radv_device_init_gs_info(device);

   device->tess_offchip_block_dw_size =
      device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;

   if (getenv("RADV_TRACE_FILE")) {
      fprintf(
         stderr,
         "***********************************************************************************\n");
      fprintf(
         stderr,
         "* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");
      fprintf(
         stderr,
         "***********************************************************************************\n");
      abort();
   }

   if (device->instance->debug_flags & RADV_DEBUG_HANG) {
      /* Enable GPU hangs detection and dump logs if a GPU hang is
       * detected.
       */
      keep_shader_info = true;

      if (!radv_init_trace(device))
         goto fail;

      fprintf(stderr,
              "*****************************************************************************\n");
      fprintf(stderr,
              "* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");
      fprintf(stderr,
              "*****************************************************************************\n");

      /* Wait for idle after every draw/dispatch to identify the
       * first bad call.
       */
      device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;

      radv_dump_enabled_options(device, stderr);
   }

   if (radv_thread_trace_enabled()) {
      fprintf(stderr, "*************************************************\n");
      fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");
      fprintf(stderr, "*************************************************\n");

      if (device->physical_device->rad_info.chip_class < GFX8 ||
          device->physical_device->rad_info.chip_class > GFX10_3) {
         fprintf(stderr, "GPU hardware not supported: refer to "
                         "the RGP documentation for the list of "
                         "supported GPUs!\n");
         abort();
      }

      if (!radv_thread_trace_init(device))
         goto fail;
   }

   if (getenv("RADV_TRAP_HANDLER")) {
      /* TODO: Add support for more hardware. */
      assert(device->physical_device->rad_info.chip_class == GFX8);

      fprintf(stderr, "**********************************************************************\n");
      fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");
      fprintf(stderr, "**********************************************************************\n");

      /* To get the disassembly of the faulty shaders, we have to
       * keep some shader info around.
       */
      keep_shader_info = true;

      if (!radv_trap_handler_init(device))
         goto fail;
   }

   if (getenv("RADV_FORCE_VRS")) {
      const char *vrs_rates = getenv("RADV_FORCE_VRS");

      if (device->physical_device->rad_info.chip_class < GFX10_3)
         fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");
      else if (!strcmp(vrs_rates, "2x2"))
         device->force_vrs = RADV_FORCE_VRS_2x2;
      else if (!strcmp(vrs_rates, "2x1"))
         device->force_vrs = RADV_FORCE_VRS_2x1;
      else if (!strcmp(vrs_rates, "1x2"))
         device->force_vrs = RADV_FORCE_VRS_1x2;
      else
         fprintf(stderr, "radv: Invalid VRS rates specified "
                         "(valid values are 2x2, 2x1 and 1x2)\n");
   }

   device->keep_shader_info = keep_shader_info;
   result = radv_device_init_meta(device);
   if (result != VK_SUCCESS)
      goto fail;

   radv_device_init_msaa(device);

   /* If the border color extension is enabled, let's create the buffer we need. */
   if (custom_border_colors) {
      result = radv_device_init_border_color(device);
      if (result != VK_SUCCESS)
         goto fail;
   }

   for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {
      device->empty_cs[family] = device->ws->cs_create(device->ws, family);
      if (!device->empty_cs[family])
         goto fail;

      switch (family) {
      case RADV_QUEUE_GENERAL:
         radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));
         radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));
         radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));
         break;
      case RADV_QUEUE_COMPUTE:
         radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));
         radeon_emit(device->empty_cs[family], 0);
         break;
      }

      result = device->ws->cs_finalize(device->empty_cs[family]);
      if (result != VK_SUCCESS)
         goto fail;
   }

   if (device->physical_device->rad_info.chip_class >= GFX7)
      cik_create_gfx_config(device);

   VkPipelineCacheCreateInfo ci;
   ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;
   ci.pNext = NULL;
   ci.flags = 0;
   ci.pInitialData = NULL;
   ci.initialDataSize = 0;
   VkPipelineCache pc;
   result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);
   if (result != VK_SUCCESS)
      goto fail_meta;

   device->mem_cache = radv_pipeline_cache_from_handle(pc);

   if (u_cnd_monotonic_init(&device->timeline_cond)) {
      result = VK_ERROR_INITIALIZATION_FAILED;
      goto fail_mem_cache;
   }

   device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));
   if (device->force_aniso >= 0) {
      fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",
              1 << util_logbase2(device->force_aniso));
   }

   *pDevice = radv_device_to_handle(device);
   return VK_SUCCESS;

fail_mem_cache:
   radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);
fail_meta:
   radv_device_finish_meta(device);
fail:
   radv_thread_trace_finish(device);
   free(device->thread_trace.trigger_file);

   radv_trap_handler_finish(device);
   radv_finish_trace(device);

   if (device->gfx_init)
      device->ws->buffer_destroy(device->ws, device->gfx_init);

   radv_device_finish_border_color(device);

   for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         radv_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
   }

   for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
      if (device->hw_ctx[i])
         device->ws->ctx_destroy(device->hw_ctx[i]);
   }

   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
   return result;
}

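/* Device destruction mirrors the creation order: queues and hardware
 * contexts go first, then meta state, the in-memory pipeline cache, and the
 * debug facilities. */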
void
radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   if (!device)
      return;

   if (device->gfx_init)
      device->ws->buffer_destroy(device->ws, device->gfx_init);

   radv_device_finish_border_color(device);
   radv_device_finish_vrs_image(device);

   for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++)
         radv_queue_finish(&device->queues[i][q]);
      if (device->queue_count[i])
         vk_free(&device->vk.alloc, device->queues[i]);
      if (device->empty_cs[i])
         device->ws->cs_destroy(device->empty_cs[i]);
   }

   for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {
      if (device->hw_ctx[i])
         device->ws->ctx_destroy(device->hw_ctx[i]);
   }

   radv_device_finish_meta(device);

   VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);
   radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);

   radv_trap_handler_finish(device);
   radv_finish_trace(device);

   radv_destroy_shader_slabs(device);

   u_cnd_monotonic_destroy(&device->timeline_cond);

   free(device->thread_trace.trigger_file);
   radv_thread_trace_finish(device);

   vk_device_finish(&device->vk);
   vk_free(&device->vk.alloc, device);
}

VkResult
radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)
{
   if (pProperties == NULL) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   /* None supported at this time */
   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult
radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,
                                    VkLayerProperties *pProperties)
{
   if (pProperties == NULL) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   /* None supported at this time */
   return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);
}

void
radv_GetDeviceQueue2(VkDevice _device, const VkDeviceQueueInfo2 *pQueueInfo, VkQueue *pQueue)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_queue *queue;

   queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];
   if (pQueueInfo->flags != queue->flags) {
      /* From the Vulkan 1.1.70 spec:
       *
       * "The queue returned by vkGetDeviceQueue2 must have the same
       * flags value from this structure as that used at device
       * creation time in a VkDeviceQueueCreateInfo instance. If no
       * matching flags were specified at device creation time then
       * pQueue will return VK_NULL_HANDLE."
       */
      *pQueue = VK_NULL_HANDLE;
      return;
   }

   *pQueue = radv_queue_to_handle(queue);
}

void
radv_GetDeviceQueue(VkDevice _device, uint32_t queueFamilyIndex, uint32_t queueIndex,
                    VkQueue *pQueue)
{
   const VkDeviceQueueInfo2 info =
      (VkDeviceQueueInfo2){.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,
                           .queueFamilyIndex = queueFamilyIndex,
                           .queueIndex = queueIndex};

   radv_GetDeviceQueue2(_device, &info, pQueue);
}

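/* Write the buffer descriptors for the ES->GS, GS->VS and tessellation rings
 * (plus, optionally, the 1x/2x/4x/8x sample positions) into the queue's
 * descriptor BO. The first four dwords of the map hold the scratch rsrc and
 * padding written by the caller; each ring group below takes two 4-dword
 * descriptors. */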
static void
fill_geom_tess_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_positions,
                     uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,
                     uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,
                     uint32_t tess_factor_ring_size, uint32_t tess_offchip_ring_offset,
                     uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo)
{
   uint32_t *desc = &map[4];

   if (esgs_ring_bo) {
      uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);

      /* stride 0, num records - size, add tid, swizzle, elsize4,
         index stride 64 */
      desc[0] = esgs_va;
      desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | S_008F04_SWIZZLE_ENABLE(true);
      desc[2] = esgs_ring_size;
      desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);

      if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
         desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
      }

      /* GS entry for ES->GS ring */
      /* stride 0, num records - size, elsize0,
         index stride 0 */
      desc[4] = esgs_va;
      desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);
      desc[6] = esgs_ring_size;
      desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

      if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
         desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
      }
   }

   desc += 8;

   if (gsvs_ring_bo) {
      uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);

      /* VS entry for GS->VS ring */
      /* stride 0, num records - size, elsize0,
         index stride 0 */
      desc[0] = gsvs_va;
      desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);
      desc[2] = gsvs_ring_size;
      desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

      if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
         desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
      }

      /* stride gsvs_itemsize, num records 64
         elsize 4, index stride 16 */
      /* shader will patch stride and desc[2] */
      desc[4] = gsvs_va;
      desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
      desc[6] = 0;
      desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |
                S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);

      if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
         desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);
      }
   }

   desc += 8;

   if (tess_rings_bo) {
      uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);
      uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;

      desc[0] = tess_va;
      desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);
      desc[2] = tess_factor_ring_size;
      desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

      if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
         desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
      }

      desc[4] = tess_offchip_va;
      desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);
      desc[6] = tess_offchip_ring_size;
      desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |
                S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);

      if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
         desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |
                    S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);
      } else {
         desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |
                    S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);
      }
   }

   desc += 8;

   if (add_sample_positions) {
      /* add sample positions after all rings */
      memcpy(desc, queue->device->sample_locations_1x, 8);
      desc += 2;
      memcpy(desc, queue->device->sample_locations_2x, 16);
      desc += 4;
      memcpy(desc, queue->device->sample_locations_4x, 32);
      desc += 8;
      memcpy(desc, queue->device->sample_locations_8x, 64);
   }
}

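/* Compute the VGT_HS_OFFCHIP_PARAM value: how many off-chip tessellation
 * buffers are available and at what granularity, applying the per-generation
 * limits documented below. The buffer count is also returned so the caller
 * can size the off-chip ring. */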
static unsigned
radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)
{
   bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&
                                 device->physical_device->rad_info.family != CHIP_CARRIZO &&
                                 device->physical_device->rad_info.family != CHIP_STONEY;
   unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;
   unsigned max_offchip_buffers;
   unsigned offchip_granularity;
   unsigned hs_offchip_param;

   /*
    * Per RadeonSI:
    * This must be one less than the maximum number due to a hw limitation.
    * Various hardware bugs need this.
    *
    * Per AMDVLK:
    * Vega10 should limit max_offchip_buffers to 508 (4 * 127).
    * Gfx7 should limit max_offchip_buffers to 508
    * Gfx6 should limit max_offchip_buffers to 126 (2 * 63)
    *
    * Follow AMDVLK here.
    */
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      max_offchip_buffers_per_se = 128;
   } else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||
              device->physical_device->rad_info.chip_class == GFX7 ||
              device->physical_device->rad_info.chip_class == GFX6)
      --max_offchip_buffers_per_se;

   max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;

   /* Hawaii has a bug with offchip buffers > 256 that can be worked
    * around by setting 4K granularity.
    */
   if (device->tess_offchip_block_dw_size == 4096) {
      assert(device->physical_device->rad_info.family == CHIP_HAWAII);
      offchip_granularity = V_03093C_X_4K_DWORDS;
   } else {
      assert(device->tess_offchip_block_dw_size == 8192);
      offchip_granularity = V_03093C_X_8K_DWORDS;
   }

   switch (device->physical_device->rad_info.chip_class) {
   case GFX6:
      max_offchip_buffers = MIN2(max_offchip_buffers, 126);
      break;
   case GFX7:
   case GFX8:
   case GFX9:
      max_offchip_buffers = MIN2(max_offchip_buffers, 508);
      break;
   case GFX10:
      break;
   default:
      break;
   }

   *max_offchip_buffers_p = max_offchip_buffers;
   if (device->physical_device->rad_info.chip_class >= GFX10_3) {
      hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |
                         S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);
   } else if (device->physical_device->rad_info.chip_class >= GFX7) {
      if (device->physical_device->rad_info.chip_class >= GFX8)
         --max_offchip_buffers;
      hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |
                         S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);
   } else {
      hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);
   }
   return hs_offchip_param;
}

static void
radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                        struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,
                        struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)
{
   if (!esgs_ring_bo && !gsvs_ring_bo)
      return;

   if (esgs_ring_bo)
      radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);

   if (gsvs_ring_bo)
      radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);

   if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
      radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);
      radeon_emit(cs, esgs_ring_size >> 8);
      radeon_emit(cs, gsvs_ring_size >> 8);
   } else {
      radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);
      radeon_emit(cs, esgs_ring_size >> 8);
      radeon_emit(cs, gsvs_ring_size >> 8);
   }
}

static void
radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                           unsigned hs_offchip_param, unsigned tf_ring_size,
                           struct radeon_winsys_bo *tess_rings_bo)
{
   uint64_t tf_va;

   if (!tess_rings_bo)
      return;

   tf_va = radv_buffer_get_va(tess_rings_bo);

   radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);

   if (queue->device->physical_device->rad_info.chip_class >= GFX7) {
      radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4));
      radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);

      if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
         radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,
                                S_030984_BASE_HI(tf_va >> 40));
      } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
         radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));
      }
      radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
   } else {
      radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size / 4));
      radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);
      radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);
   }
}

static void
radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                           uint32_t size_per_wave, uint32_t waves,
                           struct radeon_winsys_bo *scratch_bo)
{
   if (queue->queue_family_index != RADV_QUEUE_GENERAL)
      return;

   if (!scratch_bo)
      return;

   radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

   radeon_set_context_reg(
      cs, R_0286E8_SPI_TMPRING_SIZE,
      S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}

static void
radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                          uint32_t size_per_wave, uint32_t waves,
                          struct radeon_winsys_bo *compute_scratch_bo)
{
   uint64_t scratch_va;

   if (!compute_scratch_bo)
      return;

   scratch_va = radv_buffer_get_va(compute_scratch_bo);

   radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);

   radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);
   radeon_emit(cs, scratch_va);
   radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1));

   radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,
                     S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));
}

static void
radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf *cs,
                                 struct radeon_winsys_bo *descriptor_bo)
{
   uint64_t va;

   if (!descriptor_bo)
      return;

   va = radv_buffer_get_va(descriptor_bo);

   radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);

   if (queue->device->physical_device->rad_info.chip_class >= GFX10) {
      uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
                         R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
                         R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};

      for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
         radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
      }
   } else if (queue->device->physical_device->rad_info.chip_class == GFX9) {
      uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
                         R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,
                         R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};

      for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
         radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
      }
   } else {
      uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,
                         R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,
                         R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};

      for (int i = 0; i < ARRAY_SIZE(regs); ++i) {
         radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);
      }
   }
}

static void
radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
   struct radv_device *device = queue->device;

   if (device->gfx_init) {
      uint64_t va = radv_buffer_get_va(device->gfx_init);

      radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));
      radeon_emit(cs, va);
      radeon_emit(cs, va >> 32);
      radeon_emit(cs, device->gfx_init_size_dw & 0xffff);

      radv_cs_add_buffer(device->ws, cs, device->gfx_init);
   } else {
      si_emit_graphics(device, cs);
   }
}

static void
radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)
{
   si_emit_compute(queue->device, cs);
}

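/* Build (or reuse) the three per-queue preamble command streams: one that
 * starts with a full cache flush for the first IB of a submission, a lighter
 * initial preamble, and a "continue" preamble for chained IBs. Scratch
 * buffers and the geometry/tessellation rings only ever grow; whenever any
 * of them changes, the descriptor BO is rewritten and all three preambles
 * are re-recorded. */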
static VkResult
radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,
                     uint32_t scratch_waves, uint32_t compute_scratch_size_per_wave,
                     uint32_t compute_scratch_waves, uint32_t esgs_ring_size,
                     uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_gds,
                     bool needs_gds_oa, bool needs_sample_positions,
                     struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                     struct radeon_cmdbuf **initial_preamble_cs,
                     struct radeon_cmdbuf **continue_preamble_cs)
{
   struct radeon_winsys_bo *scratch_bo = NULL;
   struct radeon_winsys_bo *descriptor_bo = NULL;
   struct radeon_winsys_bo *compute_scratch_bo = NULL;
   struct radeon_winsys_bo *esgs_ring_bo = NULL;
   struct radeon_winsys_bo *gsvs_ring_bo = NULL;
   struct radeon_winsys_bo *tess_rings_bo = NULL;
   struct radeon_winsys_bo *gds_bo = NULL;
   struct radeon_winsys_bo *gds_oa_bo = NULL;
   struct radeon_cmdbuf *dest_cs[3] = {0};
   bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;
   unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;
   unsigned max_offchip_buffers;
   unsigned hs_offchip_param = 0;
   unsigned tess_offchip_ring_offset;
   uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;
   VkResult result = VK_SUCCESS;
   if (!queue->has_tess_rings) {
      if (needs_tess_rings)
         add_tess_rings = true;
   }
   if (!queue->has_gds) {
      if (needs_gds)
         add_gds = true;
   }
   if (!queue->has_gds_oa) {
      if (needs_gds_oa)
         add_gds_oa = true;
   }
   if (!queue->has_sample_positions) {
      if (needs_sample_positions)
         add_sample_positions = true;
   }
   tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;
   hs_offchip_param = radv_get_hs_offchip_param(queue->device, &max_offchip_buffers);
   tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);
   tess_offchip_ring_size = max_offchip_buffers * queue->device->tess_offchip_block_dw_size * 4;

   scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);
   if (scratch_size_per_wave)
      scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);
   else
      scratch_waves = 0;

   compute_scratch_size_per_wave =
      MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);
   if (compute_scratch_size_per_wave)
      compute_scratch_waves =
         MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);
   else
      compute_scratch_waves = 0;

   if (scratch_size_per_wave <= queue->scratch_size_per_wave &&
       scratch_waves <= queue->scratch_waves &&
       compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&
       compute_scratch_waves <= queue->compute_scratch_waves &&
       esgs_ring_size <= queue->esgs_ring_size && gsvs_ring_size <= queue->gsvs_ring_size &&
       !add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&
       queue->initial_preamble_cs) {
      *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
      *initial_preamble_cs = queue->initial_preamble_cs;
      *continue_preamble_cs = queue->continue_preamble_cs;
      if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size &&
          !gsvs_ring_size && !needs_tess_rings && !needs_gds && !needs_gds_oa &&
          !needs_sample_positions)
         *continue_preamble_cs = NULL;
      return VK_SUCCESS;
   }

   uint32_t scratch_size = scratch_size_per_wave * scratch_waves;
   uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;
   if (scratch_size > queue_scratch_size) {
      result =
         queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,
                                          ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else
      scratch_bo = queue->scratch_bo;

   uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;
   uint32_t compute_queue_scratch_size =
      queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;
   if (compute_scratch_size > compute_queue_scratch_size) {
      result = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size, 4096,
                                                RADEON_DOMAIN_VRAM, ring_bo_flags,
                                                RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);
      if (result != VK_SUCCESS)
         goto fail;

   } else
      compute_scratch_bo = queue->compute_scratch_bo;

   if (esgs_ring_size > queue->esgs_ring_size) {
      result = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,
                                                RADEON_DOMAIN_VRAM, ring_bo_flags,
                                                RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      esgs_ring_bo = queue->esgs_ring_bo;
      esgs_ring_size = queue->esgs_ring_size;
   }

   if (gsvs_ring_size > queue->gsvs_ring_size) {
      result = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,
                                                RADEON_DOMAIN_VRAM, ring_bo_flags,
                                                RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      gsvs_ring_bo = queue->gsvs_ring_bo;
      gsvs_ring_size = queue->gsvs_ring_size;
   }

   if (add_tess_rings) {
      result = queue->device->ws->buffer_create(
         queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,
         RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      tess_rings_bo = queue->tess_rings_bo;
   }

   if (add_gds) {
      assert(queue->device->physical_device->rad_info.chip_class >= GFX10);

      /* 4 streamout GDS counters.
       * We need 256B (64 dw) of GDS, otherwise streamout hangs.
       */
      result =
         queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,
                                          ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      gds_bo = queue->gds_bo;
   }

   if (add_gds_oa) {
      assert(queue->device->physical_device->rad_info.chip_class >= GFX10);

      result =
         queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,
                                          RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      gds_oa_bo = queue->gds_oa_bo;
   }

   if (scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||
       gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||
       add_sample_positions) {
      uint32_t size = 0;
      if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) {
         size = 112; /* 2 dword + 2 padding + 4 dword * 6 */
         if (add_sample_positions)
            size += 128; /* 64+32+16+8 = 120 bytes */
      } else if (scratch_bo)
         size = 8; /* 2 dword */

      result = queue->device->ws->buffer_create(
         queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,
         RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,
         RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);
      if (result != VK_SUCCESS)
         goto fail;
   } else
      descriptor_bo = queue->descriptor_bo;

   if (descriptor_bo != queue->descriptor_bo) {
      uint32_t *map = (uint32_t *)queue->device->ws->buffer_map(descriptor_bo);
      if (!map)
         goto fail;

      if (scratch_bo) {
         uint64_t scratch_va = radv_buffer_get_va(scratch_bo);
         uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);
         map[0] = scratch_va;
         map[1] = rsrc1;
      }

      if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)
         fill_geom_tess_rings(queue, map, add_sample_positions, esgs_ring_size, esgs_ring_bo,
                              gsvs_ring_size, gsvs_ring_bo, tess_factor_ring_size,
                              tess_offchip_ring_offset, tess_offchip_ring_size, tess_rings_bo);

      queue->device->ws->buffer_unmap(descriptor_bo);
   }

   for (int i = 0; i < 3; ++i) {
      enum rgp_flush_bits sqtt_flush_bits = 0;
      struct radeon_cmdbuf *cs = NULL;
      cs = queue->device->ws->cs_create(queue->device->ws,
                                        queue->queue_family_index ? RING_COMPUTE : RING_GFX);
      if (!cs) {
         result = VK_ERROR_OUT_OF_HOST_MEMORY;
         goto fail;
      }

      dest_cs[i] = cs;

      if (scratch_bo)
         radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);

      /* Emit initial configuration. */
      switch (queue->queue_family_index) {
      case RADV_QUEUE_GENERAL:
         radv_init_graphics_state(cs, queue);
         break;
      case RADV_QUEUE_COMPUTE:
         radv_init_compute_state(cs, queue);
         break;
      case RADV_QUEUE_TRANSFER:
         break;
      }

      if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {
         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
         radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));

         radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));
         radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));
      }

      radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, gsvs_ring_bo,
                              gsvs_ring_size);
      radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, tess_factor_ring_size, tess_rings_bo);
      radv_emit_global_shader_pointers(queue, cs, descriptor_bo);
      radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave, compute_scratch_waves,
                                compute_scratch_bo);
      radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo);

      if (gds_bo)
         radv_cs_add_buffer(queue->device->ws, cs, gds_bo);
      if (gds_oa_bo)
         radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);

      if (i == 0) {
         si_cs_emit_cache_flush(
            cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
            queue->queue_family_index == RING_COMPUTE &&
               queue->device->physical_device->rad_info.chip_class >= GFX7,
            (queue->queue_family_index == RADV_QUEUE_COMPUTE
                ? RADV_CMD_FLAG_CS_PARTIAL_FLUSH
                : (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |
               RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |
               RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS,
            &sqtt_flush_bits, 0);
      } else if (i == 1) {
         si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,
                                queue->queue_family_index == RING_COMPUTE &&
                                   queue->device->physical_device->rad_info.chip_class >= GFX7,
                                RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |
                                   RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |
                                   RADV_CMD_FLAG_START_PIPELINE_STATS,
                                &sqtt_flush_bits, 0);
      }

      result = queue->device->ws->cs_finalize(cs);
      if (result != VK_SUCCESS)
         goto fail;
   }

   if (queue->initial_full_flush_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);

   if (queue->initial_preamble_cs)
      queue->device->ws->cs_destroy(queue->initial_preamble_cs);

   if (queue->continue_preamble_cs)
      queue->device->ws->cs_destroy(queue->continue_preamble_cs);

   queue->initial_full_flush_preamble_cs = dest_cs[0];
   queue->initial_preamble_cs = dest_cs[1];
   queue->continue_preamble_cs = dest_cs[2];

   if (scratch_bo != queue->scratch_bo) {
      if (queue->scratch_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);
      queue->scratch_bo = scratch_bo;
   }
   queue->scratch_size_per_wave = scratch_size_per_wave;
   queue->scratch_waves = scratch_waves;

   if (compute_scratch_bo != queue->compute_scratch_bo) {
      if (queue->compute_scratch_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);
      queue->compute_scratch_bo = compute_scratch_bo;
   }
   queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;
   queue->compute_scratch_waves = compute_scratch_waves;

   if (esgs_ring_bo != queue->esgs_ring_bo) {
      if (queue->esgs_ring_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);
      queue->esgs_ring_bo = esgs_ring_bo;
      queue->esgs_ring_size = esgs_ring_size;
   }

   if (gsvs_ring_bo != queue->gsvs_ring_bo) {
      if (queue->gsvs_ring_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);
      queue->gsvs_ring_bo = gsvs_ring_bo;
      queue->gsvs_ring_size = gsvs_ring_size;
   }

   if (tess_rings_bo != queue->tess_rings_bo) {
      queue->tess_rings_bo = tess_rings_bo;
      queue->has_tess_rings = true;
   }

   if (gds_bo != queue->gds_bo) {
      queue->gds_bo = gds_bo;
      queue->has_gds = true;
   }

   if (gds_oa_bo != queue->gds_oa_bo) {
      queue->gds_oa_bo = gds_oa_bo;
      queue->has_gds_oa = true;
   }

   if (descriptor_bo != queue->descriptor_bo) {
      if (queue->descriptor_bo)
         queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);

      queue->descriptor_bo = descriptor_bo;
   }

   if (add_sample_positions)
      queue->has_sample_positions = true;

   *initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;
   *initial_preamble_cs = queue->initial_preamble_cs;
   *continue_preamble_cs = queue->continue_preamble_cs;
   if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)
      *continue_preamble_cs = NULL;
   return VK_SUCCESS;
fail:
   for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)
      if (dest_cs[i])
         queue->device->ws->cs_destroy(dest_cs[i]);
   if (descriptor_bo && descriptor_bo != queue->descriptor_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);
   if (scratch_bo && scratch_bo != queue->scratch_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);
   if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);
   if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);
   if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);
   if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);
   if (gds_bo && gds_bo != queue->gds_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);
   if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)
      queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);

   return vk_error(queue->device->instance, result);
}

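/* Gather the syncobjs behind the given semaphore parts (and the optional
 * fence) into a radv_winsys_sem_counts. Binary and legacy-timeline entries
 * share one array; timeline syncobjs are appended after them, with their
 * wait/signal points stored in the parallel "points" array allocated here. */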
static VkResult
radv_alloc_sem_counts(struct radv_device *device, struct radv_winsys_sem_counts *counts,
                      int num_sems, struct radv_semaphore_part **sems,
                      const uint64_t *timeline_values, VkFence _fence, bool is_signal)
{
   int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;

   if (num_sems == 0 && _fence == VK_NULL_HANDLE)
      return VK_SUCCESS;

   for (uint32_t i = 0; i < num_sems; i++) {
      switch (sems[i]->kind) {
      case RADV_SEMAPHORE_SYNCOBJ:
         counts->syncobj_count++;
         counts->syncobj_reset_count++;
         break;
      case RADV_SEMAPHORE_NONE:
         break;
      case RADV_SEMAPHORE_TIMELINE:
         counts->syncobj_count++;
         break;
      case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
         counts->timeline_syncobj_count++;
         break;
      }
   }

   if (_fence != VK_NULL_HANDLE)
      counts->syncobj_count++;

   if (counts->syncobj_count || counts->timeline_syncobj_count) {
      counts->points = (uint64_t *)malloc(sizeof(*counts->syncobj) * counts->syncobj_count +
                                          (sizeof(*counts->syncobj) + sizeof(*counts->points)) *
                                             counts->timeline_syncobj_count);
      if (!counts->points)
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      counts->syncobj = (uint32_t *)(counts->points + counts->timeline_syncobj_count);
   }

   non_reset_idx = counts->syncobj_reset_count;

   for (uint32_t i = 0; i < num_sems; i++) {
      switch (sems[i]->kind) {
      case RADV_SEMAPHORE_NONE:
         unreachable("Empty semaphore");
         break;
      case RADV_SEMAPHORE_SYNCOBJ:
         counts->syncobj[syncobj_idx++] = sems[i]->syncobj;
         break;
      case RADV_SEMAPHORE_TIMELINE: {
         mtx_lock(&sems[i]->timeline.mutex);
         struct radv_timeline_point *point = NULL;
         if (is_signal) {
            point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);
         } else {
            point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline,
                                                             timeline_values[i]);
         }

         mtx_unlock(&sems[i]->timeline.mutex);

         if (point) {
            counts->syncobj[non_reset_idx++] = point->syncobj;
         } else {
            /* Explicitly remove the semaphore so we might not find
             * a point later post-submit. */
            sems[i] = NULL;
         }
         break;
      }
      case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
         counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;
         counts->points[timeline_idx] = timeline_values[i];
         ++timeline_idx;
         break;
      }
   }

   if (_fence != VK_NULL_HANDLE) {
      RADV_FROM_HANDLE(radv_fence, fence, _fence);

      struct radv_fence_part *part =
         fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;
      counts->syncobj[non_reset_idx++] = part->syncobj;
   }

   assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);
   counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);

   return VK_SUCCESS;
}

static void
radv_free_sem_info(struct radv_winsys_sem_info *sem_info)
{
   free(sem_info->wait.points);
   free(sem_info->signal.points);
}

static void
radv_free_temp_syncobjs(struct radv_device *device, int num_sems, struct radv_semaphore_part *sems)
{
   for (uint32_t i = 0; i < num_sems; i++) {
      radv_destroy_semaphore_part(device, sems + i);
   }
}

static VkResult
radv_alloc_sem_info(struct radv_device *device, struct radv_winsys_sem_info *sem_info,
                    int num_wait_sems, struct radv_semaphore_part **wait_sems,
                    const uint64_t *wait_values, int num_signal_sems,
                    struct radv_semaphore_part **signal_sems, const uint64_t *signal_values,
                    VkFence fence)
{
   VkResult ret;

   ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values,
                               VK_NULL_HANDLE, false);
   if (ret)
      return ret;
   ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems,
                               signal_values, fence, true);
   if (ret)
      radv_free_sem_info(sem_info);

   /* caller can override these */
   sem_info->cs_emit_wait = true;
   sem_info->cs_emit_signal = true;
   return ret;
}

static void
radv_finalize_timelines(struct radv_device *device, uint32_t num_wait_sems,
                        struct radv_semaphore_part **wait_sems, const uint64_t *wait_values,
                        uint32_t num_signal_sems, struct radv_semaphore_part **signal_sems,
                        const uint64_t *signal_values, struct list_head *processing_list)
{
   for (uint32_t i = 0; i < num_wait_sems; ++i) {
      if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
         mtx_lock(&wait_sems[i]->timeline.mutex);
         struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
            device, &wait_sems[i]->timeline, wait_values[i]);
         point->wait_count -= 2;
         mtx_unlock(&wait_sems[i]->timeline.mutex);
      }
   }
   for (uint32_t i = 0; i < num_signal_sems; ++i) {
      if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {
         mtx_lock(&signal_sems[i]->timeline.mutex);
         struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(
            device, &signal_sems[i]->timeline, signal_values[i]);
         signal_sems[i]->timeline.highest_submitted =
            MAX2(signal_sems[i]->timeline.highest_submitted, point->value);
         point->wait_count -= 2;
         radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);
         mtx_unlock(&signal_sems[i]->timeline.mutex);
      } else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {
         signal_sems[i]->timeline_syncobj.max_point =
            MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);
      }
   }
}

static VkResult
radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);
   VkResult result;

   for (uint32_t i = 0; i < bind->bindCount; ++i) {
      struct radv_device_memory *mem = NULL;

      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

      result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,
                                               bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
                                               mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static VkResult
radv_sparse_image_opaque_bind_memory(struct radv_device *device,
                                     const VkSparseImageOpaqueMemoryBindInfo *bind)
{
   RADV_FROM_HANDLE(radv_image, image, bind->image);
   VkResult result;

   for (uint32_t i = 0; i < bind->bindCount; ++i) {
      struct radv_device_memory *mem = NULL;

      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

      result = device->ws->buffer_virtual_bind(device->ws, image->bo,
                                               bind->pBinds[i].resourceOffset, bind->pBinds[i].size,
                                               mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

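/* Bind memory for a sparse image at mip level / array layer granularity.
 * All coordinates are first converted to block units. When a bind covers
 * whole rows of the subresource the range is bound in one call; otherwise
 * the memory is bound one row of tiles at a time, since the image pitch and
 * the bind-region pitch differ. */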
static VkResult
radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)
{
   RADV_FROM_HANDLE(radv_image, image, bind->image);
   struct radeon_surf *surface = &image->planes[0].surface;
   uint32_t bs = vk_format_get_blocksize(image->vk_format);
   VkResult result;

   for (uint32_t i = 0; i < bind->bindCount; ++i) {
      struct radv_device_memory *mem = NULL;
      uint32_t offset, pitch;
      uint32_t mem_offset = bind->pBinds[i].memoryOffset;
      const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;
      const uint32_t level = bind->pBinds[i].subresource.mipLevel;

      VkExtent3D bind_extent = bind->pBinds[i].extent;
      bind_extent.width =
         DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));
      bind_extent.height =
         DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));

      VkOffset3D bind_offset = bind->pBinds[i].offset;
      bind_offset.x /= vk_format_get_blockwidth(image->vk_format);
      bind_offset.y /= vk_format_get_blockheight(image->vk_format);

      if (bind->pBinds[i].memory != VK_NULL_HANDLE)
         mem = radv_device_memory_from_handle(bind->pBinds[i].memory);

      if (device->physical_device->rad_info.chip_class >= GFX9) {
         offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];
         pitch = surface->u.gfx9.prt_level_pitch[level];
      } else {
         offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                  surface->u.legacy.level[level].slice_size_dw * 4 * layer;
         pitch = surface->u.legacy.level[level].nblk_x;
      }

      offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);

      uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);

      bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;

      if (whole_subres) {
         uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);

         uint32_t size = aligned_extent_width * aligned_extent_height * bs;
         result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,
                                                  mem ? mem->bo : NULL, mem_offset);
         if (result != VK_SUCCESS)
            return result;
      } else {
         uint32_t img_increment = pitch * bs;
         uint32_t mem_increment = aligned_extent_width * bs;
         uint32_t size = mem_increment * surface->prt_tile_height;
         for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {
            result = device->ws->buffer_virtual_bind(
               device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,
               mem_offset + mem_increment * y);
            if (result != VK_SUCCESS)
               return result;
         }
      }
   }

   return VK_SUCCESS;
}

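/* Compute the worst-case scratch and ring requirements over a set of command
 * buffers and get (or build) the matching preamble command streams. */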
static VkResult
radv_get_preambles(struct radv_queue *queue, const VkCommandBuffer *cmd_buffers,
                   uint32_t cmd_buffer_count, struct radeon_cmdbuf **initial_full_flush_preamble_cs,
                   struct radeon_cmdbuf **initial_preamble_cs,
                   struct radeon_cmdbuf **continue_preamble_cs)
{
   uint32_t scratch_size_per_wave = 0, waves_wanted = 0;
   uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;
   uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;
   bool tess_rings_needed = false;
   bool gds_needed = false;
   bool gds_oa_needed = false;
   bool sample_positions_needed = false;

   for (uint32_t j = 0; j < cmd_buffer_count; j++) {
      RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, cmd_buffers[j]);

      scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);
      waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);
      compute_scratch_size_per_wave =
         MAX2(compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed);
      compute_waves_wanted = MAX2(compute_waves_wanted, cmd_buffer->compute_scratch_waves_wanted);
      esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);
      gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);
      tess_rings_needed |= cmd_buffer->tess_rings_needed;
      gds_needed |= cmd_buffer->gds_needed;
      gds_oa_needed |= cmd_buffer->gds_oa_needed;
      sample_positions_needed |= cmd_buffer->sample_positions_needed;
   }

   return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,
                               compute_scratch_size_per_wave, compute_waves_wanted, esgs_ring_size,
                               gsvs_ring_size, tess_rings_needed, gds_needed, gds_oa_needed,
                               sample_positions_needed, initial_full_flush_preamble_cs,
                               initial_preamble_cs, continue_preamble_cs);
}

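/* A queue submission that may have to wait on timeline semaphores before it
 * can be executed. All arrays below point into a single allocation made by
 * radv_create_deferred_submission(). */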
struct radv_deferred_queue_submission {
   struct radv_queue *queue;
   VkCommandBuffer *cmd_buffers;
   uint32_t cmd_buffer_count;

   /* Sparse bindings that happen on a queue. */
   VkSparseBufferMemoryBindInfo *buffer_binds;
   uint32_t buffer_bind_count;
   VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
   uint32_t image_opaque_bind_count;
   VkSparseImageMemoryBindInfo *image_binds;
   uint32_t image_bind_count;

   bool flush_caches;
   VkPipelineStageFlags wait_dst_stage_mask;
   struct radv_semaphore_part **wait_semaphores;
   uint32_t wait_semaphore_count;
   struct radv_semaphore_part **signal_semaphores;
   uint32_t signal_semaphore_count;
   VkFence fence;

   uint64_t *wait_values;
   uint64_t *signal_values;

   struct radv_semaphore_part *temporary_semaphore_parts;
   uint32_t temporary_semaphore_part_count;

   struct list_head queue_pending_list;
   uint32_t submission_wait_count;
   struct radv_timeline_waiter *wait_nodes;

   struct list_head processing_list;
};

struct radv_queue_submission {
   const VkCommandBuffer *cmd_buffers;
   uint32_t cmd_buffer_count;

   /* Sparse bindings that happen on a queue. */
   const VkSparseBufferMemoryBindInfo *buffer_binds;
   uint32_t buffer_bind_count;
   const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;
   uint32_t image_opaque_bind_count;
   const VkSparseImageMemoryBindInfo *image_binds;
   uint32_t image_bind_count;

   bool flush_caches;
   VkPipelineStageFlags wait_dst_stage_mask;
   const VkSemaphore *wait_semaphores;
   uint32_t wait_semaphore_count;
   const VkSemaphore *signal_semaphores;
   uint32_t signal_semaphore_count;
   VkFence fence;

   const uint64_t *wait_values;
   uint32_t wait_value_count;
   const uint64_t *signal_values;
   uint32_t signal_value_count;
};

static VkResult radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,
                                              uint32_t decrement,
                                              struct list_head *processing_list);

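/* Deep-copy a submission into one heap allocation so it can outlive the
 * vkQueueSubmit() call. The trailing data is laid out roughly as:
 *
 *   struct radv_deferred_queue_submission
 *   VkCommandBuffer                    cmd_buffers[cmd_buffer_count]
 *   VkSparseBufferMemoryBindInfo       buffer_binds[buffer_bind_count]
 *   VkSparseImageOpaqueMemoryBindInfo  image_opaque_binds[image_opaque_bind_count]
 *   VkSparseImageMemoryBindInfo        image_binds[image_bind_count]
 *   VkSparseImageMemoryBind            (flattened pBinds arrays)
 *   struct radv_semaphore_part        *wait_semaphores[wait_semaphore_count]
 *   struct radv_semaphore_part        *signal_semaphores[signal_semaphore_count]
 *   struct radv_semaphore_part         temporary_semaphore_parts[temporary_count]
 *   uint64_t                           wait_values / signal_values
 *   struct radv_timeline_waiter        wait_nodes[wait_semaphore_count]
 *
 * Temporary semaphore payloads are moved (not copied) into the deferred
 * submission, which takes ownership of them. */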
static VkResult
radv_create_deferred_submission(struct radv_queue *queue,
                                const struct radv_queue_submission *submission,
                                struct radv_deferred_queue_submission **out)
{
   struct radv_deferred_queue_submission *deferred = NULL;
   size_t size = sizeof(struct radv_deferred_queue_submission);

   uint32_t temporary_count = 0;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)
         ++temporary_count;
   }

   size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);
   size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);
   size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);
   size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);

   for (uint32_t i = 0; i < submission->image_bind_count; ++i)
      size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);

   size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);
   size += temporary_count * sizeof(struct radv_semaphore_part);
   size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);
   size += submission->wait_value_count * sizeof(uint64_t);
   size += submission->signal_value_count * sizeof(uint64_t);
   size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);

   deferred = calloc(1, size);
   if (!deferred)
      return VK_ERROR_OUT_OF_HOST_MEMORY;

   deferred->queue = queue;

   deferred->cmd_buffers = (void *)(deferred + 1);
   deferred->cmd_buffer_count = submission->cmd_buffer_count;
   if (submission->cmd_buffer_count) {
      memcpy(deferred->cmd_buffers, submission->cmd_buffers,
             submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));
   }

   deferred->buffer_binds = (void *)(deferred->cmd_buffers + submission->cmd_buffer_count);
   deferred->buffer_bind_count = submission->buffer_bind_count;
   if (submission->buffer_bind_count) {
      memcpy(deferred->buffer_binds, submission->buffer_binds,
             submission->buffer_bind_count * sizeof(*deferred->buffer_binds));
   }

   deferred->image_opaque_binds = (void *)(deferred->buffer_binds + submission->buffer_bind_count);
   deferred->image_opaque_bind_count = submission->image_opaque_bind_count;
   if (submission->image_opaque_bind_count) {
      memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,
             submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));
   }

   deferred->image_binds =
      (void *)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);
   deferred->image_bind_count = submission->image_bind_count;

   VkSparseImageMemoryBind *sparse_image_binds =
      (void *)(deferred->image_binds + deferred->image_bind_count);
   for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {
      deferred->image_binds[i] = submission->image_binds[i];
      deferred->image_binds[i].pBinds = sparse_image_binds;

      for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)
         *sparse_image_binds++ = submission->image_binds[i].pBinds[j];
   }

   deferred->flush_caches = submission->flush_caches;
   deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;

   deferred->wait_semaphores = (void *)sparse_image_binds;
   deferred->wait_semaphore_count = submission->wait_semaphore_count;

   deferred->signal_semaphores =
      (void *)(deferred->wait_semaphores + deferred->wait_semaphore_count);
   deferred->signal_semaphore_count = submission->signal_semaphore_count;

   deferred->fence = submission->fence;

   deferred->temporary_semaphore_parts =
      (void *)(deferred->signal_semaphores + deferred->signal_semaphore_count);
   deferred->temporary_semaphore_part_count = temporary_count;

   uint32_t temporary_idx = 0;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
         deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];
         deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;
         semaphore->temporary.kind = RADV_SEMAPHORE_NONE;
         ++temporary_idx;
      } else
         deferred->wait_semaphores[i] = &semaphore->permanent;
   }

   for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);
      if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {
         deferred->signal_semaphores[i] = &semaphore->temporary;
      } else {
         deferred->signal_semaphores[i] = &semaphore->permanent;
      }
   }

   deferred->wait_values = (void *)(deferred->temporary_semaphore_parts + temporary_count);
   if (submission->wait_value_count) {
      memcpy(deferred->wait_values, submission->wait_values,
             submission->wait_value_count * sizeof(uint64_t));
   }
   deferred->signal_values = deferred->wait_values + submission->wait_value_count;
   if (submission->signal_value_count) {
      memcpy(deferred->signal_values, submission->signal_values,
             submission->signal_value_count * sizeof(uint64_t));
   }

   deferred->wait_nodes = (void *)(deferred->signal_values + submission->signal_value_count);
   /* This is worst-case. radv_queue_enqueue_submission will fill in further, but this
    * ensures the submission is not accidentally triggered early when adding wait timelines. */
   deferred->submission_wait_count = 1 + submission->wait_semaphore_count;

   *out = deferred;
   return VK_SUCCESS;
}

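/* Register a deferred submission with its timelines and its queue. Each
 * timeline wait whose value has not been submitted yet keeps one reference
 * on submission_wait_count alive; the remaining references (pre-counted in
 * radv_create_deferred_submission) are dropped here, and the submission is
 * triggered once the count reaches zero. */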
static VkResult
radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,
                              struct list_head *processing_list)
{
   uint32_t wait_cnt = 0;
   struct radv_timeline_waiter *waiter = submission->wait_nodes;
   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {
         mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);
         if (submission->wait_semaphores[i]->timeline.highest_submitted <
             submission->wait_values[i]) {
            ++wait_cnt;
            waiter->value = submission->wait_values[i];
            waiter->submission = submission;
            list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);
            ++waiter;
         }
         mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);
      }
   }

   mtx_lock(&submission->queue->pending_mutex);

   bool is_first = list_is_empty(&submission->queue->pending_submissions);
   list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);

   mtx_unlock(&submission->queue->pending_mutex);

   /* If there is already a submission in the queue, that will decrement the counter by 1 when
    * submitted, but if the queue was empty, we decrement ourselves as there is no previous
    * submission. */
   uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);

   /* if decrement is zero, then we don't have a refcounted reference to the
    * submission anymore, so it is not safe to access the submission. */
   if (!decrement)
      return VK_SUCCESS;

   return radv_queue_trigger_submission(submission, decrement, processing_list);
}

static void
radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,
                                   struct list_head *processing_list)
{
   mtx_lock(&submission->queue->pending_mutex);
   list_del(&submission->queue_pending_list);

   /* trigger the next submission in the queue. */
   if (!list_is_empty(&submission->queue->pending_submissions)) {
      struct radv_deferred_queue_submission *next_submission =
         list_first_entry(&submission->queue->pending_submissions,
                          struct radv_deferred_queue_submission, queue_pending_list);
      radv_queue_trigger_submission(next_submission, 1, processing_list);
   }
   mtx_unlock(&submission->queue->pending_mutex);

   u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);
}

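/* Execute a deferred submission: perform sparse binds, submit the command
 * buffers in chunks of at most RADV_MAX_IBS_PER_SUBMIT, then finalize the
 * timelines and wake the next submission on this queue. Any failure other
 * than a device loss is escalated to VK_ERROR_DEVICE_LOST. */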
static VkResult
radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,
                           struct list_head *processing_list)
{
   struct radv_queue *queue = submission->queue;
   struct radeon_winsys_ctx *ctx = queue->hw_ctx;
   uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;
   bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;
   bool can_patch = true;
   uint32_t advance;
   struct radv_winsys_sem_info sem_info = {0};
   VkResult result;
   struct radeon_cmdbuf *initial_preamble_cs = NULL;
   struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;
   struct radeon_cmdbuf *continue_preamble_cs = NULL;

   result =
      radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count,
                         &initial_flush_preamble_cs, &initial_preamble_cs, &continue_preamble_cs);
   if (result != VK_SUCCESS)
      goto fail;

   result = radv_alloc_sem_info(queue->device, &sem_info, submission->wait_semaphore_count,
                                submission->wait_semaphores, submission->wait_values,
                                submission->signal_semaphore_count, submission->signal_semaphores,
                                submission->signal_values, submission->fence);
   if (result != VK_SUCCESS)
      goto fail;

   for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {
      result = radv_sparse_buffer_bind_memory(queue->device, submission->buffer_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

   for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {
      result =
         radv_sparse_image_opaque_bind_memory(queue->device, submission->image_opaque_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

   for (uint32_t i = 0; i < submission->image_bind_count; ++i) {
      result = radv_sparse_image_bind_memory(queue->device, submission->image_binds + i);
      if (result != VK_SUCCESS)
         goto fail;
   }

   if (!submission->cmd_buffer_count) {
      result = queue->device->ws->cs_submit(ctx, queue->queue_idx,
                                            &queue->device->empty_cs[queue->queue_family_index], 1,
                                            NULL, NULL, &sem_info, false);
      if (result != VK_SUCCESS)
         goto fail;
   } else {
      struct radeon_cmdbuf **cs_array =
         malloc(sizeof(struct radeon_cmdbuf *) * (submission->cmd_buffer_count));

      for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {
         RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

         cs_array[j] = cmd_buffer->cs;
         if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))
            can_patch = false;

         cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;
      }

      for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {
         struct radeon_cmdbuf *initial_preamble =
            (do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;
         advance = MIN2(max_cs_submission, submission->cmd_buffer_count - j);

         if (queue->device->trace_bo)
            *queue->device->trace_id_ptr = 0;

         sem_info.cs_emit_wait = j == 0;
         sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;

         result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,
                                               initial_preamble, continue_preamble_cs, &sem_info,
                                               can_patch);
         if (result != VK_SUCCESS) {
            free(cs_array);
            goto fail;
         }

         if (queue->device->trace_bo) {
            radv_check_gpu_hangs(queue, cs_array[j]);
         }

         if (queue->device->tma_bo) {
            radv_check_trap_handler(queue);
         }
      }

      free(cs_array);
   }

   radv_finalize_timelines(queue->device, submission->wait_semaphore_count,
                           submission->wait_semaphores, submission->wait_values,
                           submission->signal_semaphore_count, submission->signal_semaphores,
                           submission->signal_values, processing_list);
   /* Has to happen after timeline finalization to make sure the
    * condition variable is only triggered when timelines and queue have
    * been updated. */
   radv_queue_submission_update_queue(submission, processing_list);

fail:
   if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {
      /* When something bad happened during the submission, such as
       * an out of memory issue, it might be hard to recover from
       * this inconsistent state. To avoid this sort of problem, we
       * assume that we are in a really bad situation and return
       * VK_ERROR_DEVICE_LOST to ensure the clients do not attempt
       * to submit the same job again to this device.
       */
      result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");
   }

   radv_free_temp_syncobjs(queue->device, submission->temporary_semaphore_part_count,
                           submission->temporary_semaphore_parts);
   radv_free_sem_info(&sem_info);
   free(submission);
   return result;
}

static VkResult
radv_process_submissions(struct list_head *processing_list)
{
   while (!list_is_empty(processing_list)) {
      struct radv_deferred_queue_submission *submission =
         list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);
      list_del(&submission->processing_list);

      VkResult result = radv_queue_submit_deferred(submission, processing_list);
      if (result != VK_SUCCESS)
         return result;
   }
   return VK_SUCCESS;
}

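/* Wait for all DRM timeline syncobj points this submission depends on to
 * become available (i.e. actually submitted to the kernel), up to the given
 * absolute timeout. Returns VK_TIMEOUT if they did not show up in time. */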
static VkResult
wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,
                                        uint64_t timeout)
{
   struct radv_device *device = submission->queue->device;
   uint32_t syncobj_count = 0;
   uint32_t syncobj_idx = 0;

   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         continue;

      if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
         continue;
      ++syncobj_count;
   }

   if (!syncobj_count)
      return VK_SUCCESS;

   uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);
   if (!points)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   uint32_t *syncobj = (uint32_t *)(points + syncobj_count);

   for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {
      if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         continue;

      if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])
         continue;

      syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;
      points[syncobj_idx] = submission->wait_values[i];
      ++syncobj_idx;
   }
   bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,
                                                    true, timeout);

   free(points);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

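/* Worker thread used when a submission depends on timeline points that are
 * not available yet: it waits for the points, then processes the submission
 * on behalf of the application thread. It is woken via queue->thread_cond
 * and shut down through queue->thread_exit. */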
static int
radv_queue_submission_thread_run(void *q)
{
   struct radv_queue *queue = q;

   mtx_lock(&queue->thread_mutex);
   while (!p_atomic_read(&queue->thread_exit)) {
      struct radv_deferred_queue_submission *submission = queue->thread_submission;
      struct list_head processing_list;
      VkResult result = VK_SUCCESS;
      if (!submission) {
         u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);
         continue;
      }
      mtx_unlock(&queue->thread_mutex);

      /* Wait at most 5 seconds so we have a chance to notice shutdown when
       * a semaphore never gets signaled. If it takes longer we just retry
       * the wait next iteration. */
      result =
         wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(5000000000));
      if (result != VK_SUCCESS) {
         mtx_lock(&queue->thread_mutex);
         continue;
      }

      /* The lock isn't held but nobody will add one until we finish
       * the current submission. */
      p_atomic_set(&queue->thread_submission, NULL);

      list_inithead(&processing_list);
      list_addtail(&submission->processing_list, &processing_list);
      result = radv_process_submissions(&processing_list);

      mtx_lock(&queue->thread_mutex);
   }
   mtx_unlock(&queue->thread_mutex);
   return 0;
}

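/* Drop `decrement` references from the submission's wait count. Whoever
 * drops the last reference owns the submission: it is either queued for
 * immediate processing or, if its timeline points are not available yet,
 * handed off to the per-queue submission thread (started on demand). */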
static VkResult
radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission, uint32_t decrement,
                              struct list_head *processing_list)
{
   struct radv_queue *queue = submission->queue;
   int ret;
   if (p_atomic_add_return(&submission->submission_wait_count, -decrement))
      return VK_SUCCESS;

   if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) ==
       VK_SUCCESS) {
      list_addtail(&submission->processing_list, processing_list);
      return VK_SUCCESS;
   }

   mtx_lock(&queue->thread_mutex);

   /* A submission can only be ready for the thread if it doesn't have
    * any predecessors in the same queue, so there can only be one such
    * submission at a time. */
   assert(queue->thread_submission == NULL);

   /* Only start the thread on demand to save resources for the many games
    * which only use binary semaphores. */
   if (!queue->thread_running) {
      ret = thrd_create(&queue->submission_thread, radv_queue_submission_thread_run, queue);
      if (ret) {
         mtx_unlock(&queue->thread_mutex);
         return vk_errorf(queue->device->instance, VK_ERROR_DEVICE_LOST,
                          "Failed to start submission thread");
      }
      queue->thread_running = true;
   }

   queue->thread_submission = submission;
   mtx_unlock(&queue->thread_mutex);

   u_cnd_monotonic_signal(&queue->thread_cond);
   return VK_SUCCESS;
}

static VkResult
radv_queue_submit(struct radv_queue *queue, const struct radv_queue_submission *submission)
{
   struct radv_deferred_queue_submission *deferred = NULL;

   VkResult result = radv_create_deferred_submission(queue, submission, &deferred);
   if (result != VK_SUCCESS)
      return result;

   struct list_head processing_list;
   list_inithead(&processing_list);

   result = radv_queue_enqueue_submission(deferred, &processing_list);
   if (result != VK_SUCCESS) {
      /* If anything is in the list we leak. */
      assert(list_is_empty(&processing_list));
      return result;
   }
   return radv_process_submissions(&processing_list);
}

bool
radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)
{
   struct radeon_winsys_ctx *ctx = queue->hw_ctx;
   struct radv_winsys_sem_info sem_info = {0};
   VkResult result;

   result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0, 0, NULL, VK_NULL_HANDLE);
   if (result != VK_SUCCESS)
      return false;

   result =
      queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL, NULL, &sem_info, false);
   radv_free_sem_info(&sem_info);
   if (result != VK_SUCCESS)
      return false;

   return true;
}

/* Signals fence as soon as all the work currently put on queue is done. */
static VkResult
radv_signal_fence(struct radv_queue *queue, VkFence fence)
{
   return radv_queue_submit(queue, &(struct radv_queue_submission){.fence = fence});
}

static bool
radv_submit_has_effects(const VkSubmitInfo *info)
{
   return info->commandBufferCount || info->waitSemaphoreCount || info->signalSemaphoreCount;
}

VkResult
radv_QueueSubmit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);
   VkResult result;
   uint32_t fence_idx = 0;
   bool flushed_caches = false;

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   if (fence != VK_NULL_HANDLE) {
      for (uint32_t i = 0; i < submitCount; ++i)
         if (radv_submit_has_effects(pSubmits + i))
            fence_idx = i;
   } else
      fence_idx = UINT32_MAX;

   for (uint32_t i = 0; i < submitCount; i++) {
      if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)
         continue;

      VkPipelineStageFlags wait_dst_stage_mask = 0;
      for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {
         wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];
      }

      const VkTimelineSemaphoreSubmitInfo *timeline_info =
         vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);

      result = radv_queue_submit(
         queue, &(struct radv_queue_submission){
                   .cmd_buffers = pSubmits[i].pCommandBuffers,
                   .cmd_buffer_count = pSubmits[i].commandBufferCount,
                   .wait_dst_stage_mask = wait_dst_stage_mask,
                   .flush_caches = !flushed_caches,
                   .wait_semaphores = pSubmits[i].pWaitSemaphores,
                   .wait_semaphore_count = pSubmits[i].waitSemaphoreCount,
                   .signal_semaphores = pSubmits[i].pSignalSemaphores,
                   .signal_semaphore_count = pSubmits[i].signalSemaphoreCount,
                   .fence = i == fence_idx ? fence : VK_NULL_HANDLE,
                   .wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
                   .wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
                                          ? timeline_info->waitSemaphoreValueCount
                                          : 0,
                   .signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
                   .signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
                                            ? timeline_info->signalSemaphoreValueCount
                                            : 0,
                });
      if (result != VK_SUCCESS)
         return result;

      flushed_caches = true;
   }

   if (fence != VK_NULL_HANDLE && !submitCount) {
      result = radv_signal_fence(queue, fence);
      if (result != VK_SUCCESS)
         return result;
   }

   return VK_SUCCESS;
}

static const char *
radv_get_queue_family_name(struct radv_queue *queue)
{
   switch (queue->queue_family_index) {
   case RADV_QUEUE_GENERAL:
      return "graphics";
   case RADV_QUEUE_COMPUTE:
      return "compute";
   case RADV_QUEUE_TRANSFER:
      return "transfer";
   default:
      unreachable("Unknown queue family");
   }
}

VkResult
radv_QueueWaitIdle(VkQueue _queue)
{
   RADV_FROM_HANDLE(radv_queue, queue, _queue);

   if (radv_device_is_lost(queue->device))
      return VK_ERROR_DEVICE_LOST;

   mtx_lock(&queue->pending_mutex);
   while (!list_is_empty(&queue->pending_submissions)) {
      u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);
   }
   mtx_unlock(&queue->pending_mutex);

   if (!queue->device->ws->ctx_wait_idle(
          queue->hw_ctx, radv_queue_family_to_ring(queue->queue_family_index), queue->queue_idx)) {
      return radv_device_set_lost(queue->device,
                                  "Failed to wait for a '%s' queue "
                                  "to be idle. GPU hang?",
                                  radv_get_queue_family_name(queue));
   }

   return VK_SUCCESS;
}

VkResult
radv_DeviceWaitIdle(VkDevice _device)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {
      for (unsigned q = 0; q < device->queue_count[i]; q++) {
         VkResult result = radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));

         if (result != VK_SUCCESS)
            return result;
      }
   }
   return VK_SUCCESS;
}

VkResult
radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,
                                          VkExtensionProperties *pProperties)
{
   if (pLayerName)
      return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);

   return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,
                                                     pPropertyCount, pProperties);
}

PFN_vkVoidFunction
radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);

   /* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly
    * when we have to return valid function pointers, NULL, or it's left
    * undefined. See the table for exact details.
    */
   if (pName == NULL)
      return NULL;

#define LOOKUP_RADV_ENTRYPOINT(entrypoint)                                                         \
   if (strcmp(pName, "vk" #entrypoint) == 0)                                                       \
   return (PFN_vkVoidFunction)radv_##entrypoint

   LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);
   LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);
   LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);
   LOOKUP_RADV_ENTRYPOINT(CreateInstance);

   /* GetInstanceProcAddr() can also be called with a NULL instance.
    * See https://gitlab.khronos.org/vulkan/vulkan/issues/2057
    */
   LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);

#undef LOOKUP_RADV_ENTRYPOINT

   if (instance == NULL)
      return NULL;

   return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);
}

/* Windows will use a dll definition file to avoid build errors. */
#ifdef _WIN32
#undef PUBLIC
#define PUBLIC
#endif

/* The loader wants us to expose a second GetInstanceProcAddr function
 * to work around certain LD_PRELOAD issues seen in apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)
{
   return radv_GetInstanceProcAddr(instance, pName);
}

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL
vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)
{
   RADV_FROM_HANDLE(radv_instance, instance, _instance);
   return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);
}

bool
radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)
{
   /* Only set BO metadata for the first plane */
   if (memory->image && memory->image->offset == 0) {
      struct radeon_bo_metadata metadata;
      radv_init_metadata(device, memory->image, &metadata);
      device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);
   }

   return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);
}

void
radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                 struct radv_device_memory *mem)
{
   if (mem == NULL)
      return;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   if (mem->android_hardware_buffer)
      AHardwareBuffer_release(mem->android_hardware_buffer);
#endif

   if (mem->bo) {
      if (device->overallocation_disallowed) {
         mtx_lock(&device->overallocation_mutex);
         device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;
         mtx_unlock(&device->overallocation_mutex);
      }

      if (device->use_global_bo_list)
         device->ws->buffer_make_resident(device->ws, mem->bo, false);
      device->ws->buffer_destroy(device->ws, mem->bo);
      mem->bo = NULL;
   }

   vk_object_base_finish(&mem->base);
   vk_free2(&device->vk.alloc, pAllocator, mem);
}

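/* Common vkAllocateMemory path. Depending on the pNext chain this imports an
 * Android hardware buffer, an opaque/dma-buf fd or a host pointer, and
 * otherwise allocates a fresh BO, honoring memory priority, capture/replay
 * addresses and the overallocation policy. */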
static VkResult
radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,
                  const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
{
   struct radv_device_memory *mem;
   VkResult result;
   enum radeon_bo_domain domain;
   uint32_t flags = 0;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   const VkImportMemoryFdInfoKHR *import_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);
   const VkMemoryDedicatedAllocateInfo *dedicate_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);
   const VkExportMemoryAllocateInfo *export_info =
      vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);
   const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);
   const VkImportMemoryHostPointerInfoEXT *host_ptr_info =
      vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);

   const struct wsi_memory_allocate_info *wsi_info =
      vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);

   if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&
       !(export_info && (export_info->handleTypes &
                         VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {
      /* Apparently, this is allowed */
      *pMem = VK_NULL_HANDLE;
      return VK_SUCCESS;
   }

   mem =
      vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);

   if (wsi_info) {
      if (wsi_info->implicit_sync)
         flags |= RADEON_FLAG_IMPLICIT_SYNC;

      /* In the PRIME case, a linear buffer is allocated in the default heap,
       * which is VRAM. When the display is connected to the iGPU while
       * rendering happens on the dGPU, the DDX function
       * amdgpu_present_check_flip() then fails and presentation falls back
       * to a blit instead of a flip. Setting RADEON_FLAG_GTT_WC lets the
       * kernel allocate GTT memory on hardware that can scan out from GTT
       * directly. The wsi_info check ensures the flag is only set for memory
       * allocated by the driver itself.
       */
      flags |= RADEON_FLAG_GTT_WC;
   }

   if (dedicate_info) {
      mem->image = radv_image_from_handle(dedicate_info->image);
      mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);
   } else {
      mem->image = NULL;
      mem->buffer = NULL;
   }

   float priority_float = 0.5;
   const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);
   if (priority_ext)
      priority_float = priority_ext->priority;

   uint64_t replay_address = 0;
   const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =
      vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);
   if (replay_info && replay_info->opaqueCaptureAddress)
      replay_address = replay_info->opaqueCaptureAddress;

   unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,
                            (int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));

   mem->user_ptr = NULL;
   mem->bo = NULL;

#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER
   mem->android_hardware_buffer = NULL;
#endif

   if (ahb_import_info) {
      result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);
      if (result != VK_SUCCESS)
         goto fail;
   } else if (export_info && (export_info->handleTypes &
                              VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {
      result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);
      if (result != VK_SUCCESS)
         goto fail;
   } else if (import_info) {
      assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
             import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);
      result = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, &mem->bo, NULL);
      if (result != VK_SUCCESS) {
         goto fail;
      } else {
         close(import_info->fd);
      }

      if (mem->image && mem->image->plane_count == 1 &&
          !vk_format_is_depth_or_stencil(mem->image->vk_format) && mem->image->info.samples == 1 &&
          mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
         struct radeon_bo_metadata metadata;
         device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);

         struct radv_image_create_info create_info = {.no_metadata_planes = true,
                                                      .bo_metadata = &metadata};

         /* This gives a basic ability to import radeonsi images
          * that don't have DCC. This is not guaranteed by any
          * spec and can be removed after we support modifiers. */
         result = radv_image_create_layout(device, create_info, NULL, mem->image);
         if (result != VK_SUCCESS) {
            device->ws->buffer_destroy(device->ws, mem->bo);
            goto fail;
         }
      }
   } else if (host_ptr_info) {
      assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);
      result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,
                                           pAllocateInfo->allocationSize, priority, &mem->bo);
      if (result != VK_SUCCESS) {
         goto fail;
      } else {
         mem->user_ptr = host_ptr_info->pHostPointer;
      }
   } else {
      uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
      uint32_t heap_index;

      heap_index =
         device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]
            .heapIndex;
      domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];
      flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];

      if (!import_info && (!export_info || !export_info->handleTypes)) {
         flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;
         if (device->use_global_bo_list) {
            flags |= RADEON_FLAG_PREFER_LOCAL_BO;
         }
      }

      const VkMemoryAllocateFlagsInfo *flags_info =
         vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);
      if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
         flags |= RADEON_FLAG_REPLAYABLE;

      if (device->overallocation_disallowed) {
         uint64_t total_size =
            device->physical_device->memory_properties.memoryHeaps[heap_index].size;

         mtx_lock(&device->overallocation_mutex);
         if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {
            mtx_unlock(&device->overallocation_mutex);
            result = VK_ERROR_OUT_OF_DEVICE_MEMORY;
            goto fail;
         }
         device->allocated_memory_size[heap_index] += alloc_size;
         mtx_unlock(&device->overallocation_mutex);
      }

      result = device->ws->buffer_create(device->ws, alloc_size,
                                         device->physical_device->rad_info.max_alignment, domain,
                                         flags, priority, replay_address, &mem->bo);

      if (result != VK_SUCCESS) {
         if (device->overallocation_disallowed) {
            mtx_lock(&device->overallocation_mutex);
            device->allocated_memory_size[heap_index] -= alloc_size;
            mtx_unlock(&device->overallocation_mutex);
         }
         goto fail;
      }

      mem->heap_index = heap_index;
      mem->alloc_size = alloc_size;
   }

   if (!wsi_info) {
      if (device->use_global_bo_list) {
         result = device->ws->buffer_make_resident(device->ws, mem->bo, true);
         if (result != VK_SUCCESS)
            goto fail;
      }
   }

   *pMem = radv_device_memory_to_handle(mem);

   return VK_SUCCESS;

fail:
   radv_free_memory(device, pAllocator, mem);

   return result;
}

VkResult
radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,
                    const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);
}

void
radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _mem);

   radv_free_memory(device, pAllocator, mem);
}

VkResult
radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,
               VkMemoryMapFlags flags, void **ppData)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (mem->user_ptr)
      *ppData = mem->user_ptr;
   else
      *ppData = device->ws->buffer_map(mem->bo);

   if (*ppData) {
      *ppData = (uint8_t *)*ppData + offset;
      return VK_SUCCESS;
   }

   return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);
}

void
radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, mem, _memory);

   if (mem == NULL)
      return;

   if (mem->user_ptr == NULL)
      device->ws->buffer_unmap(mem->bo);
}

VkResult
radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
                             const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

VkResult
radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,
                                  const VkMappedMemoryRange *pMemoryRanges)
{
   return VK_SUCCESS;
}

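/* Buffers can live in any memory type. Sparse buffers need page (4 KiB)
 * alignment for virtual binding; everything else uses a conservative
 * 16-byte alignment. */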
void
5531
radv_GetBufferMemoryRequirements(VkDevice _device, VkBuffer _buffer,
5532
VkMemoryRequirements *pMemoryRequirements)
5533
{
5534
RADV_FROM_HANDLE(radv_device, device, _device);
5535
RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);
5536
5537
pMemoryRequirements->memoryTypeBits =
5538
(1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5539
5540
if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)
5541
pMemoryRequirements->alignment = 4096;
5542
else
5543
pMemoryRequirements->alignment = 16;
5544
5545
pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);
5546
}
5547
5548
void
5549
radv_GetBufferMemoryRequirements2(VkDevice device, const VkBufferMemoryRequirementsInfo2 *pInfo,
5550
VkMemoryRequirements2 *pMemoryRequirements)
5551
{
5552
radv_GetBufferMemoryRequirements(device, pInfo->buffer,
5553
&pMemoryRequirements->memoryRequirements);
5554
vk_foreach_struct(ext, pMemoryRequirements->pNext)
5555
{
5556
switch (ext->sType) {
5557
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5558
VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
5559
req->requiresDedicatedAllocation = false;
5560
req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5561
break;
5562
}
5563
default:
5564
break;
5565
}
5566
}
5567
}
5568
5569
void
5570
radv_GetImageMemoryRequirements(VkDevice _device, VkImage _image,
5571
VkMemoryRequirements *pMemoryRequirements)
5572
{
5573
RADV_FROM_HANDLE(radv_device, device, _device);
5574
RADV_FROM_HANDLE(radv_image, image, _image);
5575
5576
pMemoryRequirements->memoryTypeBits =
5577
(1u << device->physical_device->memory_properties.memoryTypeCount) - 1;
5578
5579
pMemoryRequirements->size = image->size;
5580
pMemoryRequirements->alignment = image->alignment;
5581
}
5582
5583
void
5584
radv_GetImageMemoryRequirements2(VkDevice device, const VkImageMemoryRequirementsInfo2 *pInfo,
5585
VkMemoryRequirements2 *pMemoryRequirements)
5586
{
5587
radv_GetImageMemoryRequirements(device, pInfo->image, &pMemoryRequirements->memoryRequirements);
5588
5589
RADV_FROM_HANDLE(radv_image, image, pInfo->image);
5590
5591
vk_foreach_struct(ext, pMemoryRequirements->pNext)
5592
{
5593
switch (ext->sType) {
5594
case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {
5595
VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;
5596
req->requiresDedicatedAllocation =
5597
image->shareable && image->tiling != VK_IMAGE_TILING_LINEAR;
5598
req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;
5599
break;
5600
}
5601
default:
5602
break;
5603
}
5604
}
5605
}
5606
5607
void
5608
radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,
5609
VkDeviceSize *pCommittedMemoryInBytes)
5610
{
5611
*pCommittedMemoryInBytes = 0;
5612
}
5613
5614
VkResult
5615
radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,
5616
const VkBindBufferMemoryInfo *pBindInfos)
5617
{
5618
RADV_FROM_HANDLE(radv_device, device, _device);
5619
5620
for (uint32_t i = 0; i < bindInfoCount; ++i) {
5621
RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5622
RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);
5623
5624
if (mem) {
5625
if (mem->alloc_size) {
5626
VkMemoryRequirements req;
5627
5628
radv_GetBufferMemoryRequirements(_device, pBindInfos[i].buffer, &req);
5629
5630
if (pBindInfos[i].memoryOffset + req.size > mem->alloc_size) {
5631
return vk_errorf(device->instance, VK_ERROR_UNKNOWN,
5632
"Device memory object too small for the buffer.\n");
5633
}
5634
}
5635
5636
buffer->bo = mem->bo;
5637
buffer->offset = pBindInfos[i].memoryOffset;
5638
} else {
5639
buffer->bo = NULL;
5640
}
5641
}
5642
return VK_SUCCESS;
5643
}
5644
5645
VkResult
5646
radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,
5647
VkDeviceSize memoryOffset)
5648
{
5649
const VkBindBufferMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
5650
.buffer = buffer,
5651
.memory = memory,
5652
.memoryOffset = memoryOffset};
5653
5654
return radv_BindBufferMemory2(device, 1, &info);
5655
}
5656
5657
VkResult
5658
radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount,
5659
const VkBindImageMemoryInfo *pBindInfos)
5660
{
5661
RADV_FROM_HANDLE(radv_device, device, _device);
5662
5663
for (uint32_t i = 0; i < bindInfoCount; ++i) {
5664
RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);
5665
RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);
5666
5667
if (mem) {
5668
if (mem->alloc_size) {
5669
VkMemoryRequirements req;
5670
5671
radv_GetImageMemoryRequirements(_device, pBindInfos[i].image, &req);
5672
5673
if (pBindInfos[i].memoryOffset + req.size > mem->alloc_size) {
5674
return vk_errorf(device->instance, VK_ERROR_UNKNOWN,
5675
"Device memory object too small for the image.\n");
5676
}
5677
}
5678
5679
image->bo = mem->bo;
5680
image->offset = pBindInfos[i].memoryOffset;
5681
} else {
5682
image->bo = NULL;
5683
image->offset = 0;
5684
}
5685
}
5686
return VK_SUCCESS;
5687
}
5688
5689
VkResult
5690
radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory,
5691
VkDeviceSize memoryOffset)
5692
{
5693
const VkBindImageMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
5694
.image = image,
5695
.memory = memory,
5696
.memoryOffset = memoryOffset};
5697
5698
return radv_BindImageMemory2(device, 1, &info);
5699
}
5700
5701
static bool
5702
radv_sparse_bind_has_effects(const VkBindSparseInfo *info)
5703
{
5704
return info->bufferBindCount || info->imageOpaqueBindCount || info->imageBindCount ||
5705
info->waitSemaphoreCount || info->signalSemaphoreCount;
5706
}
5707
5708
VkResult
5709
radv_QueueBindSparse(VkQueue _queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo,
5710
VkFence fence)
5711
{
5712
RADV_FROM_HANDLE(radv_queue, queue, _queue);
5713
uint32_t fence_idx = 0;
5714
5715
if (radv_device_is_lost(queue->device))
5716
return VK_ERROR_DEVICE_LOST;
5717
5718
if (fence != VK_NULL_HANDLE) {
5719
for (uint32_t i = 0; i < bindInfoCount; ++i)
5720
if (radv_sparse_bind_has_effects(pBindInfo + i))
5721
fence_idx = i;
5722
} else
5723
fence_idx = UINT32_MAX;
5724
5725
for (uint32_t i = 0; i < bindInfoCount; ++i) {
5726
if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))
5727
continue;
5728
5729
const VkTimelineSemaphoreSubmitInfo *timeline_info =
5730
vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);
5731
5732
VkResult result = radv_queue_submit(
5733
queue, &(struct radv_queue_submission){
5734
.buffer_binds = pBindInfo[i].pBufferBinds,
5735
.buffer_bind_count = pBindInfo[i].bufferBindCount,
5736
.image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,
5737
.image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,
5738
.image_binds = pBindInfo[i].pImageBinds,
5739
.image_bind_count = pBindInfo[i].imageBindCount,
5740
.wait_semaphores = pBindInfo[i].pWaitSemaphores,
5741
.wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,
5742
.signal_semaphores = pBindInfo[i].pSignalSemaphores,
5743
.signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,
5744
.fence = i == fence_idx ? fence : VK_NULL_HANDLE,
5745
.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,
5746
.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues
5747
? timeline_info->waitSemaphoreValueCount
5748
: 0,
5749
.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,
5750
.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues
5751
? timeline_info->signalSemaphoreValueCount
5752
: 0,
5753
});
5754
5755
if (result != VK_SUCCESS)
5756
return result;
5757
}
5758
5759
if (fence != VK_NULL_HANDLE && !bindInfoCount) {
5760
VkResult result = radv_signal_fence(queue, fence);
5761
if (result != VK_SUCCESS)
5762
return result;
5763
}
5764
5765
return VK_SUCCESS;
5766
}
5767
5768
static void
5769
radv_destroy_fence_part(struct radv_device *device, struct radv_fence_part *part)
5770
{
5771
if (part->kind != RADV_FENCE_NONE)
5772
device->ws->destroy_syncobj(device->ws, part->syncobj);
5773
part->kind = RADV_FENCE_NONE;
5774
}
5775
5776
static void
5777
radv_destroy_fence(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
5778
struct radv_fence *fence)
5779
{
5780
radv_destroy_fence_part(device, &fence->temporary);
5781
radv_destroy_fence_part(device, &fence->permanent);
5782
5783
vk_object_base_finish(&fence->base);
5784
vk_free2(&device->vk.alloc, pAllocator, fence);
5785
}
5786
5787
VkResult
radv_CreateFence(VkDevice _device, const VkFenceCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkFence *pFence)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   bool create_signaled = false;
   struct radv_fence *fence;
   int ret;

   fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!fence)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);

   fence->permanent.kind = RADV_FENCE_SYNCOBJ;

   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)
      create_signaled = true;

   ret = device->ws->create_syncobj(device->ws, create_signaled, &fence->permanent.syncobj);
   if (ret) {
      radv_destroy_fence(device, pAllocator, fence);
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   *pFence = radv_fence_to_handle(fence);

   return VK_SUCCESS;
}

void
radv_DestroyFence(VkDevice _device, VkFence _fence, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, _fence);

   if (!fence)
      return;

   radv_destroy_fence(device, pAllocator, fence);
}

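/* The temporary part, when present, takes precedence over the permanent one
 * (as the spec requires for imported payloads). All handles are collected up
 * front so a single wait_syncobj call can cover every fence.
 */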
VkResult
radv_WaitForFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll,
                   uint64_t timeout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint32_t *handles;

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   timeout = radv_get_absolute_timeout(timeout);

   handles = malloc(sizeof(uint32_t) * fenceCount);
   if (!handles)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < fenceCount; ++i) {
      RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

      struct radv_fence_part *part =
         fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

      assert(part->kind == RADV_FENCE_SYNCOBJ);
      handles[i] = part->syncobj;
   }

   bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);
   free(handles);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

VkResult
radv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   for (unsigned i = 0; i < fenceCount; ++i) {
      RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);

      /* From the Vulkan 1.0.53 spec:
       *
       * "If any member of pFences currently has its payload
       * imported with temporary permanence, that fence’s prior
       * permanent payload is first restored. The remaining
       * operations described therefore operate on the restored
       * payload."
       */
      if (fence->temporary.kind != RADV_FENCE_NONE)
         radv_destroy_fence_part(device, &fence->temporary);

      device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);
   }

   return VK_SUCCESS;
}

VkResult
radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, _fence);

   struct radv_fence_part *part =
      fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   bool success = device->ws->wait_syncobj(device->ws, &part->syncobj, 1, true, 0);
   return success ? VK_SUCCESS : VK_NOT_READY;
}

// Queue semaphore functions

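/* Timeline semaphores are emulated in userspace when the kernel lacks
 * timeline syncobjs: a radv_timeline tracks the highest submitted and
 * signaled values and keeps a value-ordered list of points, each backed by a
 * plain binary syncobj.
 */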
static void
radv_create_timeline(struct radv_timeline *timeline, uint64_t value)
{
   timeline->highest_signaled = value;
   timeline->highest_submitted = value;
   list_inithead(&timeline->points);
   list_inithead(&timeline->free_points);
   list_inithead(&timeline->waiters);
   mtx_init(&timeline->mutex, mtx_plain);
}

static void
radv_destroy_timeline(struct radv_device *device, struct radv_timeline *timeline)
{
   list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->free_points, list)
   {
      list_del(&point->list);
      device->ws->destroy_syncobj(device->ws, point->syncobj);
      free(point);
   }
   list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
   {
      list_del(&point->list);
      device->ws->destroy_syncobj(device->ws, point->syncobj);
      free(point);
   }
   mtx_destroy(&timeline->mutex);
}

static void
radv_timeline_gc_locked(struct radv_device *device, struct radv_timeline *timeline)
{
   list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)
   {
      if (point->wait_count || point->value > timeline->highest_submitted)
         return;

      if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {
         timeline->highest_signaled = point->value;
         list_del(&point->list);
         list_add(&point->list, &timeline->free_points);
      }
   }
}

static struct radv_timeline_point *
radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,
                                         uint64_t p)
{
   radv_timeline_gc_locked(device, timeline);

   if (p <= timeline->highest_signaled)
      return NULL;

   list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
   {
      if (point->value >= p) {
         ++point->wait_count;
         return point;
      }
   }
   return NULL;
}

static struct radv_timeline_point *
radv_timeline_add_point_locked(struct radv_device *device, struct radv_timeline *timeline,
                               uint64_t p)
{
   radv_timeline_gc_locked(device, timeline);

   struct radv_timeline_point *ret = NULL;
   struct radv_timeline_point *prev = NULL;
   int r;

   if (p <= timeline->highest_signaled)
      return NULL;

   list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)
   {
      if (point->value == p) {
         return NULL;
      }

      if (point->value < p)
         prev = point;
   }

   if (list_is_empty(&timeline->free_points)) {
      ret = malloc(sizeof(struct radv_timeline_point));
      /* Guard against allocation failure before dereferencing ret. */
      if (!ret)
         return NULL;
      r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);
      if (r) {
         free(ret);
         return NULL;
      }
   } else {
      ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);
      list_del(&ret->list);

      device->ws->reset_syncobj(device->ws, ret->syncobj);
   }

   ret->value = p;
   ret->wait_count = 1;

   if (prev) {
      list_add(&ret->list, &prev->list);
   } else {
      list_addtail(&ret->list, &timeline->points);
   }
   return ret;
}

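/* Two-stage wait: first sleep on the device condition variable until the
 * value has been submitted, then wait on the point's syncobj for the GPU to
 * signal it.
 */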
static VkResult
radv_timeline_wait(struct radv_device *device, struct radv_timeline *timeline, uint64_t value,
                   uint64_t abs_timeout)
{
   mtx_lock(&timeline->mutex);

   while (timeline->highest_submitted < value) {
      struct timespec abstime;
      timespec_from_nsec(&abstime, abs_timeout);

      u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);

      if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {
         mtx_unlock(&timeline->mutex);
         return VK_TIMEOUT;
      }
   }

   struct radv_timeline_point *point =
      radv_timeline_find_point_at_least_locked(device, timeline, value);
   mtx_unlock(&timeline->mutex);
   if (!point)
      return VK_SUCCESS;

   bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);

   mtx_lock(&timeline->mutex);
   point->wait_count--;
   mtx_unlock(&timeline->mutex);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

static void
radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,
                                     struct list_head *processing_list)
{
   list_for_each_entry_safe(struct radv_timeline_waiter, waiter, &timeline->waiters, list)
   {
      if (waiter->value > timeline->highest_submitted)
         continue;

      radv_queue_trigger_submission(waiter->submission, 1, processing_list);
      list_del(&waiter->list);
   }
}

static void
radv_destroy_semaphore_part(struct radv_device *device, struct radv_semaphore_part *part)
{
   switch (part->kind) {
   case RADV_SEMAPHORE_NONE:
      break;
   case RADV_SEMAPHORE_TIMELINE:
      radv_destroy_timeline(device, &part->timeline);
      break;
   case RADV_SEMAPHORE_SYNCOBJ:
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:
      device->ws->destroy_syncobj(device->ws, part->syncobj);
      break;
   }
   part->kind = RADV_SEMAPHORE_NONE;
}

static VkSemaphoreTypeKHR
radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)
{
   const VkSemaphoreTypeCreateInfo *type_info =
      vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);

   if (!type_info)
      return VK_SEMAPHORE_TYPE_BINARY;

   if (initial_value)
      *initial_value = type_info->initialValue;
   return type_info->semaphoreType;
}

static void
radv_destroy_semaphore(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                       struct radv_semaphore *sem)
{
   radv_destroy_semaphore_part(device, &sem->temporary);
   radv_destroy_semaphore_part(device, &sem->permanent);
   vk_object_base_finish(&sem->base);
   vk_free2(&device->vk.alloc, pAllocator, sem);
}

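/* A semaphore part is backed by one of three kinds: a plain syncobj for
 * binary semaphores, a kernel timeline syncobj when the winsys supports it,
 * or the userspace timeline emulation otherwise.
 */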
VkResult
radv_CreateSemaphore(VkDevice _device, const VkSemaphoreCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint64_t initial_value = 0;
   VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);

   struct radv_semaphore *sem =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sem)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sem->base, VK_OBJECT_TYPE_SEMAPHORE);

   sem->temporary.kind = RADV_SEMAPHORE_NONE;
   sem->permanent.kind = RADV_SEMAPHORE_NONE;

   if (type == VK_SEMAPHORE_TYPE_TIMELINE &&
       device->physical_device->rad_info.has_timeline_syncobj) {
      int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
      if (ret) {
         radv_destroy_semaphore(device, pAllocator, sem);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);
      sem->permanent.timeline_syncobj.max_point = initial_value;
      sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
   } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
      radv_create_timeline(&sem->permanent.timeline, initial_value);
      sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;
   } else {
      int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);
      if (ret) {
         radv_destroy_semaphore(device, pAllocator, sem);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
      sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;
   }

   *pSemaphore = radv_semaphore_to_handle(sem);
   return VK_SUCCESS;
}

void
radv_DestroySemaphore(VkDevice _device, VkSemaphore _semaphore,
                      const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);
   if (!_semaphore)
      return;

   radv_destroy_semaphore(device, pAllocator, sem);
}

VkResult
radv_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
                                         ? &semaphore->temporary
                                         : &semaphore->permanent;

   switch (part->kind) {
   case RADV_SEMAPHORE_TIMELINE: {
      mtx_lock(&part->timeline.mutex);
      radv_timeline_gc_locked(device, &part->timeline);
      *pValue = part->timeline.highest_signaled;
      mtx_unlock(&part->timeline.mutex);
      return VK_SUCCESS;
   }
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
      return device->ws->query_syncobj(device->ws, part->syncobj, pValue);
   }
   case RADV_SEMAPHORE_NONE:
   case RADV_SEMAPHORE_SYNCOBJ:
      unreachable("Invalid semaphore type");
   }
   unreachable("Unhandled semaphore type");
}

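/* With an emulated timeline there is no single object a "wait any" can block
 * on, so we poll each semaphore with a zero timeout until one succeeds or
 * the absolute timeout expires.
 */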
static VkResult
radv_wait_timelines(struct radv_device *device, const VkSemaphoreWaitInfo *pWaitInfo,
                    uint64_t abs_timeout)
{
   if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {
      for (;;) {
         for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
            RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
            VkResult result =
               radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);

            if (result == VK_SUCCESS)
               return VK_SUCCESS;
         }
         if (radv_get_current_time() > abs_timeout)
            return VK_TIMEOUT;
      }
   }

   for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
      VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,
                                           pWaitInfo->pValues[i], abs_timeout);

      if (result != VK_SUCCESS)
         return result;
   }
   return VK_SUCCESS;
}

VkResult
radv_WaitSemaphores(VkDevice _device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   uint64_t abs_timeout = radv_get_absolute_timeout(timeout);

   if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind ==
       RADV_SEMAPHORE_TIMELINE)
      return radv_wait_timelines(device, pWaitInfo, abs_timeout);

   if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))
      return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,
                       "semaphoreCount integer overflow");

   bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);
   uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);
   if (!handles)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {
      RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);
      handles[i] = semaphore->permanent.syncobj;
   }

   bool success =
      device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,
                                        pWaitInfo->semaphoreCount, wait_all, false, abs_timeout);
   free(handles);
   return success ? VK_SUCCESS : VK_TIMEOUT;
}

VkResult
radv_SignalSemaphore(VkDevice _device, const VkSemaphoreSignalInfo *pSignalInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);

   struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE
                                         ? &semaphore->temporary
                                         : &semaphore->permanent;

   switch (part->kind) {
   case RADV_SEMAPHORE_TIMELINE: {
      mtx_lock(&part->timeline.mutex);
      radv_timeline_gc_locked(device, &part->timeline);
      part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);
      part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);

      struct list_head processing_list;
      list_inithead(&processing_list);
      radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);
      mtx_unlock(&part->timeline.mutex);

      VkResult result = radv_process_submissions(&processing_list);

      /* This needs to happen after radv_process_submissions, so
       * that any submitted submissions that are now unblocked get
       * processed before we wake the application. This way we
       * ensure that any binary semaphores that are now unblocked
       * are usable by the application. */
      u_cnd_monotonic_broadcast(&device->timeline_cond);

      return result;
   }
   case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {
      part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);
      device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);
      break;
   }
   case RADV_SEMAPHORE_NONE:
   case RADV_SEMAPHORE_SYNCOBJ:
      unreachable("Invalid semaphore type");
   }
   return VK_SUCCESS;
}

static void
radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                   struct radv_event *event)
{
   if (event->bo)
      device->ws->buffer_destroy(device->ws, event->bo);

   vk_object_base_finish(&event->base);
   vk_free2(&device->vk.alloc, pAllocator, event);
}

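/* Events are backed by an 8-byte CPU-visible buffer in GTT: both the CPU and
 * the GPU read and write the mapped uint64_t, where 1 means set and 0 means
 * reset.
 */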
VkResult
radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,
                                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

   if (!event)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);

   VkResult result = device->ws->buffer_create(
      device->ws, 8, 8, RADEON_DOMAIN_GTT,
      RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,
      RADV_BO_PRIORITY_FENCE, 0, &event->bo);
   if (result != VK_SUCCESS) {
      radv_destroy_event(device, pAllocator, event);
      return vk_error(device->instance, result);
   }

   event->map = (uint64_t *)device->ws->buffer_map(event->bo);
   if (!event->map) {
      radv_destroy_event(device, pAllocator, event);
      return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   }

   *pEvent = radv_event_to_handle(event);

   return VK_SUCCESS;
}

void
radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_event, event, _event);

   if (!event)
      return;

   radv_destroy_event(device, pAllocator, event);
}

VkResult
radv_GetEventStatus(VkDevice _device, VkEvent _event)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_event, event, _event);

   if (radv_device_is_lost(device))
      return VK_ERROR_DEVICE_LOST;

   if (*event->map == 1)
      return VK_EVENT_SET;
   return VK_EVENT_RESET;
}

VkResult
radv_SetEvent(VkDevice _device, VkEvent _event)
{
   RADV_FROM_HANDLE(radv_event, event, _event);
   *event->map = 1;

   return VK_SUCCESS;
}

VkResult
radv_ResetEvent(VkDevice _device, VkEvent _event)
{
   RADV_FROM_HANDLE(radv_event, event, _event);
   *event->map = 0;

   return VK_SUCCESS;
}

static void
radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                    struct radv_buffer *buffer)
{
   if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)
      device->ws->buffer_destroy(device->ws, buffer->bo);

   vk_object_base_finish(&buffer->base);
   vk_free2(&device->vk.alloc, pAllocator, buffer);
}

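/* Only sparse buffers get a BO at creation time: a virtual BO that later
 * sparse binds map pages into. Regular buffers are attached to memory via
 * vkBindBufferMemory() instead.
 */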
VkResult
radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,
                  const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer *buffer;

   if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)
      return VK_ERROR_OUT_OF_DEVICE_MEMORY;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

   buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (buffer == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);

   buffer->size = pCreateInfo->size;
   buffer->usage = pCreateInfo->usage;
   buffer->bo = NULL;
   buffer->offset = 0;
   buffer->flags = pCreateInfo->flags;

   buffer->shareable =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;

   if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {
      enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL;
      if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)
         flags |= RADEON_FLAG_REPLAYABLE;

      uint64_t replay_address = 0;
      const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =
         vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);
      if (replay_info && replay_info->opaqueCaptureAddress)
         replay_address = replay_info->opaqueCaptureAddress;

      VkResult result = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,
                                                  flags, RADV_BO_PRIORITY_VIRTUAL,
                                                  replay_address, &buffer->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_buffer(device, pAllocator, buffer);
         return vk_error(device->instance, result);
      }
   }

   *pBuffer = radv_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void
radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

   if (!buffer)
      return;

   radv_destroy_buffer(device, pAllocator, buffer);
}

VkDeviceAddress
radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
   return radv_buffer_get_va(buffer->bo) + buffer->offset;
}

uint64_t
radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);
   return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0;
}

uint64_t
radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,
                                         const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)
{
   RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);
   return radv_buffer_get_va(mem->bo);
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static uint32_t
radv_surface_max_layer_count(struct radv_image_view *iview)
{
   return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth
                                               : (iview->base_layer + iview->layer_count);
}

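/* Helpers for CB_DCC_CONTROL: pick the DCC block-size limits based on chip
 * generation and how the image can be used.
 */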
static unsigned
get_dcc_max_uncompressed_block_size(const struct radv_device *device,
                                    const struct radv_image_view *iview)
{
   if (device->physical_device->rad_info.chip_class < GFX10 && iview->image->info.samples > 1) {
      if (iview->image->planes[0].surface.bpe == 1)
         return V_028C78_MAX_BLOCK_SIZE_64B;
      else if (iview->image->planes[0].surface.bpe == 2)
         return V_028C78_MAX_BLOCK_SIZE_128B;
   }

   return V_028C78_MAX_BLOCK_SIZE_256B;
}

static unsigned
get_dcc_min_compressed_block_size(const struct radv_device *device)
{
   if (!device->physical_device->rad_info.has_dedicated_vram) {
      /* amdvlk: [min-compressed-block-size] should be set to 32 for
       * dGPU and 64 for APU because all of our APUs to date use
       * DIMMs which have a request granularity size of 64B while all
       * other chips have a 32B request size.
       */
      return V_028C78_MIN_BLOCK_SIZE_64B;
   }

   return V_028C78_MIN_BLOCK_SIZE_32B;
}

static uint32_t
radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)
{
   unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);
   unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);
   unsigned max_compressed_block_size;
   unsigned independent_128b_blocks;
   unsigned independent_64b_blocks;

   if (!radv_dcc_enabled(iview->image, iview->base_mip))
      return 0;

   /* For GFX9+ ac_surface computes values for us (except min_compressed
    * and max_uncompressed) */
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      max_compressed_block_size =
         iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;
      independent_128b_blocks =
         iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;
      independent_64b_blocks =
         iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;
   } else {
      independent_128b_blocks = 0;

      if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
                                 VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {
         /* If this DCC image is potentially going to be used in texture
          * fetches, we need some special settings.
          */
         independent_64b_blocks = 1;
         max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;
      } else {
         /* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=
          * MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as
          * big as possible for better compression state.
          */
         independent_64b_blocks = 0;
         max_compressed_block_size = max_uncompressed_block_size;
      }
   }

   return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
          S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |
          S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |
          S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |
          S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);
}

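/* Fill the CB_COLOR_* register values for a color attachment view. On GFX9+
 * the swizzle modes come from ac_surface, while the legacy (GFX6-GFX8) path
 * computes tile indices and pitch/slice maxima by hand.
 */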
void
radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,
                              struct radv_image_view *iview)
{
   const struct util_format_description *desc;
   unsigned ntype, format, swap, endian;
   unsigned blend_clamp = 0, blend_bypass = 0;
   uint64_t va;
   const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];
   const struct radeon_surf *surf = &plane->surface;

   desc = vk_format_description(iview->vk_format);

   memset(cb, 0, sizeof(*cb));

   /* Intensity is implemented as Red, so treat it that way. */
   cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);

   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;

   cb->cb_color_base = va >> 8;

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (device->physical_device->rad_info.chip_class >= GFX10) {
         cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
                                 S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
                                 S_028EE0_CMASK_PIPE_ALIGNED(1) |
                                 S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);
      } else {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (surf->meta_offset)
            meta = surf->u.gfx9.color.dcc;

         cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |
                                S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |
                                S_028C74_RB_ALIGNED(meta.rb_aligned) |
                                S_028C74_PIPE_ALIGNED(meta.pipe_aligned);
         cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);
      }

      cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;
      cb->cb_color_base |= surf->tile_swizzle;
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];
      unsigned pitch_tile_max, slice_tile_max, tile_mode_index;

      cb->cb_color_base += level_info->offset_256B;
      if (level_info->mode == RADEON_SURF_MODE_2D)
         cb->cb_color_base |= surf->tile_swizzle;

      pitch_tile_max = level_info->nblk_x / 8 - 1;
      slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
      tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);

      cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
      cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);
      cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;

      cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);

      if (radv_image_has_fmask(iview->image)) {
         if (device->physical_device->rad_info.chip_class >= GFX7)
            cb->cb_color_pitch |=
               S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);
         cb->cb_color_attrib |=
            S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);
      } else {
         /* This must be set for fast clear to work without FMASK. */
         if (device->physical_device->rad_info.chip_class >= GFX7)
            cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
         cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
         cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
      }
   }

   /* CMASK variables */
   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
   va += surf->cmask_offset;
   cb->cb_color_cmask = va >> 8;

   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
   va += surf->meta_offset;

   if (radv_dcc_enabled(iview->image, iview->base_mip) &&
       device->physical_device->rad_info.chip_class <= GFX8)
      va += plane->surface.u.legacy.color.dcc_level[iview->base_mip].dcc_offset;

   unsigned dcc_tile_swizzle = surf->tile_swizzle;
   dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;

   cb->cb_dcc_base = va >> 8;
   cb->cb_dcc_base |= dcc_tile_swizzle;

   /* GFX10 field has the same base shift as the GFX6 field. */
   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   cb->cb_color_view =
      S_028C6C_SLICE_START(iview->base_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);

   if (iview->image->info.samples > 1) {
      unsigned log_samples = util_logbase2(iview->image->info.samples);

      cb->cb_color_attrib |=
         S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_samples);
   }

   if (radv_image_has_fmask(iview->image)) {
      va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->fmask_offset;
      cb->cb_color_fmask = va >> 8;
      cb->cb_color_fmask |= surf->fmask_tile_swizzle;
   } else {
      cb->cb_color_fmask = cb->cb_color_base;
   }

   ntype = radv_translate_color_numformat(iview->vk_format, desc,
                                          vk_format_get_first_non_void_channel(iview->vk_format));
   format = radv_translate_colorformat(iview->vk_format);
   if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
      radv_finishme("Illegal color\n");
   swap = radv_translate_colorswap(iview->vk_format, false);
   endian = radv_colorformat_endian_swap(format);

   /* blend clamp should be set for all NORM/SRGB types */
   if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||
       ntype == V_028C70_NUMBER_SRGB)
      blend_clamp = 1;

   /* set blend bypass according to docs if SINT/UINT or
      8/24 COLOR variants */
   if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
       format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
       format == V_028C70_COLOR_X24_8_32_FLOAT) {
      blend_clamp = 0;
      blend_bypass = 1;
   }
#if 0
   if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
       (format == V_028C70_COLOR_8 ||
        format == V_028C70_COLOR_8_8 ||
        format == V_028C70_COLOR_8_8_8_8))
      ->color_is_int8 = true;
#endif
   cb->cb_color_info =
      S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |
      S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |
      S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&
                          ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&
                          format != V_028C70_COLOR_24_8) |
      S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);
   if (radv_image_has_fmask(iview->image)) {
      cb->cb_color_info |= S_028C70_COMPRESSION(1);
      if (device->physical_device->rad_info.chip_class == GFX6) {
         unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);
         cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);
      }

      if (radv_image_is_tc_compat_cmask(iview->image)) {
         /* Allow the texture block to read FMASK directly
          * without decompressing it. This bit must be cleared
          * when performing FMASK_DECOMPRESS or DCC_COMPRESS,
          * otherwise the operation doesn't happen.
          */
         cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);

         if (device->physical_device->rad_info.chip_class == GFX8) {
            /* Set CMASK into a tiling format that allows
             * the texture block to read it.
             */
            cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);
         }
      }
   }

   if (radv_image_has_cmask(iview->image) &&
       !(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))
      cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

   if (radv_dcc_enabled(iview->image, iview->base_mip))
      cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

   cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);

   /* This must be set for fast clear to work without FMASK. */
   if (!radv_image_has_fmask(iview->image) &&
       device->physical_device->rad_info.chip_class == GFX6) {
      unsigned bankh = util_logbase2(surf->u.legacy.bankh);
      cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
   }

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D
                               ? (iview->extent.depth - 1)
                               : (iview->image->info.array_size - 1);
      unsigned width =
         vk_format_get_plane_width(iview->image->vk_format, iview->plane_id, iview->extent.width);
      unsigned height =
         vk_format_get_plane_height(iview->image->vk_format, iview->plane_id, iview->extent.height);

      if (device->physical_device->rad_info.chip_class >= GFX10) {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);

         cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |
                                 S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |
                                 S_028EE0_RESOURCE_LEVEL(1);
      } else {
         cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);
         cb->cb_color_attrib |=
            S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);
      }

      cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |
                             S_028C68_MAX_MIP(iview->image->info.levels - 1);
   }
}

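/* Compute the DECOMPRESS_ON_N_ZPLANES value used with TC-compatible HTILE;
 * the limit depends on chip generation, depth format and sample count.
 */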
static unsigned
radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)
{
   unsigned max_zplanes = 0;

   assert(radv_image_is_tc_compat_htile(iview->image));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      /* Default value for 32-bit depth surfaces. */
      max_zplanes = 4;

      if (iview->vk_format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)
         max_zplanes = 2;

      /* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */
      if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&
          radv_image_get_iterate256(device, iview->image) &&
          !radv_image_tile_stencil_disabled(device, iview->image) &&
          iview->image->info.samples == 4) {
         max_zplanes = 1;
      }

      max_zplanes = max_zplanes + 1;
   } else {
      if (iview->vk_format == VK_FORMAT_D16_UNORM) {
         /* Do not enable Z plane compression for 16-bit depth
          * surfaces because it isn't supported on GFX8. Only
          * 32-bit depth surfaces are supported by the hardware.
          * This allows us to maintain shader compatibility and to
          * reduce the number of depth decompressions.
          */
         max_zplanes = 1;
      } else {
         if (iview->image->info.samples <= 1)
            max_zplanes = 5;
         else if (iview->image->info.samples <= 4)
            max_zplanes = 3;
         else
            max_zplanes = 2;
      }
   }

   return max_zplanes;
}

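/* Fill the DB_* register values for a depth/stencil attachment view,
 * including HTILE state when HTILE is enabled for the image.
 */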
void
radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,
                           struct radv_image_view *iview)
{
   unsigned level = iview->base_mip;
   unsigned format, stencil_format;
   uint64_t va, s_offs, z_offs;
   bool stencil_only = iview->image->vk_format == VK_FORMAT_S8_UINT;
   const struct radv_image_plane *plane = &iview->image->planes[0];
   const struct radeon_surf *surf = &plane->surface;

   assert(vk_format_get_plane_count(iview->image->vk_format) == 1);

   memset(ds, 0, sizeof(*ds));
   if (!device->instance->absolute_depth_bias) {
      switch (iview->image->vk_format) {
      case VK_FORMAT_D24_UNORM_S8_UINT:
      case VK_FORMAT_X8_D24_UNORM_PACK32:
         ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
         break;
      case VK_FORMAT_D16_UNORM:
      case VK_FORMAT_D16_UNORM_S8_UINT:
         ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
         break;
      case VK_FORMAT_D32_SFLOAT:
      case VK_FORMAT_D32_SFLOAT_S8_UINT:
         ds->pa_su_poly_offset_db_fmt_cntl =
            S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
         break;
      default:
         break;
      }
   }

   format = radv_translate_dbformat(iview->image->vk_format);
   stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;

   uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;
   ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(max_slice);
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      ds->db_depth_view |=
         S_028008_SLICE_START_HI(iview->base_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);
   }

   ds->db_htile_data_base = 0;
   ds->db_htile_surface = 0;

   va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;
   s_offs = z_offs = va;

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      assert(surf->u.gfx9.surf_offset == 0);
      s_offs += surf->u.gfx9.zs.stencil_offset;

      ds->db_z_info = S_028038_FORMAT(format) |
                      S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |
                      S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |
                      S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1);
      ds->db_stencil_info =
         S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);

      if (device->physical_device->rad_info.chip_class == GFX9) {
         ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);
         ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);
      }

      ds->db_depth_view |= S_028008_MIPID(level);
      ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |
                          S_02801C_Y_MAX(iview->image->info.height - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);

            ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);

            if (device->physical_device->rad_info.chip_class >= GFX10) {
               bool iterate256 = radv_image_get_iterate256(device, iview->image);

               ds->db_z_info |= S_028040_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);
               ds->db_z_info |= S_028040_ITERATE_256(iterate256);
               ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);
            } else {
               ds->db_z_info |= S_028038_ITERATE_FLUSH(1);
               ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);
            }
         }

         if (radv_image_tile_stencil_disabled(device, iview->image)) {
            ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);
         }

         va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);

         if (device->physical_device->rad_info.chip_class == GFX9) {
            ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);
         }

         if (radv_image_has_vrs_htile(device, iview->image)) {
            ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);
         }
      }
   } else {
      const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];

      if (stencil_only)
         level_info = &surf->u.legacy.zs.stencil_level[level];

      z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;
      s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;

      ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));
      ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);
      ds->db_stencil_info = S_028044_FORMAT(stencil_format);

      if (iview->image->info.samples > 1)
         ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));

      if (device->physical_device->rad_info.chip_class >= GFX7) {
         struct radeon_info *info = &device->physical_device->rad_info;
         unsigned tiling_index = surf->u.legacy.tiling_index[level];
         unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];
         unsigned macro_index = surf->u.legacy.macro_tile_index;
         unsigned tile_mode = info->si_tile_mode_array[tiling_index];
         unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
         unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

         if (stencil_only)
            tile_mode = stencil_tile_mode;

         ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
                              S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
                              S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
                              S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
                              S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
                              S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
         ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
         ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
      } else {
         unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);
         ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
         tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);
         ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
         if (stencil_only)
            ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
      }

      ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
                          S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
      ds->db_depth_slice =
         S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);

      if (radv_htile_enabled(iview->image, level)) {
         ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);

         if (radv_image_tile_stencil_disabled(device, iview->image)) {
            ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);
         }

         va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;
         ds->db_htile_data_base = va >> 8;
         ds->db_htile_surface = S_028ABC_FULL_CACHE(1);

         if (radv_image_is_tc_compat_htile(iview->image)) {
            unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);

            ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);
            ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);
         }
      }
   }

   ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
   ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;
}

VkResult
radv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo,
                       const VkAllocationCallbacks *pAllocator, VkFramebuffer *pFramebuffer)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_framebuffer *framebuffer;
   const VkFramebufferAttachmentsCreateInfo *imageless_create_info =
      vk_find_struct_const(pCreateInfo->pNext, FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   size_t size = sizeof(*framebuffer);
   if (!imageless_create_info)
      size += sizeof(struct radv_image_view *) * pCreateInfo->attachmentCount;
   framebuffer =
      vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (framebuffer == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;
   framebuffer->imageless = !!imageless_create_info;

   if (!imageless_create_info) {
      for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
         VkImageView _iview = pCreateInfo->pAttachments[i];
         struct radv_image_view *iview = radv_image_view_from_handle(_iview);
         framebuffer->attachments[i] = iview;
      }
   }

   *pFramebuffer = radv_framebuffer_to_handle(framebuffer);
   return VK_SUCCESS;
}

void
radv_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb,
                        const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

   if (!fb)
      return;
   vk_object_base_finish(&fb->base);
   vk_free2(&device->vk.alloc, pAllocator, fb);
}

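/* Translation helpers from Vulkan sampler enums to the SQ_TEX/SQ_IMG
 * register fields used below by radv_init_sampler().
 */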
static unsigned
radv_tex_wrap(VkSamplerAddressMode address_mode)
{
   switch (address_mode) {
   case VK_SAMPLER_ADDRESS_MODE_REPEAT:
      return V_008F30_SQ_TEX_WRAP;
   case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
      return V_008F30_SQ_TEX_MIRROR;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
   case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
      return V_008F30_SQ_TEX_CLAMP_BORDER;
   case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
      return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
   default:
      unreachable("illegal tex wrap mode");
      break;
   }
}

static unsigned
radv_tex_compare(VkCompareOp op)
{
   switch (op) {
   case VK_COMPARE_OP_NEVER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   case VK_COMPARE_OP_LESS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
   case VK_COMPARE_OP_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
   case VK_COMPARE_OP_LESS_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
   case VK_COMPARE_OP_GREATER:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
   case VK_COMPARE_OP_NOT_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
   case VK_COMPARE_OP_GREATER_OR_EQUAL:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
   case VK_COMPARE_OP_ALWAYS:
      return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
   default:
      unreachable("illegal compare mode");
      break;
   }
}

static unsigned
radv_tex_filter(VkFilter filter, unsigned max_aniso)
{
   switch (filter) {
   case VK_FILTER_NEAREST:
      return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT
                            : V_008F38_SQ_TEX_XY_FILTER_POINT);
   case VK_FILTER_LINEAR:
      return (max_aniso > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR
                            : V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
   case VK_FILTER_CUBIC_IMG:
   default:
      fprintf(stderr, "illegal texture filter");
      return 0;
   }
}

static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
   switch (mode) {
   case VK_SAMPLER_MIPMAP_MODE_NEAREST:
      return V_008F38_SQ_TEX_Z_FILTER_POINT;
   case VK_SAMPLER_MIPMAP_MODE_LINEAR:
      return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
   default:
      return V_008F38_SQ_TEX_Z_FILTER_NONE;
   }
}

static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
   switch (bcolor) {
   case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
   case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
   case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
   case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
   case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
      return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
   case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:
   case VK_BORDER_COLOR_INT_CUSTOM_EXT:
      return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;
   default:
      break;
   }
   return 0;
}

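/* Map a maximum anisotropy value to the log2-encoded ratio the hardware
 * expects: 0 = 1x, 1 = 2x, 2 = 4x, 3 = 8x, 4 = 16x.
 */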
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
   if (filter < 2)
      return 0;
   if (filter < 4)
      return 1;
   if (filter < 8)
      return 2;
   if (filter < 16)
      return 3;
   return 4;
}

static unsigned
radv_tex_filter_mode(VkSamplerReductionMode mode)
{
   switch (mode) {
   case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_BLEND;
   case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_MIN;
   case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:
      return V_008F30_SQ_IMG_FILTER_MODE_MAX;
   default:
      break;
   }
   return 0;
}

static uint32_t
radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)
{
   if (device->force_aniso >= 0)
      return device->force_aniso;

   if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)
      return (uint32_t)pCreateInfo->maxAnisotropy;

   return 0;
}

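/* Convert a float to fixed point with frac_bits fractional bits, e.g.
 * S_FIXED(1.5f, 8) == 384. Used for the LOD fields below.
 */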
static inline int
S_FIXED(float value, unsigned frac_bits)
{
   return value * (1 << frac_bits);
}

static uint32_t
radv_register_border_color(struct radv_device *device, VkClearColorValue value)
{
   uint32_t slot;

   mtx_lock(&device->border_color_data.mutex);

   for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {
      if (!device->border_color_data.used[slot]) {
         /* Copy to the GPU wrt endian-ness. */
         util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,
                                 sizeof(VkClearColorValue));

         device->border_color_data.used[slot] = true;
         break;
      }
   }

   mtx_unlock(&device->border_color_data.mutex);

   return slot;
}

static void
radv_unregister_border_color(struct radv_device *device, uint32_t slot)
{
   mtx_lock(&device->border_color_data.mutex);

   device->border_color_data.used[slot] = false;

   mtx_unlock(&device->border_color_data.mutex);
}

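/* Pack the VkSamplerCreateInfo state into the four SQ_IMG_SAMP dwords.
 * TRUNC_COORD is only enabled when both filters are NEAREST, and COMPAT_MODE
 * is set on GFX8/GFX9.
 */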
static void
radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,
                  const VkSamplerCreateInfo *pCreateInfo)
{
   uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);
   uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
   bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||
                      device->physical_device->rad_info.chip_class == GFX9;
   unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;
   unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
   bool trunc_coord =
      pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;
   bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
                            pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||
                            pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;
   VkBorderColor border_color =
      uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
   uint32_t border_color_ptr;

   const struct VkSamplerReductionModeCreateInfo *sampler_reduction =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);
   if (sampler_reduction)
      filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);

   if (pCreateInfo->compareEnable)
      depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);

   sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;

   if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||
       border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {
      const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =
         vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);

      assert(custom_border_color);

      sampler->border_color_slot =
         radv_register_border_color(device, custom_border_color->customBorderColor);

      /* Did we fail to find a slot? */
      if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {
         fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");
         border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;
      }
   }

   /* If we don't have a custom color, set the ptr to 0 */
   border_color_ptr =
      sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;

   sampler->state[0] =
      (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
       S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
       S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
       S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |
       S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
       S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |
       S_008F30_DISABLE_CUBE_WRAP(0) | S_008F30_COMPAT_MODE(compat_mode) |
       S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));
   sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
                        S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
                        S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
   sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
                        S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
                        S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
                        S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
                        S_008F38_MIP_POINT_PRECLAMP(0));
   sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |
                        S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);
   } else {
      sampler->state[2] |=
         S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |
         S_008F38_FILTER_PREC_FIX(1) |
         S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);
   }
}


VkResult
radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,
                   const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_sampler *sampler;

   const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =
      vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

   sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,
                       VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!sampler)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);

   radv_init_sampler(device, sampler, pCreateInfo);

   sampler->ycbcr_sampler =
      ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)
                       : NULL;
   *pSampler = radv_sampler_to_handle(sampler);

   return VK_SUCCESS;
}
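
/* A minimal application-side call that lands here (illustrative only; the
 * field values are arbitrary and "dev" is a hypothetical VkDevice):
 *
 *    VkSamplerCreateInfo info = {
 *       .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
 *       .magFilter = VK_FILTER_LINEAR,
 *       .minFilter = VK_FILTER_LINEAR,
 *       .mipmapMode = VK_SAMPLER_MIPMAP_MODE_LINEAR,
 *       .addressModeU = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeV = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .addressModeW = VK_SAMPLER_ADDRESS_MODE_REPEAT,
 *       .maxLod = VK_LOD_CLAMP_NONE,
 *    };
 *    VkSampler sampler;
 *    VkResult res = vkCreateSampler(dev, &info, NULL, &sampler);
 */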

void
radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)
      radv_unregister_border_color(device, sampler->border_color_slot);

   vk_object_base_finish(&sampler->base);
   vk_free2(&device->vk.alloc, pAllocator, sampler);
}

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *   - Loader interface v0 is incompatible with later versions. We don't
    *     support it.
    *
    *   - In loader interface v1:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
    *         entrypoint.
    *       - The ICD must statically expose no other Vulkan symbol unless it
    *         is linked with -Bsymbolic.
    *       - Each dispatchable Vulkan handle created by the ICD must be
    *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    *         ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
    *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *         vkDestroySurfaceKHR(). The ICD must be capable of working with
    *         such loader-managed surfaces.
    *
    *   - Loader interface v2 differs from v1 in:
    *       - The first ICD entrypoint called by the loader is
    *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *         statically expose this entrypoint.
    *
    *   - Loader interface v3 differs from v2 in:
    *       - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *         vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *         because the loader no longer does so.
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 4u);
   return VK_SUCCESS;
}
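
/* Illustrative negotiation flow (the loader-side version number is an
 * example): a loader that supports interface v5 calls this entrypoint with
 * *pSupportedVersion == 5; the MIN2() above lowers it to 4, and both sides
 * then speak the lower of the two versions:
 *
 *    uint32_t version = 5;
 *    vk_icdNegotiateLoaderICDInterfaceVersion(&version);
 *    assert(version == 4);
 */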

VkResult
radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);

   assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);

   /* At the moment, we support only the below handle types. */
   assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||
          pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);

   bool ret = radv_get_memory_fd(device, memory, pFD);
   if (!ret)
      return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);
   return VK_SUCCESS;
}

static uint32_t
radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,
                                        enum radeon_bo_domain domains, enum radeon_bo_flag flags,
                                        enum radeon_bo_flag ignore_flags)
{
   /* Don't count GTT/CPU as relevant:
    *
    * - We're not fully consistent between the two.
    * - Sometimes VRAM gets VRAM|GTT.
    */
   const enum radeon_bo_domain relevant_domains =
      RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;
   uint32_t bits = 0;
   for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {
      if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))
         continue;

      if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))
         continue;

      bits |= 1u << i;
   }

   return bits;
}

static uint32_t
radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,
                                enum radeon_bo_flag flags)
{
   enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);
   uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);

   if (!bits) {
      ignore_flags |= RADEON_FLAG_GTT_WC;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   if (!bits) {
      ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;
      bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);
   }

   return bits;
}
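
/* How the relaxation above plays out (illustrative): the first attempt only
 * requires the memory type to agree on NO_CPU_ACCESS and GTT_WC (every other
 * flag is already in ignore_flags). If nothing matches, GTT_WC is ignored as
 * well, and finally NO_CPU_ACCESS, so any type in the right domains is
 * accepted. For a hypothetical device that exposes no write-combined type, a
 * VRAM BO imported with GTT_WC set would match on the second attempt:
 *
 *    bits = attempt(dev, VRAM, GTT_WC, ~(NO_CPU_ACCESS | GTT_WC)); // 0
 *    bits = attempt(dev, VRAM, GTT_WC, ~NO_CPU_ACCESS);            // VRAM types
 */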

VkResult
radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,
                              int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)
{
   RADV_FROM_HANDLE(radv_device, device, _device);

   switch (handleType) {
   case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {
      enum radeon_bo_domain domains;
      enum radeon_bo_flag flags;
      if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);

      pMemoryFdProperties->memoryTypeBits =
         radv_compute_valid_memory_types(device->physical_device, domains, flags);
      return VK_SUCCESS;
   }
   default:
      /* The valid usage section for this function says:
       *
       *    "handleType must not be one of the handle types defined as
       *    opaque."
       *
       * So opaque handle types fall into the default "unsupported" case.
       */
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
   }
}
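
/* Application-side sketch of a dma-buf import query (illustrative; "dev" and
 * "dmabuf_fd" are assumed to exist, the latter coming from some external
 * producer such as GBM or V4L2):
 *
 *    VkMemoryFdPropertiesKHR props = {
 *       .sType = VK_STRUCTURE_TYPE_MEMORY_FD_PROPERTIES_KHR,
 *    };
 *    vkGetMemoryFdPropertiesKHR(dev, VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT,
 *                               dmabuf_fd, &props);
 *    // props.memoryTypeBits restricts which memory types vkAllocateMemory
 *    // may use when importing this fd.
 */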

static VkResult
radv_import_opaque_fd(struct radv_device *device, int fd, uint32_t *syncobj)
{
   uint32_t syncobj_handle = 0;
   int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);
   if (ret != 0)
      return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);

   if (*syncobj)
      device->ws->destroy_syncobj(device->ws, *syncobj);

   *syncobj = syncobj_handle;
   close(fd);

   return VK_SUCCESS;
}

static VkResult
radv_import_sync_fd(struct radv_device *device, int fd, uint32_t *syncobj)
{
   /* If we create a syncobj we do it locally so that if we have an error, we don't
    * leave a syncobj in an undetermined state in the fence. */
   uint32_t syncobj_handle = *syncobj;
   if (!syncobj_handle) {
      /* Per the external-fd extensions, fd == -1 stands for an already-signaled
       * sync file, so create the new syncobj pre-signaled in that case. */
      bool create_signaled = fd == -1;

      int ret = device->ws->create_syncobj(device->ws, create_signaled, &syncobj_handle);
      if (ret) {
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else {
      if (fd == -1)
         device->ws->signal_syncobj(device->ws, syncobj_handle, 0);
   }

   if (fd != -1) {
      int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);
      if (ret)
         return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);
      close(fd);
   }

   *syncobj = syncobj_handle;

   return VK_SUCCESS;
}

VkResult
radv_ImportSemaphoreFdKHR(VkDevice _device,
                          const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);
   VkResult result;
   struct radv_semaphore_part *dst = NULL;
   bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;

   if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {
      assert(!timeline);
      dst = &sem->temporary;
   } else {
      dst = &sem->permanent;
   }

   uint32_t syncobj =
      (dst->kind == RADV_SEMAPHORE_SYNCOBJ || dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ)
         ? dst->syncobj
         : 0;

   switch (pImportSemaphoreFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
      break;
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      assert(!timeline);
      result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);
      break;
   default:
      unreachable("Unhandled semaphore handle type");
   }

   if (result == VK_SUCCESS) {
      dst->syncobj = syncobj;
      dst->kind = RADV_SEMAPHORE_SYNCOBJ;
      if (timeline) {
         dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;
         dst->timeline_syncobj.max_point = 0;
      }
   }

   return result;
}

VkResult
radv_GetSemaphoreFdKHR(VkDevice _device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);
   int ret;
   uint32_t syncobj_handle;

   if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
      assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||
             sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
      syncobj_handle = sem->temporary.syncobj;
   } else {
      assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||
             sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);
      syncobj_handle = sem->permanent.syncobj;
   }

   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:
      ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);
      if (ret)
         return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
      break;
   case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:
      ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);
      if (ret)
         return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);

      /* Sync-fd export has copy transference: it acts like a wait on the
       * source semaphore, so drop the temporary payload or reset the
       * permanent one. */
      if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {
         radv_destroy_semaphore_part(device, &sem->temporary);
      } else {
         device->ws->reset_syncobj(device->ws, syncobj_handle);
      }
      break;
   default:
      unreachable("Unhandled semaphore handle type");
   }

   return VK_SUCCESS;
}

void
radv_GetPhysicalDeviceExternalSemaphoreProperties(
   VkPhysicalDevice physicalDevice,
   const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,
   VkExternalSemaphoreProperties *pExternalSemaphoreProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);
   VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);

   if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&
       pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures =
         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   } else if (pExternalSemaphoreInfo->handleType ==
                 VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||
              pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures =
         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else if (pExternalSemaphoreInfo->handleType ==
              VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->compatibleHandleTypes =
         VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;
      pExternalSemaphoreProperties->externalSemaphoreFeatures =
         VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |
         VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;
      pExternalSemaphoreProperties->compatibleHandleTypes = 0;
      pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;
   }
}

VkResult
radv_ImportFenceFdKHR(VkDevice _device, const VkImportFenceFdInfoKHR *pImportFenceFdInfo)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);
   struct radv_fence_part *dst = NULL;
   VkResult result;

   if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {
      dst = &fence->temporary;
   } else {
      dst = &fence->permanent;
   }

   uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;

   switch (pImportFenceFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);
      break;
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);
      break;
   default:
      unreachable("Unhandled fence handle type");
   }

   if (result == VK_SUCCESS) {
      dst->syncobj = syncobj;
      dst->kind = RADV_FENCE_SYNCOBJ;
   }

   return result;
}

VkResult
radv_GetFenceFdKHR(VkDevice _device, const VkFenceGetFdInfoKHR *pGetFdInfo, int *pFd)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);
   int ret;

   struct radv_fence_part *part =
      fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;

   switch (pGetFdInfo->handleType) {
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:
      ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);
      if (ret)
         return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);
      break;
   case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:
      ret = device->ws->export_syncobj_to_sync_file(device->ws, part->syncobj, pFd);
      if (ret)
         return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);

      if (part == &fence->temporary) {
         radv_destroy_fence_part(device, part);
      } else {
         device->ws->reset_syncobj(device->ws, part->syncobj);
      }
      break;
   default:
      unreachable("Unhandled fence handle type");
   }

   return VK_SUCCESS;
}

void
radv_GetPhysicalDeviceExternalFenceProperties(
   VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,
   VkExternalFenceProperties *pExternalFenceProperties)
{
   if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||
       pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {
      pExternalFenceProperties->exportFromImportedHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->compatibleHandleTypes =
         VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;
      pExternalFenceProperties->externalFenceFeatures =
         VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_FENCE_FEATURE_IMPORTABLE_BIT;
   } else {
      pExternalFenceProperties->exportFromImportedHandleTypes = 0;
      pExternalFenceProperties->compatibleHandleTypes = 0;
      pExternalFenceProperties->externalFenceFeatures = 0;
   }
}

void
radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,
                                      uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,
                                      VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)
{
   assert(localDeviceIndex == remoteDeviceIndex);

   *pPeerMemoryFeatures =
      VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |
      VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;
}

static const VkTimeDomainEXT radv_time_domains[] = {
   VK_TIME_DOMAIN_DEVICE_EXT,
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,
#ifdef CLOCK_MONOTONIC_RAW
   VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,
#endif
};

VkResult
radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,
                                                  uint32_t *pTimeDomainCount,
                                                  VkTimeDomainEXT *pTimeDomains)
{
   int d;
   VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);

   for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {
      vk_outarray_append_typed(VkTimeDomainEXT, &out, i)
      {
         *i = radv_time_domains[d];
      }
   }

   return vk_outarray_status(&out);
}

#ifndef _WIN32
static uint64_t
radv_clock_gettime(clockid_t clock_id)
{
   struct timespec current;
   int ret;

   ret = clock_gettime(clock_id, &current);
#ifdef CLOCK_MONOTONIC_RAW
   if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)
      ret = clock_gettime(CLOCK_MONOTONIC, &current);
#endif
   if (ret < 0)
      return 0;

   return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;
}

VkResult
radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,
                                const VkCalibratedTimestampInfoEXT *pTimestampInfos,
                                uint64_t *pTimestamps, uint64_t *pMaxDeviation)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;
   int d;
   uint64_t begin, end;
   uint64_t max_clock_period = 0;

#ifdef CLOCK_MONOTONIC_RAW
   begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   begin = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   for (d = 0; d < timestampCount; d++) {
      switch (pTimestampInfos[d].timeDomain) {
      case VK_TIME_DOMAIN_DEVICE_EXT:
         pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);
         uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);
         max_clock_period = MAX2(max_clock_period, device_period);
         break;
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:
         pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);
         max_clock_period = MAX2(max_clock_period, 1);
         break;

#ifdef CLOCK_MONOTONIC_RAW
      case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:
         pTimestamps[d] = begin;
         break;
#endif
      default:
         pTimestamps[d] = 0;
         break;
      }
   }

#ifdef CLOCK_MONOTONIC_RAW
   end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);
#else
   end = radv_clock_gettime(CLOCK_MONOTONIC);
#endif

   /*
    * The maximum deviation is the sum of the interval over which we
    * perform the sampling and the maximum period of any sampled
    * clock. That's because the maximum skew between any two sampled
    * clock edges is when the sampled clock with the largest period is
    * sampled at the end of that period but right at the beginning of the
    * sampling interval and some other clock is sampled right at the
    * beginning of its sampling period and right at the end of the
    * sampling interval. Let's assume the GPU has the longest clock
    * period and that the application is sampling GPU and monotonic:
    *
    *                               s                 e
    *                     w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f
    *    Raw              -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *                               g
    *              0         1         2         3
    *    GPU       -----_____-----_____-----_____-----_____
    *
    *                                            m
    *                                    x y z 0 1 2 3 4 5 6 7 8 9 a b c
    *    Monotonic                       -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-
    *
    *    Interval                     <----------------->
    *    Deviation           <-------------------------->
    *
    *    s = read(raw)       2
    *    g = read(GPU)       1
    *    m = read(monotonic) 2
    *    e = read(raw)       b
    *
    * We round the sample interval up by one tick to cover sampling error
    * in the interval clock.
    */

   uint64_t sample_interval = end - begin + 1;

   *pMaxDeviation = sample_interval + max_clock_period;

   return VK_SUCCESS;
}
#endif
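
/* Worked example of the deviation formula (made-up numbers): with
 * begin == 1000 ns, end == 1180 ns, and a hypothetical GPU counter at
 * clock_crystal_freq == 25000 kHz, the device tick period is
 * DIV_ROUND_UP(1000000, 25000) == 40 ns, so
 *
 *    sample_interval = 1180 - 1000 + 1 = 181 ns
 *    *pMaxDeviation  = 181 + 40        = 221 ns
 */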

void
radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,
                                               VkSampleCountFlagBits samples,
                                               VkMultisamplePropertiesEXT *pMultisampleProperties)
{
   RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);
   VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;

   if (physical_device->rad_info.chip_class < GFX10)
      supported_samples |= VK_SAMPLE_COUNT_8_BIT;

   if (samples & supported_samples) {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};
   } else {
      pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};
   }
}

VkResult
radv_GetPhysicalDeviceFragmentShadingRatesKHR(
   VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,
   VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)
{
   VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,
                          pFragmentShadingRateCount);

#define append_rate(w, h, s)                                                                       \
   {                                                                                               \
      VkPhysicalDeviceFragmentShadingRateKHR rate = {                                              \
         .sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_KHR,                     \
         .sampleCounts = s,                                                                        \
         .fragmentSize = {.width = w, .height = h},                                                \
      };                                                                                           \
      vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate;         \
   }

   for (uint32_t x = 2; x >= 1; x--) {
      for (uint32_t y = 2; y >= 1; y--) {
         append_rate(x, y,
                     VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |
                        VK_SAMPLE_COUNT_8_BIT);
      }
   }
#undef append_rate

   return vk_outarray_status(&out);
}
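
/* The nested loop above enumerates rates from largest to smallest fragment
 * size, matching the ordering the spec requires for this query: 2x2, 2x1,
 * 1x2, then 1x1, each advertised for 1/2/4/8-sample rasterization. */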