Path: blob/21.2-virgl/src/amd/vulkan/radv_device.c
7137 views
/*1* Copyright © 2016 Red Hat.2* Copyright © 2016 Bas Nieuwenhuizen3*4* based in part on anv driver which is:5* Copyright © 2015 Intel Corporation6*7* Permission is hereby granted, free of charge, to any person obtaining a8* copy of this software and associated documentation files (the "Software"),9* to deal in the Software without restriction, including without limitation10* the rights to use, copy, modify, merge, publish, distribute, sublicense,11* and/or sell copies of the Software, and to permit persons to whom the12* Software is furnished to do so, subject to the following conditions:13*14* The above copyright notice and this permission notice (including the next15* paragraph) shall be included in all copies or substantial portions of the16* Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR19* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,20* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL21* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER22* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING23* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS24* IN THE SOFTWARE.25*/2627#include <fcntl.h>28#include <stdbool.h>29#include <string.h>3031#ifdef __FreeBSD__32#include <sys/types.h>33#elif !defined(_WIN32)34#include <sys/sysmacros.h>35#endif3637#include "util/debug.h"38#include "util/disk_cache.h"39#include "radv_cs.h"40#include "radv_debug.h"41#include "radv_private.h"42#include "radv_shader.h"43#include "vk_util.h"44#ifdef _WIN3245typedef void *drmDevicePtr;46#include <io.h>47#else48#include <amdgpu.h>49#include <xf86drm.h>50#include "drm-uapi/amdgpu_drm.h"51#include "winsys/amdgpu/radv_amdgpu_winsys_public.h"52#endif53#include "util/build_id.h"54#include "util/debug.h"55#include "util/driconf.h"56#include "util/mesa-sha1.h"57#include "util/timespec.h"58#include "util/u_atomic.h"59#include "winsys/null/radv_null_winsys_public.h"60#include "ac_llvm_util.h"61#include "git_sha1.h"62#include "sid.h"63#include "vk_format.h"64#include "vulkan/vk_icd.h"6566/* The number of IBs per submit isn't infinite, it depends on the ring type67* (ie. some initial setup needed for a submit) and the number of IBs (4 DW).68* This limit is arbitrary but should be safe for now. Ideally, we should get69* this limit from the KMD.70*/71#define RADV_MAX_IBS_PER_SUBMIT 1927273/* The "RAW" clocks on Linux are called "FAST" on FreeBSD */74#if !defined(CLOCK_MONOTONIC_RAW) && defined(CLOCK_MONOTONIC_FAST)75#define CLOCK_MONOTONIC_RAW CLOCK_MONOTONIC_FAST76#endif7778static struct radv_timeline_point *79radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,80uint64_t p);8182static struct radv_timeline_point *radv_timeline_add_point_locked(struct radv_device *device,83struct radv_timeline *timeline,84uint64_t p);8586static void radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,87struct list_head *processing_list);8889static void radv_destroy_semaphore_part(struct radv_device *device,90struct radv_semaphore_part *part);9192uint64_t93radv_get_current_time(void)94{95return os_time_get_nano();96}9798static uint64_t99radv_get_absolute_timeout(uint64_t timeout)100{101uint64_t current_time = radv_get_current_time();102103timeout = MIN2(UINT64_MAX - current_time, timeout);104105return current_time + timeout;106}107108static int109radv_device_get_cache_uuid(enum radeon_family family, void *uuid)110{111struct mesa_sha1 ctx;112unsigned char sha1[20];113unsigned ptr_size = sizeof(void *);114115memset(uuid, 0, VK_UUID_SIZE);116_mesa_sha1_init(&ctx);117118if (!disk_cache_get_function_identifier(radv_device_get_cache_uuid, &ctx) ||119!disk_cache_get_function_identifier(LLVMInitializeAMDGPUTargetInfo, &ctx))120return -1;121122_mesa_sha1_update(&ctx, &family, sizeof(family));123_mesa_sha1_update(&ctx, &ptr_size, sizeof(ptr_size));124_mesa_sha1_final(&ctx, sha1);125126memcpy(uuid, sha1, VK_UUID_SIZE);127return 0;128}129130static void131radv_get_driver_uuid(void *uuid)132{133ac_compute_driver_uuid(uuid, VK_UUID_SIZE);134}135136static void137radv_get_device_uuid(struct radeon_info *info, void *uuid)138{139ac_compute_device_uuid(info, uuid, VK_UUID_SIZE);140}141142static uint64_t143radv_get_adjusted_vram_size(struct radv_physical_device *device)144{145int ov = driQueryOptioni(&device->instance->dri_options, "override_vram_size");146if (ov >= 0)147return MIN2(device->rad_info.vram_size, (uint64_t)ov << 20);148return device->rad_info.vram_size;149}150151static uint64_t152radv_get_visible_vram_size(struct radv_physical_device *device)153{154return MIN2(radv_get_adjusted_vram_size(device), device->rad_info.vram_vis_size);155}156157static uint64_t158radv_get_vram_size(struct radv_physical_device *device)159{160uint64_t total_size = radv_get_adjusted_vram_size(device);161return total_size - MIN2(total_size, device->rad_info.vram_vis_size);162}163164enum radv_heap {165RADV_HEAP_VRAM = 1 << 0,166RADV_HEAP_GTT = 1 << 1,167RADV_HEAP_VRAM_VIS = 1 << 2,168RADV_HEAP_MAX = 1 << 3,169};170171static void172radv_physical_device_init_mem_types(struct radv_physical_device *device)173{174uint64_t visible_vram_size = radv_get_visible_vram_size(device);175uint64_t vram_size = radv_get_vram_size(device);176uint64_t gtt_size = device->rad_info.gart_size;177int vram_index = -1, visible_vram_index = -1, gart_index = -1;178179device->memory_properties.memoryHeapCount = 0;180device->heaps = 0;181182if (!device->rad_info.has_dedicated_vram) {183/* On APUs, the carveout is usually too small for games that request a minimum VRAM size184* greater than it. To workaround this, we compute the total available memory size (GTT +185* visible VRAM size) and report 2/3 as VRAM and 1/3 as GTT.186*/187const uint64_t total_size = gtt_size + visible_vram_size;188visible_vram_size = align64((total_size * 2) / 3, device->rad_info.gart_page_size);189gtt_size = total_size - visible_vram_size;190vram_size = 0;191}192193/* Only get a VRAM heap if it is significant, not if it is a 16 MiB194* remainder above visible VRAM. */195if (vram_size > 0 && vram_size * 9 >= visible_vram_size) {196vram_index = device->memory_properties.memoryHeapCount++;197device->heaps |= RADV_HEAP_VRAM;198device->memory_properties.memoryHeaps[vram_index] = (VkMemoryHeap){199.size = vram_size,200.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,201};202}203204if (gtt_size > 0) {205gart_index = device->memory_properties.memoryHeapCount++;206device->heaps |= RADV_HEAP_GTT;207device->memory_properties.memoryHeaps[gart_index] = (VkMemoryHeap){208.size = gtt_size,209.flags = 0,210};211}212213if (visible_vram_size) {214visible_vram_index = device->memory_properties.memoryHeapCount++;215device->heaps |= RADV_HEAP_VRAM_VIS;216device->memory_properties.memoryHeaps[visible_vram_index] = (VkMemoryHeap){217.size = visible_vram_size,218.flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,219};220}221222unsigned type_count = 0;223224if (vram_index >= 0 || visible_vram_index >= 0) {225device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;226device->memory_flags[type_count] = RADEON_FLAG_NO_CPU_ACCESS;227device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){228.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT,229.heapIndex = vram_index >= 0 ? vram_index : visible_vram_index,230};231}232233if (gart_index >= 0) {234device->memory_domains[type_count] = RADEON_DOMAIN_GTT;235device->memory_flags[type_count] = RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS;236device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){237.propertyFlags =238VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,239.heapIndex = gart_index,240};241}242if (visible_vram_index >= 0) {243device->memory_domains[type_count] = RADEON_DOMAIN_VRAM;244device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;245device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){246.propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |247VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |248VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,249.heapIndex = visible_vram_index,250};251}252253if (gart_index >= 0) {254device->memory_domains[type_count] = RADEON_DOMAIN_GTT;255device->memory_flags[type_count] = RADEON_FLAG_CPU_ACCESS;256device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){257.propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |258VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_CACHED_BIT,259.heapIndex = gart_index,260};261}262device->memory_properties.memoryTypeCount = type_count;263264if (device->rad_info.has_l2_uncached) {265for (int i = 0; i < device->memory_properties.memoryTypeCount; i++) {266VkMemoryType mem_type = device->memory_properties.memoryTypes[i];267268if ((mem_type.propertyFlags &269(VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT)) ||270mem_type.propertyFlags == VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT) {271272VkMemoryPropertyFlags property_flags = mem_type.propertyFlags |273VK_MEMORY_PROPERTY_DEVICE_COHERENT_BIT_AMD |274VK_MEMORY_PROPERTY_DEVICE_UNCACHED_BIT_AMD;275276device->memory_domains[type_count] = device->memory_domains[i];277device->memory_flags[type_count] = device->memory_flags[i] | RADEON_FLAG_VA_UNCACHED;278device->memory_properties.memoryTypes[type_count++] = (VkMemoryType){279.propertyFlags = property_flags,280.heapIndex = mem_type.heapIndex,281};282}283}284device->memory_properties.memoryTypeCount = type_count;285}286}287288static const char *289radv_get_compiler_string(struct radv_physical_device *pdevice)290{291if (!pdevice->use_llvm) {292/* Some games like SotTR apply shader workarounds if the LLVM293* version is too old or if the LLVM version string is294* missing. This gives 2-5% performance with SotTR and ACO.295*/296if (driQueryOptionb(&pdevice->instance->dri_options, "radv_report_llvm9_version_string")) {297return " (LLVM 9.0.1)";298}299300return "";301}302303return " (LLVM " MESA_LLVM_VERSION_STRING ")";304}305306int307radv_get_int_debug_option(const char *name, int default_value)308{309const char *str;310int result;311312str = getenv(name);313if (!str) {314result = default_value;315} else {316char *endptr;317318result = strtol(str, &endptr, 0);319if (str == endptr) {320/* No digits founs. */321result = default_value;322}323}324325return result;326}327328static bool329radv_thread_trace_enabled()330{331return radv_get_int_debug_option("RADV_THREAD_TRACE", -1) >= 0 ||332getenv("RADV_THREAD_TRACE_TRIGGER");333}334335#if defined(VK_USE_PLATFORM_WAYLAND_KHR) || defined(VK_USE_PLATFORM_XCB_KHR) || \336defined(VK_USE_PLATFORM_XLIB_KHR) || defined(VK_USE_PLATFORM_DISPLAY_KHR)337#define RADV_USE_WSI_PLATFORM338#endif339340#ifdef ANDROID341#define RADV_API_VERSION VK_MAKE_VERSION(1, 1, VK_HEADER_VERSION)342#else343#define RADV_API_VERSION VK_MAKE_VERSION(1, 2, VK_HEADER_VERSION)344#endif345346VkResult347radv_EnumerateInstanceVersion(uint32_t *pApiVersion)348{349*pApiVersion = RADV_API_VERSION;350return VK_SUCCESS;351}352353static const struct vk_instance_extension_table radv_instance_extensions_supported = {354.KHR_device_group_creation = true,355.KHR_external_fence_capabilities = true,356.KHR_external_memory_capabilities = true,357.KHR_external_semaphore_capabilities = true,358.KHR_get_physical_device_properties2 = true,359.EXT_debug_report = true,360361#ifdef RADV_USE_WSI_PLATFORM362.KHR_get_surface_capabilities2 = true,363.KHR_surface = true,364.KHR_surface_protected_capabilities = true,365#endif366#ifdef VK_USE_PLATFORM_WAYLAND_KHR367.KHR_wayland_surface = true,368#endif369#ifdef VK_USE_PLATFORM_XCB_KHR370.KHR_xcb_surface = true,371#endif372#ifdef VK_USE_PLATFORM_XLIB_KHR373.KHR_xlib_surface = true,374#endif375#ifdef VK_USE_PLATFORM_XLIB_XRANDR_EXT376.EXT_acquire_xlib_display = true,377#endif378#ifdef VK_USE_PLATFORM_DISPLAY_KHR379.KHR_display = true,380.KHR_get_display_properties2 = true,381.EXT_direct_mode_display = true,382.EXT_display_surface_counter = true,383.EXT_acquire_drm_display = true,384#endif385};386387static void388radv_physical_device_get_supported_extensions(const struct radv_physical_device *device,389struct vk_device_extension_table *ext)390{391*ext = (struct vk_device_extension_table){392.KHR_8bit_storage = true,393.KHR_16bit_storage = true,394.KHR_acceleration_structure = (device->instance->perftest_flags & RADV_PERFTEST_RT) &&395device->rad_info.chip_class >= GFX10_3,396.KHR_bind_memory2 = true,397.KHR_buffer_device_address = true,398.KHR_copy_commands2 = true,399.KHR_create_renderpass2 = true,400.KHR_dedicated_allocation = true,401.KHR_deferred_host_operations = true,402.KHR_depth_stencil_resolve = true,403.KHR_descriptor_update_template = true,404.KHR_device_group = true,405.KHR_draw_indirect_count = true,406.KHR_driver_properties = true,407.KHR_external_fence = true,408.KHR_external_fence_fd = true,409.KHR_external_memory = true,410.KHR_external_memory_fd = true,411.KHR_external_semaphore = true,412.KHR_external_semaphore_fd = true,413.KHR_fragment_shading_rate = device->rad_info.chip_class >= GFX10_3,414.KHR_get_memory_requirements2 = true,415.KHR_image_format_list = true,416.KHR_imageless_framebuffer = true,417#ifdef RADV_USE_WSI_PLATFORM418.KHR_incremental_present = true,419#endif420.KHR_maintenance1 = true,421.KHR_maintenance2 = true,422.KHR_maintenance3 = true,423.KHR_multiview = true,424.KHR_pipeline_executable_properties = true,425.KHR_push_descriptor = true,426.KHR_relaxed_block_layout = true,427.KHR_sampler_mirror_clamp_to_edge = true,428.KHR_sampler_ycbcr_conversion = true,429.KHR_separate_depth_stencil_layouts = true,430.KHR_shader_atomic_int64 = true,431.KHR_shader_clock = true,432.KHR_shader_draw_parameters = true,433.KHR_shader_float16_int8 = true,434.KHR_shader_float_controls = true,435.KHR_shader_non_semantic_info = true,436.KHR_shader_subgroup_extended_types = true,437.KHR_shader_subgroup_uniform_control_flow = true,438.KHR_shader_terminate_invocation = true,439.KHR_spirv_1_4 = true,440.KHR_storage_buffer_storage_class = true,441#ifdef RADV_USE_WSI_PLATFORM442.KHR_swapchain = true,443.KHR_swapchain_mutable_format = true,444#endif445.KHR_timeline_semaphore = true,446.KHR_uniform_buffer_standard_layout = true,447.KHR_variable_pointers = true,448.KHR_vulkan_memory_model = true,449.KHR_workgroup_memory_explicit_layout = true,450.KHR_zero_initialize_workgroup_memory = true,451.EXT_4444_formats = true,452.EXT_buffer_device_address = true,453.EXT_calibrated_timestamps = RADV_SUPPORT_CALIBRATED_TIMESTAMPS,454.EXT_color_write_enable = true,455.EXT_conditional_rendering = true,456.EXT_conservative_rasterization = device->rad_info.chip_class >= GFX9,457.EXT_custom_border_color = true,458.EXT_debug_marker = radv_thread_trace_enabled(),459.EXT_depth_clip_enable = true,460.EXT_depth_range_unrestricted = true,461.EXT_descriptor_indexing = true,462.EXT_discard_rectangles = true,463#ifdef VK_USE_PLATFORM_DISPLAY_KHR464.EXT_display_control = true,465#endif466.EXT_extended_dynamic_state = true,467.EXT_extended_dynamic_state2 = true,468.EXT_external_memory_dma_buf = true,469.EXT_external_memory_host = device->rad_info.has_userptr,470.EXT_global_priority = true,471.EXT_global_priority_query = true,472.EXT_host_query_reset = true,473.EXT_image_drm_format_modifier = device->rad_info.chip_class >= GFX9,474.EXT_image_robustness = true,475.EXT_index_type_uint8 = device->rad_info.chip_class >= GFX8,476.EXT_inline_uniform_block = true,477.EXT_line_rasterization = true,478.EXT_memory_budget = true,479.EXT_memory_priority = true,480.EXT_multi_draw = true,481.EXT_pci_bus_info = true,482#ifndef _WIN32483.EXT_physical_device_drm = true,484#endif485.EXT_pipeline_creation_cache_control = true,486.EXT_pipeline_creation_feedback = true,487.EXT_post_depth_coverage = device->rad_info.chip_class >= GFX10,488.EXT_private_data = true,489.EXT_provoking_vertex = true,490.EXT_queue_family_foreign = true,491.EXT_robustness2 = true,492.EXT_sample_locations = device->rad_info.chip_class < GFX10,493.EXT_sampler_filter_minmax = true,494.EXT_scalar_block_layout = device->rad_info.chip_class >= GFX7,495.EXT_shader_atomic_float = true,496.EXT_shader_demote_to_helper_invocation = true,497.EXT_shader_image_atomic_int64 = true,498.EXT_shader_stencil_export = true,499.EXT_shader_subgroup_ballot = true,500.EXT_shader_subgroup_vote = true,501.EXT_shader_viewport_index_layer = true,502.EXT_subgroup_size_control = true,503.EXT_texel_buffer_alignment = true,504.EXT_transform_feedback = true,505.EXT_vertex_attribute_divisor = true,506.EXT_ycbcr_image_arrays = true,507.AMD_buffer_marker = true,508.AMD_device_coherent_memory = true,509.AMD_draw_indirect_count = true,510.AMD_gcn_shader = true,511.AMD_gpu_shader_half_float = device->rad_info.has_packed_math_16bit,512.AMD_gpu_shader_int16 = device->rad_info.has_packed_math_16bit,513.AMD_memory_overallocation_behavior = true,514.AMD_mixed_attachment_samples = true,515.AMD_rasterization_order = device->rad_info.has_out_of_order_rast,516.AMD_shader_ballot = true,517.AMD_shader_core_properties = true,518.AMD_shader_core_properties2 = true,519.AMD_shader_explicit_vertex_parameter = true,520.AMD_shader_fragment_mask = true,521.AMD_shader_image_load_store_lod = true,522.AMD_shader_info = true,523.AMD_shader_trinary_minmax = true,524.AMD_texture_gather_bias_lod = true,525#ifdef ANDROID526.ANDROID_external_memory_android_hardware_buffer = RADV_SUPPORT_ANDROID_HARDWARE_BUFFER,527.ANDROID_native_buffer = true,528#endif529.GOOGLE_decorate_string = true,530.GOOGLE_hlsl_functionality1 = true,531.GOOGLE_user_type = true,532.NV_compute_shader_derivatives = true,533.VALVE_mutable_descriptor_type = true,534};535}536537static VkResult538radv_physical_device_try_create(struct radv_instance *instance, drmDevicePtr drm_device,539struct radv_physical_device **device_out)540{541VkResult result;542int fd = -1;543int master_fd = -1;544545#ifdef _WIN32546assert(drm_device == NULL);547#else548if (drm_device) {549const char *path = drm_device->nodes[DRM_NODE_RENDER];550drmVersionPtr version;551552fd = open(path, O_RDWR | O_CLOEXEC);553if (fd < 0) {554if (instance->debug_flags & RADV_DEBUG_STARTUP)555radv_logi("Could not open device '%s'", path);556557return vk_error(instance, VK_ERROR_INCOMPATIBLE_DRIVER);558}559560version = drmGetVersion(fd);561if (!version) {562close(fd);563564if (instance->debug_flags & RADV_DEBUG_STARTUP)565radv_logi("Could not get the kernel driver version for device '%s'", path);566567return vk_errorf(instance, VK_ERROR_INCOMPATIBLE_DRIVER, "failed to get version %s: %m",568path);569}570571if (strcmp(version->name, "amdgpu")) {572drmFreeVersion(version);573close(fd);574575if (instance->debug_flags & RADV_DEBUG_STARTUP)576radv_logi("Device '%s' is not using the amdgpu kernel driver.", path);577578return VK_ERROR_INCOMPATIBLE_DRIVER;579}580drmFreeVersion(version);581582if (instance->debug_flags & RADV_DEBUG_STARTUP)583radv_logi("Found compatible device '%s'.", path);584}585#endif586587struct radv_physical_device *device = vk_zalloc2(&instance->vk.alloc, NULL, sizeof(*device), 8,588VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);589if (!device) {590result = vk_error(instance, VK_ERROR_OUT_OF_HOST_MEMORY);591goto fail_fd;592}593594struct vk_physical_device_dispatch_table dispatch_table;595vk_physical_device_dispatch_table_from_entrypoints(&dispatch_table,596&radv_physical_device_entrypoints, true);597598result = vk_physical_device_init(&device->vk, &instance->vk, NULL, &dispatch_table);599if (result != VK_SUCCESS) {600goto fail_alloc;601}602603device->instance = instance;604605#ifdef _WIN32606device->ws = radv_null_winsys_create();607#else608if (drm_device) {609device->ws = radv_amdgpu_winsys_create(fd, instance->debug_flags, instance->perftest_flags, false);610} else {611device->ws = radv_null_winsys_create();612}613#endif614615if (!device->ws) {616result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "failed to initialize winsys");617goto fail_base;618}619620#ifndef _WIN32621if (drm_device && instance->vk.enabled_extensions.KHR_display) {622master_fd = open(drm_device->nodes[DRM_NODE_PRIMARY], O_RDWR | O_CLOEXEC);623if (master_fd >= 0) {624uint32_t accel_working = 0;625struct drm_amdgpu_info request = {.return_pointer = (uintptr_t)&accel_working,626.return_size = sizeof(accel_working),627.query = AMDGPU_INFO_ACCEL_WORKING};628629if (drmCommandWrite(master_fd, DRM_AMDGPU_INFO, &request, sizeof(struct drm_amdgpu_info)) <6300 ||631!accel_working) {632close(master_fd);633master_fd = -1;634}635}636}637#endif638639device->master_fd = master_fd;640device->local_fd = fd;641device->ws->query_info(device->ws, &device->rad_info);642643device->use_llvm = instance->debug_flags & RADV_DEBUG_LLVM;644645snprintf(device->name, sizeof(device->name), "AMD RADV %s%s", device->rad_info.name,646radv_get_compiler_string(device));647648#ifdef ENABLE_SHADER_CACHE649if (radv_device_get_cache_uuid(device->rad_info.family, device->cache_uuid)) {650result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED, "cannot generate UUID");651goto fail_wsi;652}653654/* These flags affect shader compilation. */655uint64_t shader_env_flags = (device->use_llvm ? 0 : 0x2);656657/* The gpu id is already embedded in the uuid so we just pass "radv"658* when creating the cache.659*/660char buf[VK_UUID_SIZE * 2 + 1];661disk_cache_format_hex_id(buf, device->cache_uuid, VK_UUID_SIZE * 2);662device->disk_cache = disk_cache_create(device->name, buf, shader_env_flags);663#endif664665if (device->rad_info.chip_class < GFX8 || device->rad_info.chip_class > GFX10)666vk_warn_non_conformant_implementation("radv");667668radv_get_driver_uuid(&device->driver_uuid);669radv_get_device_uuid(&device->rad_info, &device->device_uuid);670671device->out_of_order_rast_allowed =672device->rad_info.has_out_of_order_rast &&673!(device->instance->debug_flags & RADV_DEBUG_NO_OUT_OF_ORDER);674675device->dcc_msaa_allowed = (device->instance->perftest_flags & RADV_PERFTEST_DCC_MSAA);676677device->use_ngg = device->rad_info.chip_class >= GFX10 &&678device->rad_info.family != CHIP_NAVI14 &&679!(device->instance->debug_flags & RADV_DEBUG_NO_NGG);680681device->use_ngg_streamout = false;682683/* Determine the number of threads per wave for all stages. */684device->cs_wave_size = 64;685device->ps_wave_size = 64;686device->ge_wave_size = 64;687688if (device->rad_info.chip_class >= GFX10) {689if (device->instance->perftest_flags & RADV_PERFTEST_CS_WAVE_32)690device->cs_wave_size = 32;691692/* For pixel shaders, wave64 is recommanded. */693if (device->instance->perftest_flags & RADV_PERFTEST_PS_WAVE_32)694device->ps_wave_size = 32;695696if (device->instance->perftest_flags & RADV_PERFTEST_GE_WAVE_32)697device->ge_wave_size = 32;698}699700radv_physical_device_init_mem_types(device);701702radv_physical_device_get_supported_extensions(device, &device->vk.supported_extensions);703704#ifndef _WIN32705if (drm_device) {706struct stat primary_stat = {0}, render_stat = {0};707708device->available_nodes = drm_device->available_nodes;709device->bus_info = *drm_device->businfo.pci;710711if ((drm_device->available_nodes & (1 << DRM_NODE_PRIMARY)) &&712stat(drm_device->nodes[DRM_NODE_PRIMARY], &primary_stat) != 0) {713result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,714"failed to stat DRM primary node %s",715drm_device->nodes[DRM_NODE_PRIMARY]);716goto fail_disk_cache;717}718device->primary_devid = primary_stat.st_rdev;719720if ((drm_device->available_nodes & (1 << DRM_NODE_RENDER)) &&721stat(drm_device->nodes[DRM_NODE_RENDER], &render_stat) != 0) {722result = vk_errorf(instance, VK_ERROR_INITIALIZATION_FAILED,723"failed to stat DRM render node %s",724drm_device->nodes[DRM_NODE_RENDER]);725goto fail_disk_cache;726}727device->render_devid = render_stat.st_rdev;728}729#endif730731if ((device->instance->debug_flags & RADV_DEBUG_INFO))732ac_print_gpu_info(&device->rad_info, stdout);733734/* The WSI is structured as a layer on top of the driver, so this has735* to be the last part of initialization (at least until we get other736* semi-layers).737*/738result = radv_init_wsi(device);739if (result != VK_SUCCESS) {740vk_error(instance, result);741goto fail_disk_cache;742}743744*device_out = device;745746return VK_SUCCESS;747748fail_disk_cache:749disk_cache_destroy(device->disk_cache);750#ifdef ENABLE_SHADER_CACHE751fail_wsi:752#endif753device->ws->destroy(device->ws);754fail_base:755vk_physical_device_finish(&device->vk);756fail_alloc:757vk_free(&instance->vk.alloc, device);758fail_fd:759if (fd != -1)760close(fd);761if (master_fd != -1)762close(master_fd);763return result;764}765766static void767radv_physical_device_destroy(struct radv_physical_device *device)768{769radv_finish_wsi(device);770device->ws->destroy(device->ws);771disk_cache_destroy(device->disk_cache);772if (device->local_fd != -1)773close(device->local_fd);774if (device->master_fd != -1)775close(device->master_fd);776vk_physical_device_finish(&device->vk);777vk_free(&device->instance->vk.alloc, device);778}779780static const struct debug_control radv_debug_options[] = {781{"nofastclears", RADV_DEBUG_NO_FAST_CLEARS},782{"nodcc", RADV_DEBUG_NO_DCC},783{"shaders", RADV_DEBUG_DUMP_SHADERS},784{"nocache", RADV_DEBUG_NO_CACHE},785{"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS},786{"nohiz", RADV_DEBUG_NO_HIZ},787{"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE},788{"allbos", RADV_DEBUG_ALL_BOS},789{"noibs", RADV_DEBUG_NO_IBS},790{"spirv", RADV_DEBUG_DUMP_SPIRV},791{"vmfaults", RADV_DEBUG_VM_FAULTS},792{"zerovram", RADV_DEBUG_ZERO_VRAM},793{"syncshaders", RADV_DEBUG_SYNC_SHADERS},794{"preoptir", RADV_DEBUG_PREOPTIR},795{"nodynamicbounds", RADV_DEBUG_NO_DYNAMIC_BOUNDS},796{"nooutoforder", RADV_DEBUG_NO_OUT_OF_ORDER},797{"info", RADV_DEBUG_INFO},798{"errors", RADV_DEBUG_ERRORS},799{"startup", RADV_DEBUG_STARTUP},800{"checkir", RADV_DEBUG_CHECKIR},801{"nobinning", RADV_DEBUG_NOBINNING},802{"nongg", RADV_DEBUG_NO_NGG},803{"metashaders", RADV_DEBUG_DUMP_META_SHADERS},804{"nomemorycache", RADV_DEBUG_NO_MEMORY_CACHE},805{"discardtodemote", RADV_DEBUG_DISCARD_TO_DEMOTE},806{"llvm", RADV_DEBUG_LLVM},807{"forcecompress", RADV_DEBUG_FORCE_COMPRESS},808{"hang", RADV_DEBUG_HANG},809{"img", RADV_DEBUG_IMG},810{"noumr", RADV_DEBUG_NO_UMR},811{"invariantgeom", RADV_DEBUG_INVARIANT_GEOM},812{"nodisplaydcc", RADV_DEBUG_NO_DISPLAY_DCC},813{"notccompatcmask", RADV_DEBUG_NO_TC_COMPAT_CMASK},814{"novrsflatshading", RADV_DEBUG_NO_VRS_FLAT_SHADING},815{NULL, 0}};816817const char *818radv_get_debug_option_name(int id)819{820assert(id < ARRAY_SIZE(radv_debug_options) - 1);821return radv_debug_options[id].string;822}823824static const struct debug_control radv_perftest_options[] = {{"localbos", RADV_PERFTEST_LOCAL_BOS},825{"dccmsaa", RADV_PERFTEST_DCC_MSAA},826{"bolist", RADV_PERFTEST_BO_LIST},827{"cswave32", RADV_PERFTEST_CS_WAVE_32},828{"pswave32", RADV_PERFTEST_PS_WAVE_32},829{"gewave32", RADV_PERFTEST_GE_WAVE_32},830{"nosam", RADV_PERFTEST_NO_SAM},831{"sam", RADV_PERFTEST_SAM},832{"rt", RADV_PERFTEST_RT},833{"nggc", RADV_PERFTEST_NGGC},834{NULL, 0}};835836const char *837radv_get_perftest_option_name(int id)838{839assert(id < ARRAY_SIZE(radv_perftest_options) - 1);840return radv_perftest_options[id].string;841}842843// clang-format off844static const driOptionDescription radv_dri_options[] = {845DRI_CONF_SECTION_PERFORMANCE846DRI_CONF_ADAPTIVE_SYNC(true)847DRI_CONF_VK_X11_OVERRIDE_MIN_IMAGE_COUNT(0)848DRI_CONF_VK_X11_STRICT_IMAGE_COUNT(false)849DRI_CONF_VK_X11_ENSURE_MIN_IMAGE_COUNT(false)850DRI_CONF_RADV_REPORT_LLVM9_VERSION_STRING(false)851DRI_CONF_RADV_ENABLE_MRT_OUTPUT_NAN_FIXUP(false)852DRI_CONF_RADV_DISABLE_SHRINK_IMAGE_STORE(false)853DRI_CONF_RADV_NO_DYNAMIC_BOUNDS(false)854DRI_CONF_RADV_ABSOLUTE_DEPTH_BIAS(false)855DRI_CONF_RADV_OVERRIDE_UNIFORM_OFFSET_ALIGNMENT(0)856DRI_CONF_SECTION_END857858DRI_CONF_SECTION_DEBUG859DRI_CONF_OVERRIDE_VRAM_SIZE()860DRI_CONF_VK_WSI_FORCE_BGRA8_UNORM_FIRST(false)861DRI_CONF_RADV_ZERO_VRAM(false)862DRI_CONF_RADV_LOWER_DISCARD_TO_DEMOTE(false)863DRI_CONF_RADV_INVARIANT_GEOM(false)864DRI_CONF_RADV_DISABLE_TC_COMPAT_HTILE_GENERAL(false)865DRI_CONF_RADV_DISABLE_DCC(false)866DRI_CONF_RADV_REPORT_APU_AS_DGPU(false)867DRI_CONF_SECTION_END868};869// clang-format on870871static void872radv_init_dri_options(struct radv_instance *instance)873{874driParseOptionInfo(&instance->available_dri_options, radv_dri_options,875ARRAY_SIZE(radv_dri_options));876driParseConfigFiles(&instance->dri_options, &instance->available_dri_options, 0, "radv", NULL,877instance->vk.app_info.app_name, instance->vk.app_info.app_version,878instance->vk.app_info.engine_name, instance->vk.app_info.engine_version);879880instance->enable_mrt_output_nan_fixup =881driQueryOptionb(&instance->dri_options, "radv_enable_mrt_output_nan_fixup");882883instance->disable_shrink_image_store =884driQueryOptionb(&instance->dri_options, "radv_disable_shrink_image_store");885886instance->absolute_depth_bias =887driQueryOptionb(&instance->dri_options, "radv_absolute_depth_bias");888889instance->disable_tc_compat_htile_in_general =890driQueryOptionb(&instance->dri_options, "radv_disable_tc_compat_htile_general");891892if (driQueryOptionb(&instance->dri_options, "radv_no_dynamic_bounds"))893instance->debug_flags |= RADV_DEBUG_NO_DYNAMIC_BOUNDS;894895if (driQueryOptionb(&instance->dri_options, "radv_zero_vram"))896instance->debug_flags |= RADV_DEBUG_ZERO_VRAM;897898if (driQueryOptionb(&instance->dri_options, "radv_lower_discard_to_demote"))899instance->debug_flags |= RADV_DEBUG_DISCARD_TO_DEMOTE;900901if (driQueryOptionb(&instance->dri_options, "radv_invariant_geom"))902instance->debug_flags |= RADV_DEBUG_INVARIANT_GEOM;903904if (driQueryOptionb(&instance->dri_options, "radv_disable_dcc"))905instance->debug_flags |= RADV_DEBUG_NO_DCC;906907instance->report_apu_as_dgpu =908driQueryOptionb(&instance->dri_options, "radv_report_apu_as_dgpu");909}910911VkResult912radv_CreateInstance(const VkInstanceCreateInfo *pCreateInfo,913const VkAllocationCallbacks *pAllocator, VkInstance *pInstance)914{915struct radv_instance *instance;916VkResult result;917918if (!pAllocator)919pAllocator = vk_default_allocator();920921instance = vk_zalloc(pAllocator, sizeof(*instance), 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);922if (!instance)923return vk_error(NULL, VK_ERROR_OUT_OF_HOST_MEMORY);924925struct vk_instance_dispatch_table dispatch_table;926vk_instance_dispatch_table_from_entrypoints(&dispatch_table, &radv_instance_entrypoints, true);927result = vk_instance_init(&instance->vk, &radv_instance_extensions_supported, &dispatch_table,928pCreateInfo, pAllocator);929if (result != VK_SUCCESS) {930vk_free(pAllocator, instance);931return vk_error(instance, result);932}933934instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), radv_debug_options);935instance->perftest_flags = parse_debug_string(getenv("RADV_PERFTEST"), radv_perftest_options);936937if (instance->debug_flags & RADV_DEBUG_STARTUP)938radv_logi("Created an instance");939940instance->physical_devices_enumerated = false;941list_inithead(&instance->physical_devices);942943VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));944945radv_init_dri_options(instance);946947*pInstance = radv_instance_to_handle(instance);948949return VK_SUCCESS;950}951952void953radv_DestroyInstance(VkInstance _instance, const VkAllocationCallbacks *pAllocator)954{955RADV_FROM_HANDLE(radv_instance, instance, _instance);956957if (!instance)958return;959960list_for_each_entry_safe(struct radv_physical_device, pdevice, &instance->physical_devices, link)961{962radv_physical_device_destroy(pdevice);963}964965VG(VALGRIND_DESTROY_MEMPOOL(instance));966967driDestroyOptionCache(&instance->dri_options);968driDestroyOptionInfo(&instance->available_dri_options);969970vk_instance_finish(&instance->vk);971vk_free(&instance->vk.alloc, instance);972}973974static VkResult975radv_enumerate_physical_devices(struct radv_instance *instance)976{977if (instance->physical_devices_enumerated)978return VK_SUCCESS;979980instance->physical_devices_enumerated = true;981982VkResult result = VK_SUCCESS;983984if (getenv("RADV_FORCE_FAMILY")) {985/* When RADV_FORCE_FAMILY is set, the driver creates a nul986* device that allows to test the compiler without having an987* AMDGPU instance.988*/989struct radv_physical_device *pdevice;990991result = radv_physical_device_try_create(instance, NULL, &pdevice);992if (result != VK_SUCCESS)993return result;994995list_addtail(&pdevice->link, &instance->physical_devices);996return VK_SUCCESS;997}998999#ifndef _WIN321000/* TODO: Check for more devices ? */1001drmDevicePtr devices[8];1002int max_devices = drmGetDevices2(0, devices, ARRAY_SIZE(devices));10031004if (instance->debug_flags & RADV_DEBUG_STARTUP)1005radv_logi("Found %d drm nodes", max_devices);10061007if (max_devices < 1)1008return vk_error(instance, VK_SUCCESS);10091010for (unsigned i = 0; i < (unsigned)max_devices; i++) {1011if (devices[i]->available_nodes & 1 << DRM_NODE_RENDER &&1012devices[i]->bustype == DRM_BUS_PCI &&1013devices[i]->deviceinfo.pci->vendor_id == ATI_VENDOR_ID) {10141015struct radv_physical_device *pdevice;1016result = radv_physical_device_try_create(instance, devices[i], &pdevice);1017/* Incompatible DRM device, skip. */1018if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {1019result = VK_SUCCESS;1020continue;1021}10221023/* Error creating the physical device, report the error. */1024if (result != VK_SUCCESS)1025break;10261027list_addtail(&pdevice->link, &instance->physical_devices);1028}1029}1030drmFreeDevices(devices, max_devices);1031#endif10321033/* If we successfully enumerated any devices, call it success */1034return result;1035}10361037VkResult1038radv_EnumeratePhysicalDevices(VkInstance _instance, uint32_t *pPhysicalDeviceCount,1039VkPhysicalDevice *pPhysicalDevices)1040{1041RADV_FROM_HANDLE(radv_instance, instance, _instance);1042VK_OUTARRAY_MAKE_TYPED(VkPhysicalDevice, out, pPhysicalDevices, pPhysicalDeviceCount);10431044VkResult result = radv_enumerate_physical_devices(instance);1045if (result != VK_SUCCESS)1046return result;10471048list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)1049{1050vk_outarray_append_typed(VkPhysicalDevice, &out, i)1051{1052*i = radv_physical_device_to_handle(pdevice);1053}1054}10551056return vk_outarray_status(&out);1057}10581059VkResult1060radv_EnumeratePhysicalDeviceGroups(VkInstance _instance, uint32_t *pPhysicalDeviceGroupCount,1061VkPhysicalDeviceGroupProperties *pPhysicalDeviceGroupProperties)1062{1063RADV_FROM_HANDLE(radv_instance, instance, _instance);1064VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceGroupProperties, out, pPhysicalDeviceGroupProperties,1065pPhysicalDeviceGroupCount);10661067VkResult result = radv_enumerate_physical_devices(instance);1068if (result != VK_SUCCESS)1069return result;10701071list_for_each_entry(struct radv_physical_device, pdevice, &instance->physical_devices, link)1072{1073vk_outarray_append_typed(VkPhysicalDeviceGroupProperties, &out, p)1074{1075p->physicalDeviceCount = 1;1076memset(p->physicalDevices, 0, sizeof(p->physicalDevices));1077p->physicalDevices[0] = radv_physical_device_to_handle(pdevice);1078p->subsetAllocation = false;1079}1080}10811082return vk_outarray_status(&out);1083}10841085void1086radv_GetPhysicalDeviceFeatures(VkPhysicalDevice physicalDevice, VkPhysicalDeviceFeatures *pFeatures)1087{1088RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);1089memset(pFeatures, 0, sizeof(*pFeatures));10901091*pFeatures = (VkPhysicalDeviceFeatures){1092.robustBufferAccess = true,1093.fullDrawIndexUint32 = true,1094.imageCubeArray = true,1095.independentBlend = true,1096.geometryShader = true,1097.tessellationShader = true,1098.sampleRateShading = true,1099.dualSrcBlend = true,1100.logicOp = true,1101.multiDrawIndirect = true,1102.drawIndirectFirstInstance = true,1103.depthClamp = true,1104.depthBiasClamp = true,1105.fillModeNonSolid = true,1106.depthBounds = true,1107.wideLines = true,1108.largePoints = true,1109.alphaToOne = false,1110.multiViewport = true,1111.samplerAnisotropy = true,1112.textureCompressionETC2 = radv_device_supports_etc(pdevice),1113.textureCompressionASTC_LDR = false,1114.textureCompressionBC = true,1115.occlusionQueryPrecise = true,1116.pipelineStatisticsQuery = true,1117.vertexPipelineStoresAndAtomics = true,1118.fragmentStoresAndAtomics = true,1119.shaderTessellationAndGeometryPointSize = true,1120.shaderImageGatherExtended = true,1121.shaderStorageImageExtendedFormats = true,1122.shaderStorageImageMultisample = true,1123.shaderUniformBufferArrayDynamicIndexing = true,1124.shaderSampledImageArrayDynamicIndexing = true,1125.shaderStorageBufferArrayDynamicIndexing = true,1126.shaderStorageImageArrayDynamicIndexing = true,1127.shaderStorageImageReadWithoutFormat = true,1128.shaderStorageImageWriteWithoutFormat = true,1129.shaderClipDistance = true,1130.shaderCullDistance = true,1131.shaderFloat64 = true,1132.shaderInt64 = true,1133.shaderInt16 = true,1134.sparseBinding = true,1135.sparseResidencyBuffer = pdevice->rad_info.family >= CHIP_POLARIS10,1136.sparseResidencyImage2D = pdevice->rad_info.family >= CHIP_POLARIS10,1137.sparseResidencyAliased = pdevice->rad_info.family >= CHIP_POLARIS10,1138.variableMultisampleRate = true,1139.shaderResourceMinLod = true,1140.shaderResourceResidency = true,1141.inheritedQueries = true,1142};1143}11441145static void1146radv_get_physical_device_features_1_1(struct radv_physical_device *pdevice,1147VkPhysicalDeviceVulkan11Features *f)1148{1149assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES);11501151f->storageBuffer16BitAccess = true;1152f->uniformAndStorageBuffer16BitAccess = true;1153f->storagePushConstant16 = true;1154f->storageInputOutput16 = pdevice->rad_info.has_packed_math_16bit;1155f->multiview = true;1156f->multiviewGeometryShader = true;1157f->multiviewTessellationShader = true;1158f->variablePointersStorageBuffer = true;1159f->variablePointers = true;1160f->protectedMemory = false;1161f->samplerYcbcrConversion = true;1162f->shaderDrawParameters = true;1163}11641165static void1166radv_get_physical_device_features_1_2(struct radv_physical_device *pdevice,1167VkPhysicalDeviceVulkan12Features *f)1168{1169assert(f->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES);11701171f->samplerMirrorClampToEdge = true;1172f->drawIndirectCount = true;1173f->storageBuffer8BitAccess = true;1174f->uniformAndStorageBuffer8BitAccess = true;1175f->storagePushConstant8 = true;1176f->shaderBufferInt64Atomics = true;1177f->shaderSharedInt64Atomics = true;1178f->shaderFloat16 = pdevice->rad_info.has_packed_math_16bit;1179f->shaderInt8 = true;11801181f->descriptorIndexing = true;1182f->shaderInputAttachmentArrayDynamicIndexing = true;1183f->shaderUniformTexelBufferArrayDynamicIndexing = true;1184f->shaderStorageTexelBufferArrayDynamicIndexing = true;1185f->shaderUniformBufferArrayNonUniformIndexing = true;1186f->shaderSampledImageArrayNonUniformIndexing = true;1187f->shaderStorageBufferArrayNonUniformIndexing = true;1188f->shaderStorageImageArrayNonUniformIndexing = true;1189f->shaderInputAttachmentArrayNonUniformIndexing = true;1190f->shaderUniformTexelBufferArrayNonUniformIndexing = true;1191f->shaderStorageTexelBufferArrayNonUniformIndexing = true;1192f->descriptorBindingUniformBufferUpdateAfterBind = true;1193f->descriptorBindingSampledImageUpdateAfterBind = true;1194f->descriptorBindingStorageImageUpdateAfterBind = true;1195f->descriptorBindingStorageBufferUpdateAfterBind = true;1196f->descriptorBindingUniformTexelBufferUpdateAfterBind = true;1197f->descriptorBindingStorageTexelBufferUpdateAfterBind = true;1198f->descriptorBindingUpdateUnusedWhilePending = true;1199f->descriptorBindingPartiallyBound = true;1200f->descriptorBindingVariableDescriptorCount = true;1201f->runtimeDescriptorArray = true;12021203f->samplerFilterMinmax = true;1204f->scalarBlockLayout = pdevice->rad_info.chip_class >= GFX7;1205f->imagelessFramebuffer = true;1206f->uniformBufferStandardLayout = true;1207f->shaderSubgroupExtendedTypes = true;1208f->separateDepthStencilLayouts = true;1209f->hostQueryReset = true;1210f->timelineSemaphore = true, f->bufferDeviceAddress = true;1211f->bufferDeviceAddressCaptureReplay = true;1212f->bufferDeviceAddressMultiDevice = false;1213f->vulkanMemoryModel = true;1214f->vulkanMemoryModelDeviceScope = true;1215f->vulkanMemoryModelAvailabilityVisibilityChains = false;1216f->shaderOutputViewportIndex = true;1217f->shaderOutputLayer = true;1218f->subgroupBroadcastDynamicId = true;1219}12201221void1222radv_GetPhysicalDeviceFeatures2(VkPhysicalDevice physicalDevice,1223VkPhysicalDeviceFeatures2 *pFeatures)1224{1225RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);1226radv_GetPhysicalDeviceFeatures(physicalDevice, &pFeatures->features);12271228VkPhysicalDeviceVulkan11Features core_1_1 = {1229.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES,1230};1231radv_get_physical_device_features_1_1(pdevice, &core_1_1);12321233VkPhysicalDeviceVulkan12Features core_1_2 = {1234.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES,1235};1236radv_get_physical_device_features_1_2(pdevice, &core_1_2);12371238#define CORE_FEATURE(major, minor, feature) features->feature = core_##major##_##minor.feature12391240vk_foreach_struct(ext, pFeatures->pNext)1241{1242switch (ext->sType) {1243case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VARIABLE_POINTERS_FEATURES: {1244VkPhysicalDeviceVariablePointersFeatures *features = (void *)ext;1245CORE_FEATURE(1, 1, variablePointersStorageBuffer);1246CORE_FEATURE(1, 1, variablePointers);1247break;1248}1249case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_FEATURES: {1250VkPhysicalDeviceMultiviewFeatures *features = (VkPhysicalDeviceMultiviewFeatures *)ext;1251CORE_FEATURE(1, 1, multiview);1252CORE_FEATURE(1, 1, multiviewGeometryShader);1253CORE_FEATURE(1, 1, multiviewTessellationShader);1254break;1255}1256case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DRAW_PARAMETERS_FEATURES: {1257VkPhysicalDeviceShaderDrawParametersFeatures *features =1258(VkPhysicalDeviceShaderDrawParametersFeatures *)ext;1259CORE_FEATURE(1, 1, shaderDrawParameters);1260break;1261}1262case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_FEATURES: {1263VkPhysicalDeviceProtectedMemoryFeatures *features =1264(VkPhysicalDeviceProtectedMemoryFeatures *)ext;1265CORE_FEATURE(1, 1, protectedMemory);1266break;1267}1268case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_16BIT_STORAGE_FEATURES: {1269VkPhysicalDevice16BitStorageFeatures *features =1270(VkPhysicalDevice16BitStorageFeatures *)ext;1271CORE_FEATURE(1, 1, storageBuffer16BitAccess);1272CORE_FEATURE(1, 1, uniformAndStorageBuffer16BitAccess);1273CORE_FEATURE(1, 1, storagePushConstant16);1274CORE_FEATURE(1, 1, storageInputOutput16);1275break;1276}1277case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_YCBCR_CONVERSION_FEATURES: {1278VkPhysicalDeviceSamplerYcbcrConversionFeatures *features =1279(VkPhysicalDeviceSamplerYcbcrConversionFeatures *)ext;1280CORE_FEATURE(1, 1, samplerYcbcrConversion);1281break;1282}1283case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_FEATURES: {1284VkPhysicalDeviceDescriptorIndexingFeatures *features =1285(VkPhysicalDeviceDescriptorIndexingFeatures *)ext;1286CORE_FEATURE(1, 2, shaderInputAttachmentArrayDynamicIndexing);1287CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayDynamicIndexing);1288CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayDynamicIndexing);1289CORE_FEATURE(1, 2, shaderUniformBufferArrayNonUniformIndexing);1290CORE_FEATURE(1, 2, shaderSampledImageArrayNonUniformIndexing);1291CORE_FEATURE(1, 2, shaderStorageBufferArrayNonUniformIndexing);1292CORE_FEATURE(1, 2, shaderStorageImageArrayNonUniformIndexing);1293CORE_FEATURE(1, 2, shaderInputAttachmentArrayNonUniformIndexing);1294CORE_FEATURE(1, 2, shaderUniformTexelBufferArrayNonUniformIndexing);1295CORE_FEATURE(1, 2, shaderStorageTexelBufferArrayNonUniformIndexing);1296CORE_FEATURE(1, 2, descriptorBindingUniformBufferUpdateAfterBind);1297CORE_FEATURE(1, 2, descriptorBindingSampledImageUpdateAfterBind);1298CORE_FEATURE(1, 2, descriptorBindingStorageImageUpdateAfterBind);1299CORE_FEATURE(1, 2, descriptorBindingStorageBufferUpdateAfterBind);1300CORE_FEATURE(1, 2, descriptorBindingUniformTexelBufferUpdateAfterBind);1301CORE_FEATURE(1, 2, descriptorBindingStorageTexelBufferUpdateAfterBind);1302CORE_FEATURE(1, 2, descriptorBindingUpdateUnusedWhilePending);1303CORE_FEATURE(1, 2, descriptorBindingPartiallyBound);1304CORE_FEATURE(1, 2, descriptorBindingVariableDescriptorCount);1305CORE_FEATURE(1, 2, runtimeDescriptorArray);1306break;1307}1308case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONDITIONAL_RENDERING_FEATURES_EXT: {1309VkPhysicalDeviceConditionalRenderingFeaturesEXT *features =1310(VkPhysicalDeviceConditionalRenderingFeaturesEXT *)ext;1311features->conditionalRendering = true;1312features->inheritedConditionalRendering = false;1313break;1314}1315case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_FEATURES_EXT: {1316VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *features =1317(VkPhysicalDeviceVertexAttributeDivisorFeaturesEXT *)ext;1318features->vertexAttributeInstanceRateDivisor = true;1319features->vertexAttributeInstanceRateZeroDivisor = true;1320break;1321}1322case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_FEATURES_EXT: {1323VkPhysicalDeviceTransformFeedbackFeaturesEXT *features =1324(VkPhysicalDeviceTransformFeedbackFeaturesEXT *)ext;1325features->transformFeedback = true;1326features->geometryStreams = !pdevice->use_ngg_streamout;1327break;1328}1329case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SCALAR_BLOCK_LAYOUT_FEATURES: {1330VkPhysicalDeviceScalarBlockLayoutFeatures *features =1331(VkPhysicalDeviceScalarBlockLayoutFeatures *)ext;1332CORE_FEATURE(1, 2, scalarBlockLayout);1333break;1334}1335case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MEMORY_PRIORITY_FEATURES_EXT: {1336VkPhysicalDeviceMemoryPriorityFeaturesEXT *features =1337(VkPhysicalDeviceMemoryPriorityFeaturesEXT *)ext;1338features->memoryPriority = true;1339break;1340}1341case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES_EXT: {1342VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *features =1343(VkPhysicalDeviceBufferDeviceAddressFeaturesEXT *)ext;1344CORE_FEATURE(1, 2, bufferDeviceAddress);1345CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);1346CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);1347break;1348}1349case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_BUFFER_DEVICE_ADDRESS_FEATURES: {1350VkPhysicalDeviceBufferDeviceAddressFeatures *features =1351(VkPhysicalDeviceBufferDeviceAddressFeatures *)ext;1352CORE_FEATURE(1, 2, bufferDeviceAddress);1353CORE_FEATURE(1, 2, bufferDeviceAddressCaptureReplay);1354CORE_FEATURE(1, 2, bufferDeviceAddressMultiDevice);1355break;1356}1357case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_CLIP_ENABLE_FEATURES_EXT: {1358VkPhysicalDeviceDepthClipEnableFeaturesEXT *features =1359(VkPhysicalDeviceDepthClipEnableFeaturesEXT *)ext;1360features->depthClipEnable = true;1361break;1362}1363case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_HOST_QUERY_RESET_FEATURES: {1364VkPhysicalDeviceHostQueryResetFeatures *features =1365(VkPhysicalDeviceHostQueryResetFeatures *)ext;1366CORE_FEATURE(1, 2, hostQueryReset);1367break;1368}1369case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_8BIT_STORAGE_FEATURES: {1370VkPhysicalDevice8BitStorageFeatures *features = (VkPhysicalDevice8BitStorageFeatures *)ext;1371CORE_FEATURE(1, 2, storageBuffer8BitAccess);1372CORE_FEATURE(1, 2, uniformAndStorageBuffer8BitAccess);1373CORE_FEATURE(1, 2, storagePushConstant8);1374break;1375}1376case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_FLOAT16_INT8_FEATURES: {1377VkPhysicalDeviceShaderFloat16Int8Features *features =1378(VkPhysicalDeviceShaderFloat16Int8Features *)ext;1379CORE_FEATURE(1, 2, shaderFloat16);1380CORE_FEATURE(1, 2, shaderInt8);1381break;1382}1383case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_INT64_FEATURES: {1384VkPhysicalDeviceShaderAtomicInt64Features *features =1385(VkPhysicalDeviceShaderAtomicInt64Features *)ext;1386CORE_FEATURE(1, 2, shaderBufferInt64Atomics);1387CORE_FEATURE(1, 2, shaderSharedInt64Atomics);1388break;1389}1390case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_DEMOTE_TO_HELPER_INVOCATION_FEATURES_EXT: {1391VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *features =1392(VkPhysicalDeviceShaderDemoteToHelperInvocationFeaturesEXT *)ext;1393features->shaderDemoteToHelperInvocation = true;1394break;1395}1396case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_FEATURES_EXT: {1397VkPhysicalDeviceInlineUniformBlockFeaturesEXT *features =1398(VkPhysicalDeviceInlineUniformBlockFeaturesEXT *)ext;13991400features->inlineUniformBlock = true;1401features->descriptorBindingInlineUniformBlockUpdateAfterBind = true;1402break;1403}1404case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COMPUTE_SHADER_DERIVATIVES_FEATURES_NV: {1405VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *features =1406(VkPhysicalDeviceComputeShaderDerivativesFeaturesNV *)ext;1407features->computeDerivativeGroupQuads = false;1408features->computeDerivativeGroupLinear = true;1409break;1410}1411case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_YCBCR_IMAGE_ARRAYS_FEATURES_EXT: {1412VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *features =1413(VkPhysicalDeviceYcbcrImageArraysFeaturesEXT *)ext;1414features->ycbcrImageArrays = true;1415break;1416}1417case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_UNIFORM_BUFFER_STANDARD_LAYOUT_FEATURES: {1418VkPhysicalDeviceUniformBufferStandardLayoutFeatures *features =1419(VkPhysicalDeviceUniformBufferStandardLayoutFeatures *)ext;1420CORE_FEATURE(1, 2, uniformBufferStandardLayout);1421break;1422}1423case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INDEX_TYPE_UINT8_FEATURES_EXT: {1424VkPhysicalDeviceIndexTypeUint8FeaturesEXT *features =1425(VkPhysicalDeviceIndexTypeUint8FeaturesEXT *)ext;1426features->indexTypeUint8 = pdevice->rad_info.chip_class >= GFX8;1427break;1428}1429case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGELESS_FRAMEBUFFER_FEATURES: {1430VkPhysicalDeviceImagelessFramebufferFeatures *features =1431(VkPhysicalDeviceImagelessFramebufferFeatures *)ext;1432CORE_FEATURE(1, 2, imagelessFramebuffer);1433break;1434}1435case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_EXECUTABLE_PROPERTIES_FEATURES_KHR: {1436VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *features =1437(VkPhysicalDevicePipelineExecutablePropertiesFeaturesKHR *)ext;1438features->pipelineExecutableInfo = true;1439break;1440}1441case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CLOCK_FEATURES_KHR: {1442VkPhysicalDeviceShaderClockFeaturesKHR *features =1443(VkPhysicalDeviceShaderClockFeaturesKHR *)ext;1444features->shaderSubgroupClock = true;1445features->shaderDeviceClock = pdevice->rad_info.chip_class >= GFX8;1446break;1447}1448case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_FEATURES_EXT: {1449VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *features =1450(VkPhysicalDeviceTexelBufferAlignmentFeaturesEXT *)ext;1451features->texelBufferAlignment = true;1452break;1453}1454case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_FEATURES: {1455VkPhysicalDeviceTimelineSemaphoreFeatures *features =1456(VkPhysicalDeviceTimelineSemaphoreFeatures *)ext;1457CORE_FEATURE(1, 2, timelineSemaphore);1458break;1459}1460case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_FEATURES_EXT: {1461VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *features =1462(VkPhysicalDeviceSubgroupSizeControlFeaturesEXT *)ext;1463features->subgroupSizeControl = true;1464features->computeFullSubgroups = true;1465break;1466}1467case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COHERENT_MEMORY_FEATURES_AMD: {1468VkPhysicalDeviceCoherentMemoryFeaturesAMD *features =1469(VkPhysicalDeviceCoherentMemoryFeaturesAMD *)ext;1470features->deviceCoherentMemory = pdevice->rad_info.has_l2_uncached;1471break;1472}1473case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_EXTENDED_TYPES_FEATURES: {1474VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *features =1475(VkPhysicalDeviceShaderSubgroupExtendedTypesFeatures *)ext;1476CORE_FEATURE(1, 2, shaderSubgroupExtendedTypes);1477break;1478}1479case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SEPARATE_DEPTH_STENCIL_LAYOUTS_FEATURES_KHR: {1480VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *features =1481(VkPhysicalDeviceSeparateDepthStencilLayoutsFeaturesKHR *)ext;1482CORE_FEATURE(1, 2, separateDepthStencilLayouts);1483break;1484}1485case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_FEATURES: {1486radv_get_physical_device_features_1_1(pdevice, (void *)ext);1487break;1488}1489case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_FEATURES: {1490radv_get_physical_device_features_1_2(pdevice, (void *)ext);1491break;1492}1493case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_FEATURES_EXT: {1494VkPhysicalDeviceLineRasterizationFeaturesEXT *features =1495(VkPhysicalDeviceLineRasterizationFeaturesEXT *)ext;1496features->rectangularLines = false;1497features->bresenhamLines = true;1498features->smoothLines = false;1499features->stippledRectangularLines = false;1500/* FIXME: Some stippled Bresenham CTS fails on Vega101501* but work on Raven.1502*/1503features->stippledBresenhamLines = pdevice->rad_info.chip_class != GFX9;1504features->stippledSmoothLines = false;1505break;1506}1507case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {1508VkDeviceMemoryOverallocationCreateInfoAMD *features =1509(VkDeviceMemoryOverallocationCreateInfoAMD *)ext;1510features->overallocationBehavior = true;1511break;1512}1513case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {1514VkPhysicalDeviceRobustness2FeaturesEXT *features =1515(VkPhysicalDeviceRobustness2FeaturesEXT *)ext;1516features->robustBufferAccess2 = true;1517features->robustImageAccess2 = true;1518features->nullDescriptor = true;1519break;1520}1521case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {1522VkPhysicalDeviceCustomBorderColorFeaturesEXT *features =1523(VkPhysicalDeviceCustomBorderColorFeaturesEXT *)ext;1524features->customBorderColors = true;1525features->customBorderColorWithoutFormat = true;1526break;1527}1528case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PRIVATE_DATA_FEATURES_EXT: {1529VkPhysicalDevicePrivateDataFeaturesEXT *features =1530(VkPhysicalDevicePrivateDataFeaturesEXT *)ext;1531features->privateData = true;1532break;1533}1534case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PIPELINE_CREATION_CACHE_CONTROL_FEATURES_EXT: {1535VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *features =1536(VkPhysicalDevicePipelineCreationCacheControlFeaturesEXT *)ext;1537features->pipelineCreationCacheControl = true;1538break;1539}1540case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_MEMORY_MODEL_FEATURES_KHR: {1541VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *features =1542(VkPhysicalDeviceVulkanMemoryModelFeaturesKHR *)ext;1543CORE_FEATURE(1, 2, vulkanMemoryModel);1544CORE_FEATURE(1, 2, vulkanMemoryModelDeviceScope);1545CORE_FEATURE(1, 2, vulkanMemoryModelAvailabilityVisibilityChains);1546break;1547}1548case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_FEATURES_EXT: {1549VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *features =1550(VkPhysicalDeviceExtendedDynamicStateFeaturesEXT *)ext;1551features->extendedDynamicState = true;1552break;1553}1554case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_IMAGE_ROBUSTNESS_FEATURES_EXT: {1555VkPhysicalDeviceImageRobustnessFeaturesEXT *features =1556(VkPhysicalDeviceImageRobustnessFeaturesEXT *)ext;1557features->robustImageAccess = true;1558break;1559}1560case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_ATOMIC_FLOAT_FEATURES_EXT: {1561VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *features =1562(VkPhysicalDeviceShaderAtomicFloatFeaturesEXT *)ext;1563features->shaderBufferFloat32Atomics = true;1564features->shaderBufferFloat32AtomicAdd = false;1565features->shaderBufferFloat64Atomics = true;1566features->shaderBufferFloat64AtomicAdd = false;1567features->shaderSharedFloat32Atomics = true;1568features->shaderSharedFloat32AtomicAdd = pdevice->rad_info.chip_class >= GFX8;1569features->shaderSharedFloat64Atomics = true;1570features->shaderSharedFloat64AtomicAdd = false;1571features->shaderImageFloat32Atomics = true;1572features->shaderImageFloat32AtomicAdd = false;1573features->sparseImageFloat32Atomics = true;1574features->sparseImageFloat32AtomicAdd = false;1575break;1576}1577case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_4444_FORMATS_FEATURES_EXT: {1578VkPhysicalDevice4444FormatsFeaturesEXT *features =1579(VkPhysicalDevice4444FormatsFeaturesEXT *)ext;1580features->formatA4R4G4B4 = true;1581features->formatA4B4G4R4 = true;1582break;1583}1584case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_TERMINATE_INVOCATION_FEATURES_KHR: {1585VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *features =1586(VkPhysicalDeviceShaderTerminateInvocationFeaturesKHR *)ext;1587features->shaderTerminateInvocation = true;1588break;1589}1590case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_IMAGE_ATOMIC_INT64_FEATURES_EXT: {1591VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *features =1592(VkPhysicalDeviceShaderImageAtomicInt64FeaturesEXT *)ext;1593features->shaderImageInt64Atomics = true;1594features->sparseImageInt64Atomics = true;1595break;1596}1597case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MUTABLE_DESCRIPTOR_TYPE_FEATURES_VALVE: {1598VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *features =1599(VkPhysicalDeviceMutableDescriptorTypeFeaturesVALVE *)ext;1600features->mutableDescriptorType = true;1601break;1602}1603case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {1604VkPhysicalDeviceFragmentShadingRateFeaturesKHR *features =1605(VkPhysicalDeviceFragmentShadingRateFeaturesKHR *)ext;1606features->pipelineFragmentShadingRate = true;1607features->primitiveFragmentShadingRate = true;1608features->attachmentFragmentShadingRate = true;1609break;1610}1611case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_WORKGROUP_MEMORY_EXPLICIT_LAYOUT_FEATURES_KHR: {1612VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *features =1613(VkPhysicalDeviceWorkgroupMemoryExplicitLayoutFeaturesKHR *)ext;1614features->workgroupMemoryExplicitLayout = true;1615features->workgroupMemoryExplicitLayoutScalarBlockLayout = true;1616features->workgroupMemoryExplicitLayout8BitAccess = true;1617features->workgroupMemoryExplicitLayout16BitAccess = true;1618break;1619}1620case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ZERO_INITIALIZE_WORKGROUP_MEMORY_FEATURES_KHR: {1621VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *features =1622(VkPhysicalDeviceZeroInitializeWorkgroupMemoryFeaturesKHR *)ext;1623features->shaderZeroInitializeWorkgroupMemory = true;1624break;1625}1626case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_FEATURES_EXT: {1627VkPhysicalDeviceProvokingVertexFeaturesEXT *features =1628(VkPhysicalDeviceProvokingVertexFeaturesEXT *)ext;1629features->provokingVertexLast = true;1630features->transformFeedbackPreservesProvokingVertex = true;1631break;1632}1633case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTENDED_DYNAMIC_STATE_2_FEATURES_EXT: {1634VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *features =1635(VkPhysicalDeviceExtendedDynamicState2FeaturesEXT *)ext;1636features->extendedDynamicState2 = true;1637features->extendedDynamicState2LogicOp = true;1638features->extendedDynamicState2PatchControlPoints = false;1639break;1640}1641case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_GLOBAL_PRIORITY_QUERY_FEATURES_EXT: {1642VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *features =1643(VkPhysicalDeviceGlobalPriorityQueryFeaturesEXT *)ext;1644features->globalPriorityQuery = true;1645break;1646}1647case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_FEATURES_KHR: {1648VkPhysicalDeviceAccelerationStructureFeaturesKHR *features =1649(VkPhysicalDeviceAccelerationStructureFeaturesKHR *)ext;1650features->accelerationStructure = true;1651features->accelerationStructureCaptureReplay = false;1652features->accelerationStructureIndirectBuild = false;1653features->accelerationStructureHostCommands = true;1654features->descriptorBindingAccelerationStructureUpdateAfterBind = true;1655break;1656}1657case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_SUBGROUP_UNIFORM_CONTROL_FLOW_FEATURES_KHR: {1658VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *features =1659(VkPhysicalDeviceShaderSubgroupUniformControlFlowFeaturesKHR *)ext;1660features->shaderSubgroupUniformControlFlow = true;1661break;1662}1663case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_FEATURES_EXT: {1664VkPhysicalDeviceMultiDrawFeaturesEXT *features = (VkPhysicalDeviceMultiDrawFeaturesEXT *)ext;1665features->multiDraw = true;1666break;1667}1668case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_COLOR_WRITE_ENABLE_FEATURES_EXT: {1669VkPhysicalDeviceColorWriteEnableFeaturesEXT *features =1670(VkPhysicalDeviceColorWriteEnableFeaturesEXT *)ext;1671features->colorWriteEnable = true;1672break;1673}1674default:1675break;1676}1677}1678#undef CORE_FEATURE1679}16801681static size_t1682radv_max_descriptor_set_size()1683{1684/* make sure that the entire descriptor set is addressable with a signed1685* 32-bit int. So the sum of all limits scaled by descriptor size has to1686* be at most 2 GiB. the combined image & samples object count as one of1687* both. This limit is for the pipeline layout, not for the set layout, but1688* there is no set limit, so we just set a pipeline limit. I don't think1689* any app is going to hit this soon. */1690return ((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -1691MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /1692(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +169332 /* storage buffer, 32 due to potential space wasted on alignment */ +169432 /* sampler, largest when combined with image */ + 64 /* sampled image */ +169564 /* storage image */);1696}16971698static uint32_t1699radv_uniform_buffer_offset_alignment(const struct radv_physical_device *pdevice)1700{1701uint32_t uniform_offset_alignment =1702driQueryOptioni(&pdevice->instance->dri_options, "radv_override_uniform_offset_alignment");1703if (!util_is_power_of_two_or_zero(uniform_offset_alignment)) {1704fprintf(stderr,1705"ERROR: invalid radv_override_uniform_offset_alignment setting %d:"1706"not a power of two\n",1707uniform_offset_alignment);1708uniform_offset_alignment = 0;1709}17101711/* Take at least the hardware limit. */1712return MAX2(uniform_offset_alignment, 4);1713}17141715void1716radv_GetPhysicalDeviceProperties(VkPhysicalDevice physicalDevice,1717VkPhysicalDeviceProperties *pProperties)1718{1719RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);1720VkSampleCountFlags sample_counts = 0xf;17211722size_t max_descriptor_set_size = radv_max_descriptor_set_size();17231724VkPhysicalDeviceLimits limits = {1725.maxImageDimension1D = (1 << 14),1726.maxImageDimension2D = (1 << 14),1727.maxImageDimension3D = (1 << 11),1728.maxImageDimensionCube = (1 << 14),1729.maxImageArrayLayers = (1 << 11),1730.maxTexelBufferElements = UINT32_MAX,1731.maxUniformBufferRange = UINT32_MAX,1732.maxStorageBufferRange = UINT32_MAX,1733.maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,1734.maxMemoryAllocationCount = UINT32_MAX,1735.maxSamplerAllocationCount = 64 * 1024,1736.bufferImageGranularity = 64, /* A cache line */1737.sparseAddressSpaceSize = RADV_MAX_MEMORY_ALLOCATION_SIZE, /* buffer max size */1738.maxBoundDescriptorSets = MAX_SETS,1739.maxPerStageDescriptorSamplers = max_descriptor_set_size,1740.maxPerStageDescriptorUniformBuffers = max_descriptor_set_size,1741.maxPerStageDescriptorStorageBuffers = max_descriptor_set_size,1742.maxPerStageDescriptorSampledImages = max_descriptor_set_size,1743.maxPerStageDescriptorStorageImages = max_descriptor_set_size,1744.maxPerStageDescriptorInputAttachments = max_descriptor_set_size,1745.maxPerStageResources = max_descriptor_set_size,1746.maxDescriptorSetSamplers = max_descriptor_set_size,1747.maxDescriptorSetUniformBuffers = max_descriptor_set_size,1748.maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS,1749.maxDescriptorSetStorageBuffers = max_descriptor_set_size,1750.maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS,1751.maxDescriptorSetSampledImages = max_descriptor_set_size,1752.maxDescriptorSetStorageImages = max_descriptor_set_size,1753.maxDescriptorSetInputAttachments = max_descriptor_set_size,1754.maxVertexInputAttributes = MAX_VERTEX_ATTRIBS,1755.maxVertexInputBindings = MAX_VBS,1756.maxVertexInputAttributeOffset = UINT32_MAX,1757.maxVertexInputBindingStride = 2048,1758.maxVertexOutputComponents = 128,1759.maxTessellationGenerationLevel = 64,1760.maxTessellationPatchSize = 32,1761.maxTessellationControlPerVertexInputComponents = 128,1762.maxTessellationControlPerVertexOutputComponents = 128,1763.maxTessellationControlPerPatchOutputComponents = 120,1764.maxTessellationControlTotalOutputComponents = 4096,1765.maxTessellationEvaluationInputComponents = 128,1766.maxTessellationEvaluationOutputComponents = 128,1767.maxGeometryShaderInvocations = 127,1768.maxGeometryInputComponents = 64,1769.maxGeometryOutputComponents = 128,1770.maxGeometryOutputVertices = 256,1771.maxGeometryTotalOutputComponents = 1024,1772.maxFragmentInputComponents = 128,1773.maxFragmentOutputAttachments = 8,1774.maxFragmentDualSrcAttachments = 1,1775.maxFragmentCombinedOutputResources = 8,1776.maxComputeSharedMemorySize = pdevice->rad_info.chip_class >= GFX7 ? 65536 : 32768,1777.maxComputeWorkGroupCount = {65535, 65535, 65535},1778.maxComputeWorkGroupInvocations = 1024,1779.maxComputeWorkGroupSize = {1024, 1024, 1024},1780.subPixelPrecisionBits = 8,1781.subTexelPrecisionBits = 8,1782.mipmapPrecisionBits = 8,1783.maxDrawIndexedIndexValue = UINT32_MAX,1784.maxDrawIndirectCount = UINT32_MAX,1785.maxSamplerLodBias = 16,1786.maxSamplerAnisotropy = 16,1787.maxViewports = MAX_VIEWPORTS,1788.maxViewportDimensions = {(1 << 14), (1 << 14)},1789.viewportBoundsRange = {INT16_MIN, INT16_MAX},1790.viewportSubPixelBits = 8,1791.minMemoryMapAlignment = 4096, /* A page */1792.minTexelBufferOffsetAlignment = 4,1793.minUniformBufferOffsetAlignment = radv_uniform_buffer_offset_alignment(pdevice),1794.minStorageBufferOffsetAlignment = 4,1795.minTexelOffset = -32,1796.maxTexelOffset = 31,1797.minTexelGatherOffset = -32,1798.maxTexelGatherOffset = 31,1799.minInterpolationOffset = -2,1800.maxInterpolationOffset = 2,1801.subPixelInterpolationOffsetBits = 8,1802.maxFramebufferWidth = (1 << 14),1803.maxFramebufferHeight = (1 << 14),1804.maxFramebufferLayers = (1 << 10),1805.framebufferColorSampleCounts = sample_counts,1806.framebufferDepthSampleCounts = sample_counts,1807.framebufferStencilSampleCounts = sample_counts,1808.framebufferNoAttachmentsSampleCounts = sample_counts,1809.maxColorAttachments = MAX_RTS,1810.sampledImageColorSampleCounts = sample_counts,1811.sampledImageIntegerSampleCounts = sample_counts,1812.sampledImageDepthSampleCounts = sample_counts,1813.sampledImageStencilSampleCounts = sample_counts,1814.storageImageSampleCounts = sample_counts,1815.maxSampleMaskWords = 1,1816.timestampComputeAndGraphics = true,1817.timestampPeriod = 1000000.0 / pdevice->rad_info.clock_crystal_freq,1818.maxClipDistances = 8,1819.maxCullDistances = 8,1820.maxCombinedClipAndCullDistances = 8,1821.discreteQueuePriorities = 2,1822.pointSizeRange = {0.0, 8191.875},1823.lineWidthRange = {0.0, 8191.875},1824.pointSizeGranularity = (1.0 / 8.0),1825.lineWidthGranularity = (1.0 / 8.0),1826.strictLines = false, /* FINISHME */1827.standardSampleLocations = true,1828.optimalBufferCopyOffsetAlignment = 128,1829.optimalBufferCopyRowPitchAlignment = 128,1830.nonCoherentAtomSize = 64,1831};18321833VkPhysicalDeviceType device_type;18341835if (pdevice->rad_info.has_dedicated_vram || pdevice->instance->report_apu_as_dgpu) {1836device_type = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU;1837} else {1838device_type = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU;1839}18401841*pProperties = (VkPhysicalDeviceProperties){1842.apiVersion = RADV_API_VERSION,1843.driverVersion = vk_get_driver_version(),1844.vendorID = ATI_VENDOR_ID,1845.deviceID = pdevice->rad_info.pci_id,1846.deviceType = device_type,1847.limits = limits,1848.sparseProperties =1849{1850.residencyNonResidentStrict = pdevice->rad_info.family >= CHIP_POLARIS10,1851.residencyStandard2DBlockShape = pdevice->rad_info.family >= CHIP_POLARIS10,1852},1853};18541855strcpy(pProperties->deviceName, pdevice->name);1856memcpy(pProperties->pipelineCacheUUID, pdevice->cache_uuid, VK_UUID_SIZE);1857}18581859static void1860radv_get_physical_device_properties_1_1(struct radv_physical_device *pdevice,1861VkPhysicalDeviceVulkan11Properties *p)1862{1863assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES);18641865memcpy(p->deviceUUID, pdevice->device_uuid, VK_UUID_SIZE);1866memcpy(p->driverUUID, pdevice->driver_uuid, VK_UUID_SIZE);1867memset(p->deviceLUID, 0, VK_LUID_SIZE);1868/* The LUID is for Windows. */1869p->deviceLUIDValid = false;1870p->deviceNodeMask = 0;18711872p->subgroupSize = RADV_SUBGROUP_SIZE;1873p->subgroupSupportedStages = VK_SHADER_STAGE_ALL_GRAPHICS | VK_SHADER_STAGE_COMPUTE_BIT;1874p->subgroupSupportedOperations =1875VK_SUBGROUP_FEATURE_BASIC_BIT | VK_SUBGROUP_FEATURE_VOTE_BIT |1876VK_SUBGROUP_FEATURE_ARITHMETIC_BIT | VK_SUBGROUP_FEATURE_BALLOT_BIT |1877VK_SUBGROUP_FEATURE_CLUSTERED_BIT | VK_SUBGROUP_FEATURE_QUAD_BIT |1878VK_SUBGROUP_FEATURE_SHUFFLE_BIT | VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT;1879p->subgroupQuadOperationsInAllStages = true;18801881p->pointClippingBehavior = VK_POINT_CLIPPING_BEHAVIOR_ALL_CLIP_PLANES;1882p->maxMultiviewViewCount = MAX_VIEWS;1883p->maxMultiviewInstanceIndex = INT_MAX;1884p->protectedNoFault = false;1885p->maxPerSetDescriptors = RADV_MAX_PER_SET_DESCRIPTORS;1886p->maxMemoryAllocationSize = RADV_MAX_MEMORY_ALLOCATION_SIZE;1887}18881889static void1890radv_get_physical_device_properties_1_2(struct radv_physical_device *pdevice,1891VkPhysicalDeviceVulkan12Properties *p)1892{1893assert(p->sType == VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES);18941895p->driverID = VK_DRIVER_ID_MESA_RADV;1896snprintf(p->driverName, VK_MAX_DRIVER_NAME_SIZE, "radv");1897snprintf(p->driverInfo, VK_MAX_DRIVER_INFO_SIZE, "Mesa " PACKAGE_VERSION MESA_GIT_SHA1 "%s",1898radv_get_compiler_string(pdevice));1899p->conformanceVersion = (VkConformanceVersion){1900.major = 1,1901.minor = 2,1902.subminor = 3,1903.patch = 0,1904};19051906/* On AMD hardware, denormals and rounding modes for fp16/fp64 are1907* controlled by the same config register.1908*/1909if (pdevice->rad_info.has_packed_math_16bit) {1910p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;1911p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_32_BIT_ONLY_KHR;1912} else {1913p->denormBehaviorIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;1914p->roundingModeIndependence = VK_SHADER_FLOAT_CONTROLS_INDEPENDENCE_ALL_KHR;1915}19161917/* With LLVM, do not allow both preserving and flushing denorms because1918* different shaders in the same pipeline can have different settings and1919* this won't work for merged shaders. To make it work, this requires LLVM1920* support for changing the register. The same logic applies for the1921* rounding modes because they are configured with the same config1922* register.1923*/1924p->shaderDenormFlushToZeroFloat32 = true;1925p->shaderDenormPreserveFloat32 = !pdevice->use_llvm;1926p->shaderRoundingModeRTEFloat32 = true;1927p->shaderRoundingModeRTZFloat32 = !pdevice->use_llvm;1928p->shaderSignedZeroInfNanPreserveFloat32 = true;19291930p->shaderDenormFlushToZeroFloat16 =1931pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;1932p->shaderDenormPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;1933p->shaderRoundingModeRTEFloat16 = pdevice->rad_info.has_packed_math_16bit;1934p->shaderRoundingModeRTZFloat16 = pdevice->rad_info.has_packed_math_16bit && !pdevice->use_llvm;1935p->shaderSignedZeroInfNanPreserveFloat16 = pdevice->rad_info.has_packed_math_16bit;19361937p->shaderDenormFlushToZeroFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;1938p->shaderDenormPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;1939p->shaderRoundingModeRTEFloat64 = pdevice->rad_info.chip_class >= GFX8;1940p->shaderRoundingModeRTZFloat64 = pdevice->rad_info.chip_class >= GFX8 && !pdevice->use_llvm;1941p->shaderSignedZeroInfNanPreserveFloat64 = pdevice->rad_info.chip_class >= GFX8;19421943p->maxUpdateAfterBindDescriptorsInAllPools = UINT32_MAX / 64;1944p->shaderUniformBufferArrayNonUniformIndexingNative = false;1945p->shaderSampledImageArrayNonUniformIndexingNative = false;1946p->shaderStorageBufferArrayNonUniformIndexingNative = false;1947p->shaderStorageImageArrayNonUniformIndexingNative = false;1948p->shaderInputAttachmentArrayNonUniformIndexingNative = false;1949p->robustBufferAccessUpdateAfterBind = true;1950p->quadDivergentImplicitLod = false;19511952size_t max_descriptor_set_size =1953((1ull << 31) - 16 * MAX_DYNAMIC_BUFFERS -1954MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_INLINE_UNIFORM_BLOCK_COUNT) /1955(32 /* uniform buffer, 32 due to potential space wasted on alignment */ +195632 /* storage buffer, 32 due to potential space wasted on alignment */ +195732 /* sampler, largest when combined with image */ + 64 /* sampled image */ +195864 /* storage image */);1959p->maxPerStageDescriptorUpdateAfterBindSamplers = max_descriptor_set_size;1960p->maxPerStageDescriptorUpdateAfterBindUniformBuffers = max_descriptor_set_size;1961p->maxPerStageDescriptorUpdateAfterBindStorageBuffers = max_descriptor_set_size;1962p->maxPerStageDescriptorUpdateAfterBindSampledImages = max_descriptor_set_size;1963p->maxPerStageDescriptorUpdateAfterBindStorageImages = max_descriptor_set_size;1964p->maxPerStageDescriptorUpdateAfterBindInputAttachments = max_descriptor_set_size;1965p->maxPerStageUpdateAfterBindResources = max_descriptor_set_size;1966p->maxDescriptorSetUpdateAfterBindSamplers = max_descriptor_set_size;1967p->maxDescriptorSetUpdateAfterBindUniformBuffers = max_descriptor_set_size;1968p->maxDescriptorSetUpdateAfterBindUniformBuffersDynamic = MAX_DYNAMIC_UNIFORM_BUFFERS;1969p->maxDescriptorSetUpdateAfterBindStorageBuffers = max_descriptor_set_size;1970p->maxDescriptorSetUpdateAfterBindStorageBuffersDynamic = MAX_DYNAMIC_STORAGE_BUFFERS;1971p->maxDescriptorSetUpdateAfterBindSampledImages = max_descriptor_set_size;1972p->maxDescriptorSetUpdateAfterBindStorageImages = max_descriptor_set_size;1973p->maxDescriptorSetUpdateAfterBindInputAttachments = max_descriptor_set_size;19741975/* We support all of the depth resolve modes */1976p->supportedDepthResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |1977VK_RESOLVE_MODE_AVERAGE_BIT_KHR | VK_RESOLVE_MODE_MIN_BIT_KHR |1978VK_RESOLVE_MODE_MAX_BIT_KHR;19791980/* Average doesn't make sense for stencil so we don't support that */1981p->supportedStencilResolveModes = VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR |1982VK_RESOLVE_MODE_MIN_BIT_KHR | VK_RESOLVE_MODE_MAX_BIT_KHR;19831984p->independentResolveNone = true;1985p->independentResolve = true;19861987/* GFX6-8 only support single channel min/max filter. */1988p->filterMinmaxImageComponentMapping = pdevice->rad_info.chip_class >= GFX9;1989p->filterMinmaxSingleComponentFormats = true;19901991p->maxTimelineSemaphoreValueDifference = UINT64_MAX;19921993p->framebufferIntegerColorSampleCounts = VK_SAMPLE_COUNT_1_BIT;1994}19951996void1997radv_GetPhysicalDeviceProperties2(VkPhysicalDevice physicalDevice,1998VkPhysicalDeviceProperties2 *pProperties)1999{2000RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);2001radv_GetPhysicalDeviceProperties(physicalDevice, &pProperties->properties);20022003VkPhysicalDeviceVulkan11Properties core_1_1 = {2004.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES,2005};2006radv_get_physical_device_properties_1_1(pdevice, &core_1_1);20072008VkPhysicalDeviceVulkan12Properties core_1_2 = {2009.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES,2010};2011radv_get_physical_device_properties_1_2(pdevice, &core_1_2);20122013#define CORE_RENAMED_PROPERTY(major, minor, ext_property, core_property) \2014memcpy(&properties->ext_property, &core_##major##_##minor.core_property, \2015sizeof(core_##major##_##minor.core_property))20162017#define CORE_PROPERTY(major, minor, property) \2018CORE_RENAMED_PROPERTY(major, minor, property, property)20192020vk_foreach_struct(ext, pProperties->pNext)2021{2022switch (ext->sType) {2023case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PUSH_DESCRIPTOR_PROPERTIES_KHR: {2024VkPhysicalDevicePushDescriptorPropertiesKHR *properties =2025(VkPhysicalDevicePushDescriptorPropertiesKHR *)ext;2026properties->maxPushDescriptors = MAX_PUSH_DESCRIPTORS;2027break;2028}2029case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ID_PROPERTIES: {2030VkPhysicalDeviceIDProperties *properties = (VkPhysicalDeviceIDProperties *)ext;2031CORE_PROPERTY(1, 1, deviceUUID);2032CORE_PROPERTY(1, 1, driverUUID);2033CORE_PROPERTY(1, 1, deviceLUID);2034CORE_PROPERTY(1, 1, deviceLUIDValid);2035break;2036}2037case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTIVIEW_PROPERTIES: {2038VkPhysicalDeviceMultiviewProperties *properties =2039(VkPhysicalDeviceMultiviewProperties *)ext;2040CORE_PROPERTY(1, 1, maxMultiviewViewCount);2041CORE_PROPERTY(1, 1, maxMultiviewInstanceIndex);2042break;2043}2044case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_POINT_CLIPPING_PROPERTIES: {2045VkPhysicalDevicePointClippingProperties *properties =2046(VkPhysicalDevicePointClippingProperties *)ext;2047CORE_PROPERTY(1, 1, pointClippingBehavior);2048break;2049}2050case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DISCARD_RECTANGLE_PROPERTIES_EXT: {2051VkPhysicalDeviceDiscardRectanglePropertiesEXT *properties =2052(VkPhysicalDeviceDiscardRectanglePropertiesEXT *)ext;2053properties->maxDiscardRectangles = MAX_DISCARD_RECTANGLES;2054break;2055}2056case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_EXTERNAL_MEMORY_HOST_PROPERTIES_EXT: {2057VkPhysicalDeviceExternalMemoryHostPropertiesEXT *properties =2058(VkPhysicalDeviceExternalMemoryHostPropertiesEXT *)ext;2059properties->minImportedHostPointerAlignment = 4096;2060break;2061}2062case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES: {2063VkPhysicalDeviceSubgroupProperties *properties = (VkPhysicalDeviceSubgroupProperties *)ext;2064CORE_PROPERTY(1, 1, subgroupSize);2065CORE_RENAMED_PROPERTY(1, 1, supportedStages, subgroupSupportedStages);2066CORE_RENAMED_PROPERTY(1, 1, supportedOperations, subgroupSupportedOperations);2067CORE_RENAMED_PROPERTY(1, 1, quadOperationsInAllStages, subgroupQuadOperationsInAllStages);2068break;2069}2070case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MAINTENANCE_3_PROPERTIES: {2071VkPhysicalDeviceMaintenance3Properties *properties =2072(VkPhysicalDeviceMaintenance3Properties *)ext;2073CORE_PROPERTY(1, 1, maxPerSetDescriptors);2074CORE_PROPERTY(1, 1, maxMemoryAllocationSize);2075break;2076}2077case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLER_FILTER_MINMAX_PROPERTIES: {2078VkPhysicalDeviceSamplerFilterMinmaxProperties *properties =2079(VkPhysicalDeviceSamplerFilterMinmaxProperties *)ext;2080CORE_PROPERTY(1, 2, filterMinmaxImageComponentMapping);2081CORE_PROPERTY(1, 2, filterMinmaxSingleComponentFormats);2082break;2083}2084case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_AMD: {2085VkPhysicalDeviceShaderCorePropertiesAMD *properties =2086(VkPhysicalDeviceShaderCorePropertiesAMD *)ext;20872088/* Shader engines. */2089properties->shaderEngineCount = pdevice->rad_info.max_se;2090properties->shaderArraysPerEngineCount = pdevice->rad_info.max_sa_per_se;2091properties->computeUnitsPerShaderArray = pdevice->rad_info.min_good_cu_per_sa;2092properties->simdPerComputeUnit = pdevice->rad_info.num_simd_per_compute_unit;2093properties->wavefrontsPerSimd = pdevice->rad_info.max_wave64_per_simd;2094properties->wavefrontSize = 64;20952096/* SGPR. */2097properties->sgprsPerSimd = pdevice->rad_info.num_physical_sgprs_per_simd;2098properties->minSgprAllocation = pdevice->rad_info.min_sgpr_alloc;2099properties->maxSgprAllocation = pdevice->rad_info.max_sgpr_alloc;2100properties->sgprAllocationGranularity = pdevice->rad_info.sgpr_alloc_granularity;21012102/* VGPR. */2103properties->vgprsPerSimd = pdevice->rad_info.num_physical_wave64_vgprs_per_simd;2104properties->minVgprAllocation = pdevice->rad_info.min_wave64_vgpr_alloc;2105properties->maxVgprAllocation = pdevice->rad_info.max_vgpr_alloc;2106properties->vgprAllocationGranularity = pdevice->rad_info.wave64_vgpr_alloc_granularity;2107break;2108}2109case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SHADER_CORE_PROPERTIES_2_AMD: {2110VkPhysicalDeviceShaderCoreProperties2AMD *properties =2111(VkPhysicalDeviceShaderCoreProperties2AMD *)ext;21122113properties->shaderCoreFeatures = 0;2114properties->activeComputeUnitCount = pdevice->rad_info.num_good_compute_units;2115break;2116}2117case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VERTEX_ATTRIBUTE_DIVISOR_PROPERTIES_EXT: {2118VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *properties =2119(VkPhysicalDeviceVertexAttributeDivisorPropertiesEXT *)ext;2120properties->maxVertexAttribDivisor = UINT32_MAX;2121break;2122}2123case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DESCRIPTOR_INDEXING_PROPERTIES: {2124VkPhysicalDeviceDescriptorIndexingProperties *properties =2125(VkPhysicalDeviceDescriptorIndexingProperties *)ext;2126CORE_PROPERTY(1, 2, maxUpdateAfterBindDescriptorsInAllPools);2127CORE_PROPERTY(1, 2, shaderUniformBufferArrayNonUniformIndexingNative);2128CORE_PROPERTY(1, 2, shaderSampledImageArrayNonUniformIndexingNative);2129CORE_PROPERTY(1, 2, shaderStorageBufferArrayNonUniformIndexingNative);2130CORE_PROPERTY(1, 2, shaderStorageImageArrayNonUniformIndexingNative);2131CORE_PROPERTY(1, 2, shaderInputAttachmentArrayNonUniformIndexingNative);2132CORE_PROPERTY(1, 2, robustBufferAccessUpdateAfterBind);2133CORE_PROPERTY(1, 2, quadDivergentImplicitLod);2134CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSamplers);2135CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindUniformBuffers);2136CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageBuffers);2137CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindSampledImages);2138CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindStorageImages);2139CORE_PROPERTY(1, 2, maxPerStageDescriptorUpdateAfterBindInputAttachments);2140CORE_PROPERTY(1, 2, maxPerStageUpdateAfterBindResources);2141CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSamplers);2142CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffers);2143CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindUniformBuffersDynamic);2144CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffers);2145CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageBuffersDynamic);2146CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindSampledImages);2147CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindStorageImages);2148CORE_PROPERTY(1, 2, maxDescriptorSetUpdateAfterBindInputAttachments);2149break;2150}2151case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROTECTED_MEMORY_PROPERTIES: {2152VkPhysicalDeviceProtectedMemoryProperties *properties =2153(VkPhysicalDeviceProtectedMemoryProperties *)ext;2154CORE_PROPERTY(1, 1, protectedNoFault);2155break;2156}2157case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CONSERVATIVE_RASTERIZATION_PROPERTIES_EXT: {2158VkPhysicalDeviceConservativeRasterizationPropertiesEXT *properties =2159(VkPhysicalDeviceConservativeRasterizationPropertiesEXT *)ext;2160properties->primitiveOverestimationSize = 0;2161properties->maxExtraPrimitiveOverestimationSize = 0;2162properties->extraPrimitiveOverestimationSizeGranularity = 0;2163properties->primitiveUnderestimation = false;2164properties->conservativePointAndLineRasterization = false;2165properties->degenerateTrianglesRasterized = true;2166properties->degenerateLinesRasterized = false;2167properties->fullyCoveredFragmentShaderInputVariable = false;2168properties->conservativeRasterizationPostDepthCoverage = false;2169break;2170}2171#ifndef _WIN322172case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PCI_BUS_INFO_PROPERTIES_EXT: {2173VkPhysicalDevicePCIBusInfoPropertiesEXT *properties =2174(VkPhysicalDevicePCIBusInfoPropertiesEXT *)ext;2175properties->pciDomain = pdevice->bus_info.domain;2176properties->pciBus = pdevice->bus_info.bus;2177properties->pciDevice = pdevice->bus_info.dev;2178properties->pciFunction = pdevice->bus_info.func;2179break;2180}2181#endif2182case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRIVER_PROPERTIES: {2183VkPhysicalDeviceDriverProperties *properties = (VkPhysicalDeviceDriverProperties *)ext;2184CORE_PROPERTY(1, 2, driverID);2185CORE_PROPERTY(1, 2, driverName);2186CORE_PROPERTY(1, 2, driverInfo);2187CORE_PROPERTY(1, 2, conformanceVersion);2188break;2189}2190case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TRANSFORM_FEEDBACK_PROPERTIES_EXT: {2191VkPhysicalDeviceTransformFeedbackPropertiesEXT *properties =2192(VkPhysicalDeviceTransformFeedbackPropertiesEXT *)ext;2193properties->maxTransformFeedbackStreams = MAX_SO_STREAMS;2194properties->maxTransformFeedbackBuffers = MAX_SO_BUFFERS;2195properties->maxTransformFeedbackBufferSize = UINT32_MAX;2196properties->maxTransformFeedbackStreamDataSize = 512;2197properties->maxTransformFeedbackBufferDataSize = 512;2198properties->maxTransformFeedbackBufferDataStride = 512;2199properties->transformFeedbackQueries = !pdevice->use_ngg_streamout;2200properties->transformFeedbackStreamsLinesTriangles = !pdevice->use_ngg_streamout;2201properties->transformFeedbackRasterizationStreamSelect = false;2202properties->transformFeedbackDraw = true;2203break;2204}2205case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_INLINE_UNIFORM_BLOCK_PROPERTIES_EXT: {2206VkPhysicalDeviceInlineUniformBlockPropertiesEXT *props =2207(VkPhysicalDeviceInlineUniformBlockPropertiesEXT *)ext;22082209props->maxInlineUniformBlockSize = MAX_INLINE_UNIFORM_BLOCK_SIZE;2210props->maxPerStageDescriptorInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;2211props->maxPerStageDescriptorUpdateAfterBindInlineUniformBlocks =2212MAX_INLINE_UNIFORM_BLOCK_SIZE * MAX_SETS;2213props->maxDescriptorSetInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;2214props->maxDescriptorSetUpdateAfterBindInlineUniformBlocks = MAX_INLINE_UNIFORM_BLOCK_COUNT;2215break;2216}2217case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SAMPLE_LOCATIONS_PROPERTIES_EXT: {2218VkPhysicalDeviceSampleLocationsPropertiesEXT *properties =2219(VkPhysicalDeviceSampleLocationsPropertiesEXT *)ext;22202221VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;2222if (pdevice->rad_info.chip_class < GFX10) {2223/* FIXME: Some MSAA8x tests fail for weird2224* reasons on GFX10+ when the same pattern is2225* used inside the same render pass.2226*/2227supported_samples |= VK_SAMPLE_COUNT_8_BIT;2228}22292230properties->sampleLocationSampleCounts = supported_samples;2231properties->maxSampleLocationGridSize = (VkExtent2D){2, 2};2232properties->sampleLocationCoordinateRange[0] = 0.0f;2233properties->sampleLocationCoordinateRange[1] = 0.9375f;2234properties->sampleLocationSubPixelBits = 4;2235properties->variableSampleLocations = false;2236break;2237}2238case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DEPTH_STENCIL_RESOLVE_PROPERTIES: {2239VkPhysicalDeviceDepthStencilResolveProperties *properties =2240(VkPhysicalDeviceDepthStencilResolveProperties *)ext;2241CORE_PROPERTY(1, 2, supportedDepthResolveModes);2242CORE_PROPERTY(1, 2, supportedStencilResolveModes);2243CORE_PROPERTY(1, 2, independentResolveNone);2244CORE_PROPERTY(1, 2, independentResolve);2245break;2246}2247case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TEXEL_BUFFER_ALIGNMENT_PROPERTIES_EXT: {2248VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *properties =2249(VkPhysicalDeviceTexelBufferAlignmentPropertiesEXT *)ext;2250properties->storageTexelBufferOffsetAlignmentBytes = 4;2251properties->storageTexelBufferOffsetSingleTexelAlignment = true;2252properties->uniformTexelBufferOffsetAlignmentBytes = 4;2253properties->uniformTexelBufferOffsetSingleTexelAlignment = true;2254break;2255}2256case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FLOAT_CONTROLS_PROPERTIES: {2257VkPhysicalDeviceFloatControlsProperties *properties =2258(VkPhysicalDeviceFloatControlsProperties *)ext;2259CORE_PROPERTY(1, 2, denormBehaviorIndependence);2260CORE_PROPERTY(1, 2, roundingModeIndependence);2261CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat16);2262CORE_PROPERTY(1, 2, shaderDenormPreserveFloat16);2263CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat16);2264CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat16);2265CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat16);2266CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat32);2267CORE_PROPERTY(1, 2, shaderDenormPreserveFloat32);2268CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat32);2269CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat32);2270CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat32);2271CORE_PROPERTY(1, 2, shaderDenormFlushToZeroFloat64);2272CORE_PROPERTY(1, 2, shaderDenormPreserveFloat64);2273CORE_PROPERTY(1, 2, shaderRoundingModeRTEFloat64);2274CORE_PROPERTY(1, 2, shaderRoundingModeRTZFloat64);2275CORE_PROPERTY(1, 2, shaderSignedZeroInfNanPreserveFloat64);2276break;2277}2278case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_TIMELINE_SEMAPHORE_PROPERTIES: {2279VkPhysicalDeviceTimelineSemaphoreProperties *properties =2280(VkPhysicalDeviceTimelineSemaphoreProperties *)ext;2281CORE_PROPERTY(1, 2, maxTimelineSemaphoreValueDifference);2282break;2283}2284case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_SIZE_CONTROL_PROPERTIES_EXT: {2285VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *props =2286(VkPhysicalDeviceSubgroupSizeControlPropertiesEXT *)ext;2287props->minSubgroupSize = 64;2288props->maxSubgroupSize = 64;2289props->maxComputeWorkgroupSubgroups = UINT32_MAX;2290props->requiredSubgroupSizeStages = 0;22912292if (pdevice->rad_info.chip_class >= GFX10) {2293/* Only GFX10+ supports wave32. */2294props->minSubgroupSize = 32;2295props->requiredSubgroupSizeStages = VK_SHADER_STAGE_COMPUTE_BIT;2296}2297break;2298}2299case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_1_PROPERTIES:2300radv_get_physical_device_properties_1_1(pdevice, (void *)ext);2301break;2302case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_VULKAN_1_2_PROPERTIES:2303radv_get_physical_device_properties_1_2(pdevice, (void *)ext);2304break;2305case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_LINE_RASTERIZATION_PROPERTIES_EXT: {2306VkPhysicalDeviceLineRasterizationPropertiesEXT *props =2307(VkPhysicalDeviceLineRasterizationPropertiesEXT *)ext;2308props->lineSubPixelPrecisionBits = 4;2309break;2310}2311case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_PROPERTIES_EXT: {2312VkPhysicalDeviceRobustness2PropertiesEXT *properties =2313(VkPhysicalDeviceRobustness2PropertiesEXT *)ext;2314properties->robustStorageBufferAccessSizeAlignment = 4;2315properties->robustUniformBufferAccessSizeAlignment = 4;2316break;2317}2318case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_PROPERTIES_EXT: {2319VkPhysicalDeviceCustomBorderColorPropertiesEXT *props =2320(VkPhysicalDeviceCustomBorderColorPropertiesEXT *)ext;2321props->maxCustomBorderColorSamplers = RADV_BORDER_COLOR_COUNT;2322break;2323}2324case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR: {2325VkPhysicalDeviceFragmentShadingRatePropertiesKHR *props =2326(VkPhysicalDeviceFragmentShadingRatePropertiesKHR *)ext;2327props->minFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};2328props->maxFragmentShadingRateAttachmentTexelSize = (VkExtent2D){8, 8};2329props->maxFragmentShadingRateAttachmentTexelSizeAspectRatio = 1;2330props->primitiveFragmentShadingRateWithMultipleViewports = true;2331props->layeredShadingRateAttachments = false; /* TODO */2332props->fragmentShadingRateNonTrivialCombinerOps = true;2333props->maxFragmentSize = (VkExtent2D){2, 2};2334props->maxFragmentSizeAspectRatio = 1;2335props->maxFragmentShadingRateCoverageSamples = 2 * 2;2336props->maxFragmentShadingRateRasterizationSamples = VK_SAMPLE_COUNT_8_BIT;2337props->fragmentShadingRateWithShaderDepthStencilWrites = false;2338props->fragmentShadingRateWithSampleMask = true;2339props->fragmentShadingRateWithShaderSampleMask = false;2340props->fragmentShadingRateWithConservativeRasterization = true;2341props->fragmentShadingRateWithFragmentShaderInterlock = false;2342props->fragmentShadingRateWithCustomSampleLocations = true;2343props->fragmentShadingRateStrictMultiplyCombiner = true;2344break;2345}2346case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROVOKING_VERTEX_PROPERTIES_EXT: {2347VkPhysicalDeviceProvokingVertexPropertiesEXT *props =2348(VkPhysicalDeviceProvokingVertexPropertiesEXT *)ext;2349props->provokingVertexModePerPipeline = true;2350props->transformFeedbackPreservesTriangleFanProvokingVertex = true;2351break;2352}2353case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ACCELERATION_STRUCTURE_PROPERTIES_KHR: {2354VkPhysicalDeviceAccelerationStructurePropertiesKHR *props =2355(VkPhysicalDeviceAccelerationStructurePropertiesKHR *)ext;2356props->maxGeometryCount = (1 << 24) - 1;2357props->maxInstanceCount = (1 << 24) - 1;2358props->maxPrimitiveCount = (1 << 29) - 1;2359props->maxPerStageDescriptorAccelerationStructures =2360pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;2361props->maxPerStageDescriptorUpdateAfterBindAccelerationStructures =2362pProperties->properties.limits.maxPerStageDescriptorStorageBuffers;2363props->maxDescriptorSetAccelerationStructures =2364pProperties->properties.limits.maxDescriptorSetStorageBuffers;2365props->maxDescriptorSetUpdateAfterBindAccelerationStructures =2366pProperties->properties.limits.maxDescriptorSetStorageBuffers;2367props->minAccelerationStructureScratchOffsetAlignment = 128;2368break;2369}2370#ifndef _WIN322371case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_DRM_PROPERTIES_EXT: {2372VkPhysicalDeviceDrmPropertiesEXT *props = (VkPhysicalDeviceDrmPropertiesEXT *)ext;2373if (pdevice->available_nodes & (1 << DRM_NODE_PRIMARY)) {2374props->hasPrimary = true;2375props->primaryMajor = (int64_t)major(pdevice->primary_devid);2376props->primaryMinor = (int64_t)minor(pdevice->primary_devid);2377} else {2378props->hasPrimary = false;2379}2380if (pdevice->available_nodes & (1 << DRM_NODE_RENDER)) {2381props->hasRender = true;2382props->renderMajor = (int64_t)major(pdevice->render_devid);2383props->renderMinor = (int64_t)minor(pdevice->render_devid);2384} else {2385props->hasRender = false;2386}2387break;2388}2389#endif2390case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_MULTI_DRAW_PROPERTIES_EXT: {2391VkPhysicalDeviceMultiDrawPropertiesEXT *props = (VkPhysicalDeviceMultiDrawPropertiesEXT *)ext;2392props->maxMultiDrawCount = 2048;2393break;2394}2395default:2396break;2397}2398}2399}24002401static void2402radv_get_physical_device_queue_family_properties(struct radv_physical_device *pdevice,2403uint32_t *pCount,2404VkQueueFamilyProperties **pQueueFamilyProperties)2405{2406int num_queue_families = 1;2407int idx;2408if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&2409!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE))2410num_queue_families++;24112412if (pQueueFamilyProperties == NULL) {2413*pCount = num_queue_families;2414return;2415}24162417if (!*pCount)2418return;24192420idx = 0;2421if (*pCount >= 1) {2422*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){2423.queueFlags = VK_QUEUE_GRAPHICS_BIT | VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT |2424VK_QUEUE_SPARSE_BINDING_BIT,2425.queueCount = 1,2426.timestampValidBits = 64,2427.minImageTransferGranularity = (VkExtent3D){1, 1, 1},2428};2429idx++;2430}24312432if (pdevice->rad_info.num_rings[RING_COMPUTE] > 0 &&2433!(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) {2434if (*pCount > idx) {2435*pQueueFamilyProperties[idx] = (VkQueueFamilyProperties){2436.queueFlags =2437VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT | VK_QUEUE_SPARSE_BINDING_BIT,2438.queueCount = pdevice->rad_info.num_rings[RING_COMPUTE],2439.timestampValidBits = 64,2440.minImageTransferGranularity = (VkExtent3D){1, 1, 1},2441};2442idx++;2443}2444}2445*pCount = idx;2446}24472448void2449radv_GetPhysicalDeviceQueueFamilyProperties(VkPhysicalDevice physicalDevice, uint32_t *pCount,2450VkQueueFamilyProperties *pQueueFamilyProperties)2451{2452RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);2453if (!pQueueFamilyProperties) {2454radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);2455return;2456}2457VkQueueFamilyProperties *properties[] = {2458pQueueFamilyProperties + 0,2459pQueueFamilyProperties + 1,2460pQueueFamilyProperties + 2,2461};2462radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);2463assert(*pCount <= 3);2464}24652466static const VkQueueGlobalPriorityEXT radv_global_queue_priorities[] = {2467VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT,2468VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT,2469VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT,2470VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT,2471};24722473void2474radv_GetPhysicalDeviceQueueFamilyProperties2(VkPhysicalDevice physicalDevice, uint32_t *pCount,2475VkQueueFamilyProperties2 *pQueueFamilyProperties)2476{2477RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);2478if (!pQueueFamilyProperties) {2479radv_get_physical_device_queue_family_properties(pdevice, pCount, NULL);2480return;2481}2482VkQueueFamilyProperties *properties[] = {2483&pQueueFamilyProperties[0].queueFamilyProperties,2484&pQueueFamilyProperties[1].queueFamilyProperties,2485&pQueueFamilyProperties[2].queueFamilyProperties,2486};2487radv_get_physical_device_queue_family_properties(pdevice, pCount, properties);2488assert(*pCount <= 3);24892490for (uint32_t i = 0; i < *pCount; i++) {2491vk_foreach_struct(ext, pQueueFamilyProperties[i].pNext)2492{2493switch (ext->sType) {2494case VK_STRUCTURE_TYPE_QUEUE_FAMILY_GLOBAL_PRIORITY_PROPERTIES_EXT: {2495VkQueueFamilyGlobalPriorityPropertiesEXT *prop =2496(VkQueueFamilyGlobalPriorityPropertiesEXT *)ext;2497STATIC_ASSERT(ARRAY_SIZE(radv_global_queue_priorities) <= VK_MAX_GLOBAL_PRIORITY_SIZE_EXT);2498prop->priorityCount = ARRAY_SIZE(radv_global_queue_priorities);2499memcpy(&prop->priorities, radv_global_queue_priorities, sizeof(radv_global_queue_priorities));2500break;2501}2502default:2503break;2504}2505}2506}2507}25082509void2510radv_GetPhysicalDeviceMemoryProperties(VkPhysicalDevice physicalDevice,2511VkPhysicalDeviceMemoryProperties *pMemoryProperties)2512{2513RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);25142515*pMemoryProperties = physical_device->memory_properties;2516}25172518static void2519radv_get_memory_budget_properties(VkPhysicalDevice physicalDevice,2520VkPhysicalDeviceMemoryBudgetPropertiesEXT *memoryBudget)2521{2522RADV_FROM_HANDLE(radv_physical_device, device, physicalDevice);2523VkPhysicalDeviceMemoryProperties *memory_properties = &device->memory_properties;25242525/* For all memory heaps, the computation of budget is as follow:2526* heap_budget = heap_size - global_heap_usage + app_heap_usage2527*2528* The Vulkan spec 1.1.97 says that the budget should include any2529* currently allocated device memory.2530*2531* Note that the application heap usages are not really accurate (eg.2532* in presence of shared buffers).2533*/2534if (!device->rad_info.has_dedicated_vram) {2535/* On APUs, the driver exposes fake heaps to the application because usually the carveout is2536* too small for games but the budgets need to be redistributed accordingly.2537*/25382539assert(device->heaps == (RADV_HEAP_GTT | RADV_HEAP_VRAM_VIS));2540assert(device->memory_properties.memoryHeaps[0].flags == 0); /* GTT */2541assert(device->memory_properties.memoryHeaps[1].flags == VK_MEMORY_HEAP_DEVICE_LOCAL_BIT);2542uint8_t gtt_heap_idx = 0, vram_vis_heap_idx = 1;25432544/* Get the visible VRAM/GTT heap sizes and internal usages. */2545uint64_t gtt_heap_size = device->memory_properties.memoryHeaps[gtt_heap_idx].size;2546uint64_t vram_vis_heap_size = device->memory_properties.memoryHeaps[vram_vis_heap_idx].size;25472548uint64_t vram_vis_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS) +2549device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);2550uint64_t gtt_internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);25512552/* Compute the total heap size, internal and system usage. */2553uint64_t total_heap_size = vram_vis_heap_size + gtt_heap_size;2554uint64_t total_internal_usage = vram_vis_internal_usage + gtt_internal_usage;2555uint64_t total_system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE) +2556device->ws->query_value(device->ws, RADEON_GTT_USAGE);25572558uint64_t total_usage = MAX2(total_internal_usage, total_system_usage);25592560/* Compute the total free space that can be allocated for this process accross all heaps. */2561uint64_t total_free_space = total_heap_size - MIN2(total_heap_size, total_usage);25622563/* Compute the remaining visible VRAM size for this process. */2564uint64_t vram_vis_free_space = vram_vis_heap_size - MIN2(vram_vis_heap_size, vram_vis_internal_usage);25652566/* Distribute the total free space (2/3rd as VRAM and 1/3rd as GTT) to match the heap sizes,2567* and align down to the page size to be conservative.2568*/2569vram_vis_free_space = ROUND_DOWN_TO(MIN2((total_free_space * 2) / 3, vram_vis_free_space),2570device->rad_info.gart_page_size);2571uint64_t gtt_free_space = total_free_space - vram_vis_free_space;25722573memoryBudget->heapBudget[vram_vis_heap_idx] = vram_vis_free_space + vram_vis_internal_usage;2574memoryBudget->heapUsage[vram_vis_heap_idx] = vram_vis_internal_usage;2575memoryBudget->heapBudget[gtt_heap_idx] = gtt_free_space + gtt_internal_usage;2576memoryBudget->heapUsage[gtt_heap_idx] = gtt_internal_usage;2577} else {2578unsigned mask = device->heaps;2579unsigned heap = 0;2580while (mask) {2581uint64_t internal_usage = 0, system_usage = 0;2582unsigned type = 1u << u_bit_scan(&mask);25832584switch (type) {2585case RADV_HEAP_VRAM:2586internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);2587system_usage = device->ws->query_value(device->ws, RADEON_VRAM_USAGE);2588break;2589case RADV_HEAP_VRAM_VIS:2590internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM_VIS);2591if (!(device->heaps & RADV_HEAP_VRAM))2592internal_usage += device->ws->query_value(device->ws, RADEON_ALLOCATED_VRAM);2593system_usage = device->ws->query_value(device->ws, RADEON_VRAM_VIS_USAGE);2594break;2595case RADV_HEAP_GTT:2596internal_usage = device->ws->query_value(device->ws, RADEON_ALLOCATED_GTT);2597system_usage = device->ws->query_value(device->ws, RADEON_GTT_USAGE);2598break;2599}26002601uint64_t total_usage = MAX2(internal_usage, system_usage);26022603uint64_t free_space = device->memory_properties.memoryHeaps[heap].size -2604MIN2(device->memory_properties.memoryHeaps[heap].size, total_usage);2605memoryBudget->heapBudget[heap] = free_space + internal_usage;2606memoryBudget->heapUsage[heap] = internal_usage;2607++heap;2608}26092610assert(heap == memory_properties->memoryHeapCount);2611}26122613/* The heapBudget and heapUsage values must be zero for array elements2614* greater than or equal to2615* VkPhysicalDeviceMemoryProperties::memoryHeapCount.2616*/2617for (uint32_t i = memory_properties->memoryHeapCount; i < VK_MAX_MEMORY_HEAPS; i++) {2618memoryBudget->heapBudget[i] = 0;2619memoryBudget->heapUsage[i] = 0;2620}2621}26222623void2624radv_GetPhysicalDeviceMemoryProperties2(VkPhysicalDevice physicalDevice,2625VkPhysicalDeviceMemoryProperties2 *pMemoryProperties)2626{2627radv_GetPhysicalDeviceMemoryProperties(physicalDevice, &pMemoryProperties->memoryProperties);26282629VkPhysicalDeviceMemoryBudgetPropertiesEXT *memory_budget =2630vk_find_struct(pMemoryProperties->pNext, PHYSICAL_DEVICE_MEMORY_BUDGET_PROPERTIES_EXT);2631if (memory_budget)2632radv_get_memory_budget_properties(physicalDevice, memory_budget);2633}26342635VkResult2636radv_GetMemoryHostPointerPropertiesEXT(2637VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType, const void *pHostPointer,2638VkMemoryHostPointerPropertiesEXT *pMemoryHostPointerProperties)2639{2640RADV_FROM_HANDLE(radv_device, device, _device);26412642switch (handleType) {2643case VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT: {2644const struct radv_physical_device *physical_device = device->physical_device;2645uint32_t memoryTypeBits = 0;2646for (int i = 0; i < physical_device->memory_properties.memoryTypeCount; i++) {2647if (physical_device->memory_domains[i] == RADEON_DOMAIN_GTT &&2648!(physical_device->memory_flags[i] & RADEON_FLAG_GTT_WC)) {2649memoryTypeBits = (1 << i);2650break;2651}2652}2653pMemoryHostPointerProperties->memoryTypeBits = memoryTypeBits;2654return VK_SUCCESS;2655}2656default:2657return VK_ERROR_INVALID_EXTERNAL_HANDLE;2658}2659}26602661static enum radeon_ctx_priority2662radv_get_queue_global_priority(const VkDeviceQueueGlobalPriorityCreateInfoEXT *pObj)2663{2664/* Default to MEDIUM when a specific global priority isn't requested */2665if (!pObj)2666return RADEON_CTX_PRIORITY_MEDIUM;26672668switch (pObj->globalPriority) {2669case VK_QUEUE_GLOBAL_PRIORITY_REALTIME_EXT:2670return RADEON_CTX_PRIORITY_REALTIME;2671case VK_QUEUE_GLOBAL_PRIORITY_HIGH_EXT:2672return RADEON_CTX_PRIORITY_HIGH;2673case VK_QUEUE_GLOBAL_PRIORITY_MEDIUM_EXT:2674return RADEON_CTX_PRIORITY_MEDIUM;2675case VK_QUEUE_GLOBAL_PRIORITY_LOW_EXT:2676return RADEON_CTX_PRIORITY_LOW;2677default:2678unreachable("Illegal global priority value");2679return RADEON_CTX_PRIORITY_INVALID;2680}2681}26822683static int2684radv_queue_init(struct radv_device *device, struct radv_queue *queue, uint32_t queue_family_index,2685int idx, VkDeviceQueueCreateFlags flags,2686const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority)2687{2688queue->device = device;2689queue->queue_family_index = queue_family_index;2690queue->queue_idx = idx;2691queue->priority = radv_get_queue_global_priority(global_priority);2692queue->flags = flags;2693queue->hw_ctx = device->hw_ctx[queue->priority];26942695vk_object_base_init(&device->vk, &queue->base, VK_OBJECT_TYPE_QUEUE);26962697list_inithead(&queue->pending_submissions);2698mtx_init(&queue->pending_mutex, mtx_plain);26992700mtx_init(&queue->thread_mutex, mtx_plain);2701if (u_cnd_monotonic_init(&queue->thread_cond)) {2702vk_object_base_finish(&queue->base);2703return vk_error(device->instance, VK_ERROR_INITIALIZATION_FAILED);2704}2705queue->cond_created = true;27062707return VK_SUCCESS;2708}27092710static void2711radv_queue_finish(struct radv_queue *queue)2712{2713if (queue->hw_ctx) {2714if (queue->cond_created) {2715if (queue->thread_running) {2716p_atomic_set(&queue->thread_exit, true);2717u_cnd_monotonic_broadcast(&queue->thread_cond);2718thrd_join(queue->submission_thread, NULL);2719}27202721u_cnd_monotonic_destroy(&queue->thread_cond);2722}27232724mtx_destroy(&queue->pending_mutex);2725mtx_destroy(&queue->thread_mutex);2726}27272728if (queue->initial_full_flush_preamble_cs)2729queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);2730if (queue->initial_preamble_cs)2731queue->device->ws->cs_destroy(queue->initial_preamble_cs);2732if (queue->continue_preamble_cs)2733queue->device->ws->cs_destroy(queue->continue_preamble_cs);2734if (queue->descriptor_bo)2735queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);2736if (queue->scratch_bo)2737queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);2738if (queue->esgs_ring_bo)2739queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);2740if (queue->gsvs_ring_bo)2741queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);2742if (queue->tess_rings_bo)2743queue->device->ws->buffer_destroy(queue->device->ws, queue->tess_rings_bo);2744if (queue->gds_bo)2745queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_bo);2746if (queue->gds_oa_bo)2747queue->device->ws->buffer_destroy(queue->device->ws, queue->gds_oa_bo);2748if (queue->compute_scratch_bo)2749queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);27502751vk_object_base_finish(&queue->base);2752}27532754static void2755radv_device_init_gs_info(struct radv_device *device)2756{2757device->gs_table_depth = ac_get_gs_table_depth(device->physical_device->rad_info.chip_class,2758device->physical_device->rad_info.family);2759}27602761static VkResult2762check_physical_device_features(VkPhysicalDevice physicalDevice,2763const VkPhysicalDeviceFeatures *features)2764{2765RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);2766VkPhysicalDeviceFeatures supported_features;2767radv_GetPhysicalDeviceFeatures(physicalDevice, &supported_features);2768VkBool32 *supported_feature = (VkBool32 *)&supported_features;2769VkBool32 *enabled_feature = (VkBool32 *)features;2770unsigned num_features = sizeof(VkPhysicalDeviceFeatures) / sizeof(VkBool32);2771for (uint32_t i = 0; i < num_features; i++) {2772if (enabled_feature[i] && !supported_feature[i])2773return vk_error(physical_device->instance, VK_ERROR_FEATURE_NOT_PRESENT);2774}27752776return VK_SUCCESS;2777}27782779static VkResult2780radv_device_init_border_color(struct radv_device *device)2781{2782VkResult result;27832784result = device->ws->buffer_create(2785device->ws, RADV_BORDER_COLOR_BUFFER_SIZE, 4096, RADEON_DOMAIN_VRAM,2786RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_READ_ONLY | RADEON_FLAG_NO_INTERPROCESS_SHARING,2787RADV_BO_PRIORITY_SHADER, 0, &device->border_color_data.bo);27882789if (result != VK_SUCCESS)2790return vk_error(device->physical_device->instance, result);27912792result = device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, true);2793if (result != VK_SUCCESS)2794return vk_error(device->physical_device->instance, result);27952796device->border_color_data.colors_gpu_ptr = device->ws->buffer_map(device->border_color_data.bo);2797if (!device->border_color_data.colors_gpu_ptr)2798return vk_error(device->physical_device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);2799mtx_init(&device->border_color_data.mutex, mtx_plain);28002801return VK_SUCCESS;2802}28032804static void2805radv_device_finish_border_color(struct radv_device *device)2806{2807if (device->border_color_data.bo) {2808device->ws->buffer_make_resident(device->ws, device->border_color_data.bo, false);2809device->ws->buffer_destroy(device->ws, device->border_color_data.bo);28102811mtx_destroy(&device->border_color_data.mutex);2812}2813}28142815VkResult2816radv_device_init_vrs_image(struct radv_device *device)2817{2818/* FIXME: 4k depth buffers should be large enough for now but we might want to adjust this2819* dynamically at some point. Also, it's probably better to use S8_UINT but no HTILE support yet.2820*/2821uint32_t width = 4096, height = 4096;2822VkMemoryRequirements mem_req;2823VkDeviceMemory mem;2824VkResult result;2825VkImage image;28262827VkImageCreateInfo image_create_info = {2828.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,2829.imageType = VK_IMAGE_TYPE_2D,2830.format = VK_FORMAT_D16_UNORM,2831.extent = {width, height, 1},2832.mipLevels = 1,2833.arrayLayers = 1,2834.samples = VK_SAMPLE_COUNT_1_BIT,2835.tiling = VK_IMAGE_TILING_OPTIMAL,2836.usage = VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT,2837.sharingMode = VK_SHARING_MODE_EXCLUSIVE,2838.queueFamilyIndexCount = 0,2839.pQueueFamilyIndices = NULL,2840.initialLayout = VK_IMAGE_LAYOUT_UNDEFINED,2841};28422843result = radv_CreateImage(radv_device_to_handle(device), &image_create_info,2844&device->meta_state.alloc, &image);2845if (result != VK_SUCCESS)2846return result;28472848radv_GetImageMemoryRequirements(radv_device_to_handle(device), image, &mem_req);28492850VkMemoryAllocateInfo alloc_info = {2851.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,2852.allocationSize = mem_req.size,2853};28542855result = radv_AllocateMemory(radv_device_to_handle(device), &alloc_info,2856&device->meta_state.alloc, &mem);2857if (result != VK_SUCCESS)2858goto fail_alloc;28592860result = radv_BindImageMemory(radv_device_to_handle(device), image, mem, 0);2861if (result != VK_SUCCESS)2862goto fail_bind;28632864device->vrs.image = radv_image_from_handle(image);2865device->vrs.mem = radv_device_memory_from_handle(mem);28662867return VK_SUCCESS;28682869fail_bind:2870radv_FreeMemory(radv_device_to_handle(device), mem, &device->meta_state.alloc);2871fail_alloc:2872radv_DestroyImage(radv_device_to_handle(device), image, &device->meta_state.alloc);28732874return result;2875}28762877static void2878radv_device_finish_vrs_image(struct radv_device *device)2879{2880radv_FreeMemory(radv_device_to_handle(device), radv_device_memory_to_handle(device->vrs.mem),2881&device->meta_state.alloc);2882radv_DestroyImage(radv_device_to_handle(device), radv_image_to_handle(device->vrs.image),2883&device->meta_state.alloc);2884}28852886VkResult2887_radv_device_set_lost(struct radv_device *device, const char *file, int line, const char *msg, ...)2888{2889VkResult err;2890va_list ap;28912892p_atomic_inc(&device->lost);28932894va_start(ap, msg);2895err =2896__vk_errorv(device->physical_device->instance, device, VK_DEBUG_REPORT_OBJECT_TYPE_DEVICE_EXT,2897VK_ERROR_DEVICE_LOST, file, line, msg, ap);2898va_end(ap);28992900return err;2901}29022903VkResult2904radv_CreateDevice(VkPhysicalDevice physicalDevice, const VkDeviceCreateInfo *pCreateInfo,2905const VkAllocationCallbacks *pAllocator, VkDevice *pDevice)2906{2907RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);2908VkResult result;2909struct radv_device *device;29102911bool keep_shader_info = false;2912bool robust_buffer_access = false;2913bool robust_buffer_access2 = false;2914bool overallocation_disallowed = false;2915bool custom_border_colors = false;2916bool vrs_enabled = false;2917bool attachment_vrs_enabled = false;29182919/* Check enabled features */2920if (pCreateInfo->pEnabledFeatures) {2921result = check_physical_device_features(physicalDevice, pCreateInfo->pEnabledFeatures);2922if (result != VK_SUCCESS)2923return result;29242925if (pCreateInfo->pEnabledFeatures->robustBufferAccess)2926robust_buffer_access = true;2927}29282929vk_foreach_struct_const(ext, pCreateInfo->pNext)2930{2931switch (ext->sType) {2932case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FEATURES_2: {2933const VkPhysicalDeviceFeatures2 *features = (const void *)ext;2934result = check_physical_device_features(physicalDevice, &features->features);2935if (result != VK_SUCCESS)2936return result;29372938if (features->features.robustBufferAccess)2939robust_buffer_access = true;2940break;2941}2942case VK_STRUCTURE_TYPE_DEVICE_MEMORY_OVERALLOCATION_CREATE_INFO_AMD: {2943const VkDeviceMemoryOverallocationCreateInfoAMD *overallocation = (const void *)ext;2944if (overallocation->overallocationBehavior ==2945VK_MEMORY_OVERALLOCATION_BEHAVIOR_DISALLOWED_AMD)2946overallocation_disallowed = true;2947break;2948}2949case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_CUSTOM_BORDER_COLOR_FEATURES_EXT: {2950const VkPhysicalDeviceCustomBorderColorFeaturesEXT *border_color_features =2951(const void *)ext;2952custom_border_colors = border_color_features->customBorderColors;2953break;2954}2955case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_FEATURES_KHR: {2956const VkPhysicalDeviceFragmentShadingRateFeaturesKHR *vrs = (const void *)ext;2957attachment_vrs_enabled = vrs->attachmentFragmentShadingRate;2958vrs_enabled = vrs->pipelineFragmentShadingRate || vrs->primitiveFragmentShadingRate ||2959attachment_vrs_enabled;2960break;2961}2962case VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_ROBUSTNESS_2_FEATURES_EXT: {2963const VkPhysicalDeviceRobustness2FeaturesEXT *features = (const void *)ext;2964if (features->robustBufferAccess2)2965robust_buffer_access2 = true;2966break;2967}2968default:2969break;2970}2971}29722973device = vk_zalloc2(&physical_device->instance->vk.alloc, pAllocator, sizeof(*device), 8,2974VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);2975if (!device)2976return vk_error(physical_device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);29772978struct vk_device_dispatch_table dispatch_table;29792980if (radv_thread_trace_enabled()) {2981vk_device_dispatch_table_from_entrypoints(&dispatch_table, &sqtt_device_entrypoints, true);2982vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, false);2983} else {2984vk_device_dispatch_table_from_entrypoints(&dispatch_table, &radv_device_entrypoints, true);2985}29862987result =2988vk_device_init(&device->vk, &physical_device->vk, &dispatch_table, pCreateInfo, pAllocator);2989if (result != VK_SUCCESS) {2990vk_free(&device->vk.alloc, device);2991return result;2992}29932994device->instance = physical_device->instance;2995device->physical_device = physical_device;29962997device->ws = physical_device->ws;29982999keep_shader_info = device->vk.enabled_extensions.AMD_shader_info;30003001/* With update after bind we can't attach bo's to the command buffer3002* from the descriptor set anymore, so we have to use a global BO list.3003*/3004device->use_global_bo_list = (device->instance->perftest_flags & RADV_PERFTEST_BO_LIST) ||3005device->vk.enabled_extensions.EXT_descriptor_indexing ||3006device->vk.enabled_extensions.EXT_buffer_device_address ||3007device->vk.enabled_extensions.KHR_buffer_device_address ||3008device->vk.enabled_extensions.KHR_ray_tracing_pipeline ||3009device->vk.enabled_extensions.KHR_acceleration_structure;30103011device->robust_buffer_access = robust_buffer_access || robust_buffer_access2;3012device->robust_buffer_access2 = robust_buffer_access2;30133014device->adjust_frag_coord_z =3015(vrs_enabled || device->vk.enabled_extensions.KHR_fragment_shading_rate ||3016device->force_vrs != RADV_FORCE_VRS_NONE) &&3017(device->physical_device->rad_info.family == CHIP_SIENNA_CICHLID ||3018device->physical_device->rad_info.family == CHIP_NAVY_FLOUNDER ||3019device->physical_device->rad_info.family == CHIP_VANGOGH);3020device->attachment_vrs_enabled = attachment_vrs_enabled;30213022mtx_init(&device->shader_slab_mutex, mtx_plain);3023list_inithead(&device->shader_slabs);30243025device->overallocation_disallowed = overallocation_disallowed;3026mtx_init(&device->overallocation_mutex, mtx_plain);30273028/* Create one context per queue priority. */3029for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {3030const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];3031const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =3032vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);3033enum radeon_ctx_priority priority = radv_get_queue_global_priority(global_priority);30343035if (device->hw_ctx[priority])3036continue;30373038result = device->ws->ctx_create(device->ws, priority, &device->hw_ctx[priority]);3039if (result != VK_SUCCESS)3040goto fail;3041}30423043for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) {3044const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i];3045uint32_t qfi = queue_create->queueFamilyIndex;3046const VkDeviceQueueGlobalPriorityCreateInfoEXT *global_priority =3047vk_find_struct_const(queue_create->pNext, DEVICE_QUEUE_GLOBAL_PRIORITY_CREATE_INFO_EXT);30483049device->queues[qfi] =3050vk_alloc(&device->vk.alloc, queue_create->queueCount * sizeof(struct radv_queue), 8,3051VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);3052if (!device->queues[qfi]) {3053result = VK_ERROR_OUT_OF_HOST_MEMORY;3054goto fail;3055}30563057memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue));30583059device->queue_count[qfi] = queue_create->queueCount;30603061for (unsigned q = 0; q < queue_create->queueCount; q++) {3062result = radv_queue_init(device, &device->queues[qfi][q], qfi, q, queue_create->flags,3063global_priority);3064if (result != VK_SUCCESS)3065goto fail;3066}3067}30683069device->pbb_allowed = device->physical_device->rad_info.chip_class >= GFX9 &&3070!(device->instance->debug_flags & RADV_DEBUG_NOBINNING);30713072/* The maximum number of scratch waves. Scratch space isn't divided3073* evenly between CUs. The number is only a function of the number of CUs.3074* We can decrease the constant to decrease the scratch buffer size.3075*3076* sctx->scratch_waves must be >= the maximum possible size of3077* 1 threadgroup, so that the hw doesn't hang from being unable3078* to start any.3079*3080* The recommended value is 4 per CU at most. Higher numbers don't3081* bring much benefit, but they still occupy chip resources (think3082* async compute). I've seen ~2% performance difference between 4 and 32.3083*/3084uint32_t max_threads_per_block = 2048;3085device->scratch_waves =3086MAX2(32 * physical_device->rad_info.num_good_compute_units, max_threads_per_block / 64);30873088device->dispatch_initiator = S_00B800_COMPUTE_SHADER_EN(1);30893090if (device->physical_device->rad_info.chip_class >= GFX7) {3091/* If the KMD allows it (there is a KMD hw register for it),3092* allow launching waves out-of-order.3093*/3094device->dispatch_initiator |= S_00B800_ORDER_MODE(1);3095}30963097radv_device_init_gs_info(device);30983099device->tess_offchip_block_dw_size =3100device->physical_device->rad_info.family == CHIP_HAWAII ? 4096 : 8192;31013102if (getenv("RADV_TRACE_FILE")) {3103fprintf(3104stderr,3105"***********************************************************************************\n");3106fprintf(3107stderr,3108"* WARNING: RADV_TRACE_FILE=<file> is deprecated and replaced by RADV_DEBUG=hang *\n");3109fprintf(3110stderr,3111"***********************************************************************************\n");3112abort();3113}31143115if (device->instance->debug_flags & RADV_DEBUG_HANG) {3116/* Enable GPU hangs detection and dump logs if a GPU hang is3117* detected.3118*/3119keep_shader_info = true;31203121if (!radv_init_trace(device))3122goto fail;31233124fprintf(stderr,3125"*****************************************************************************\n");3126fprintf(stderr,3127"* WARNING: RADV_DEBUG=hang is costly and should only be used for debugging! *\n");3128fprintf(stderr,3129"*****************************************************************************\n");31303131/* Wait for idle after every draw/dispatch to identify the3132* first bad call.3133*/3134device->instance->debug_flags |= RADV_DEBUG_SYNC_SHADERS;31353136radv_dump_enabled_options(device, stderr);3137}31383139if (radv_thread_trace_enabled()) {3140fprintf(stderr, "*************************************************\n");3141fprintf(stderr, "* WARNING: Thread trace support is experimental *\n");3142fprintf(stderr, "*************************************************\n");31433144if (device->physical_device->rad_info.chip_class < GFX8 ||3145device->physical_device->rad_info.chip_class > GFX10_3) {3146fprintf(stderr, "GPU hardware not supported: refer to "3147"the RGP documentation for the list of "3148"supported GPUs!\n");3149abort();3150}31513152if (!radv_thread_trace_init(device))3153goto fail;3154}31553156if (getenv("RADV_TRAP_HANDLER")) {3157/* TODO: Add support for more hardware. */3158assert(device->physical_device->rad_info.chip_class == GFX8);31593160fprintf(stderr, "**********************************************************************\n");3161fprintf(stderr, "* WARNING: RADV_TRAP_HANDLER is experimental and only for debugging! *\n");3162fprintf(stderr, "**********************************************************************\n");31633164/* To get the disassembly of the faulty shaders, we have to3165* keep some shader info around.3166*/3167keep_shader_info = true;31683169if (!radv_trap_handler_init(device))3170goto fail;3171}31723173if (getenv("RADV_FORCE_VRS")) {3174const char *vrs_rates = getenv("RADV_FORCE_VRS");31753176if (device->physical_device->rad_info.chip_class < GFX10_3)3177fprintf(stderr, "radv: VRS is only supported on RDNA2+\n");3178else if (!strcmp(vrs_rates, "2x2"))3179device->force_vrs = RADV_FORCE_VRS_2x2;3180else if (!strcmp(vrs_rates, "2x1"))3181device->force_vrs = RADV_FORCE_VRS_2x1;3182else if (!strcmp(vrs_rates, "1x2"))3183device->force_vrs = RADV_FORCE_VRS_1x2;3184else3185fprintf(stderr, "radv: Invalid VRS rates specified "3186"(valid values are 2x2, 2x1 and 1x2)\n");3187}31883189device->keep_shader_info = keep_shader_info;3190result = radv_device_init_meta(device);3191if (result != VK_SUCCESS)3192goto fail;31933194radv_device_init_msaa(device);31953196/* If the border color extension is enabled, let's create the buffer we need. */3197if (custom_border_colors) {3198result = radv_device_init_border_color(device);3199if (result != VK_SUCCESS)3200goto fail;3201}32023203for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) {3204device->empty_cs[family] = device->ws->cs_create(device->ws, family);3205if (!device->empty_cs[family])3206goto fail;32073208switch (family) {3209case RADV_QUEUE_GENERAL:3210radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0));3211radeon_emit(device->empty_cs[family], CC0_UPDATE_LOAD_ENABLES(1));3212radeon_emit(device->empty_cs[family], CC1_UPDATE_SHADOW_ENABLES(1));3213break;3214case RADV_QUEUE_COMPUTE:3215radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0));3216radeon_emit(device->empty_cs[family], 0);3217break;3218}32193220result = device->ws->cs_finalize(device->empty_cs[family]);3221if (result != VK_SUCCESS)3222goto fail;3223}32243225if (device->physical_device->rad_info.chip_class >= GFX7)3226cik_create_gfx_config(device);32273228VkPipelineCacheCreateInfo ci;3229ci.sType = VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO;3230ci.pNext = NULL;3231ci.flags = 0;3232ci.pInitialData = NULL;3233ci.initialDataSize = 0;3234VkPipelineCache pc;3235result = radv_CreatePipelineCache(radv_device_to_handle(device), &ci, NULL, &pc);3236if (result != VK_SUCCESS)3237goto fail_meta;32383239device->mem_cache = radv_pipeline_cache_from_handle(pc);32403241if (u_cnd_monotonic_init(&device->timeline_cond)) {3242result = VK_ERROR_INITIALIZATION_FAILED;3243goto fail_mem_cache;3244}32453246device->force_aniso = MIN2(16, radv_get_int_debug_option("RADV_TEX_ANISO", -1));3247if (device->force_aniso >= 0) {3248fprintf(stderr, "radv: Forcing anisotropy filter to %ix\n",32491 << util_logbase2(device->force_aniso));3250}32513252*pDevice = radv_device_to_handle(device);3253return VK_SUCCESS;32543255fail_mem_cache:3256radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);3257fail_meta:3258radv_device_finish_meta(device);3259fail:3260radv_thread_trace_finish(device);3261free(device->thread_trace.trigger_file);32623263radv_trap_handler_finish(device);3264radv_finish_trace(device);32653266if (device->gfx_init)3267device->ws->buffer_destroy(device->ws, device->gfx_init);32683269radv_device_finish_border_color(device);32703271for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {3272for (unsigned q = 0; q < device->queue_count[i]; q++)3273radv_queue_finish(&device->queues[i][q]);3274if (device->queue_count[i])3275vk_free(&device->vk.alloc, device->queues[i]);3276}32773278for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {3279if (device->hw_ctx[i])3280device->ws->ctx_destroy(device->hw_ctx[i]);3281}32823283vk_device_finish(&device->vk);3284vk_free(&device->vk.alloc, device);3285return result;3286}32873288void3289radv_DestroyDevice(VkDevice _device, const VkAllocationCallbacks *pAllocator)3290{3291RADV_FROM_HANDLE(radv_device, device, _device);32923293if (!device)3294return;32953296if (device->gfx_init)3297device->ws->buffer_destroy(device->ws, device->gfx_init);32983299radv_device_finish_border_color(device);3300radv_device_finish_vrs_image(device);33013302for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {3303for (unsigned q = 0; q < device->queue_count[i]; q++)3304radv_queue_finish(&device->queues[i][q]);3305if (device->queue_count[i])3306vk_free(&device->vk.alloc, device->queues[i]);3307if (device->empty_cs[i])3308device->ws->cs_destroy(device->empty_cs[i]);3309}33103311for (unsigned i = 0; i < RADV_NUM_HW_CTX; i++) {3312if (device->hw_ctx[i])3313device->ws->ctx_destroy(device->hw_ctx[i]);3314}33153316radv_device_finish_meta(device);33173318VkPipelineCache pc = radv_pipeline_cache_to_handle(device->mem_cache);3319radv_DestroyPipelineCache(radv_device_to_handle(device), pc, NULL);33203321radv_trap_handler_finish(device);3322radv_finish_trace(device);33233324radv_destroy_shader_slabs(device);33253326u_cnd_monotonic_destroy(&device->timeline_cond);33273328free(device->thread_trace.trigger_file);3329radv_thread_trace_finish(device);33303331vk_device_finish(&device->vk);3332vk_free(&device->vk.alloc, device);3333}33343335VkResult3336radv_EnumerateInstanceLayerProperties(uint32_t *pPropertyCount, VkLayerProperties *pProperties)3337{3338if (pProperties == NULL) {3339*pPropertyCount = 0;3340return VK_SUCCESS;3341}33423343/* None supported at this time */3344return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);3345}33463347VkResult3348radv_EnumerateDeviceLayerProperties(VkPhysicalDevice physicalDevice, uint32_t *pPropertyCount,3349VkLayerProperties *pProperties)3350{3351if (pProperties == NULL) {3352*pPropertyCount = 0;3353return VK_SUCCESS;3354}33553356/* None supported at this time */3357return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);3358}33593360void3361radv_GetDeviceQueue2(VkDevice _device, const VkDeviceQueueInfo2 *pQueueInfo, VkQueue *pQueue)3362{3363RADV_FROM_HANDLE(radv_device, device, _device);3364struct radv_queue *queue;33653366queue = &device->queues[pQueueInfo->queueFamilyIndex][pQueueInfo->queueIndex];3367if (pQueueInfo->flags != queue->flags) {3368/* From the Vulkan 1.1.70 spec:3369*3370* "The queue returned by vkGetDeviceQueue2 must have the same3371* flags value from this structure as that used at device3372* creation time in a VkDeviceQueueCreateInfo instance. If no3373* matching flags were specified at device creation time then3374* pQueue will return VK_NULL_HANDLE."3375*/3376*pQueue = VK_NULL_HANDLE;3377return;3378}33793380*pQueue = radv_queue_to_handle(queue);3381}33823383void3384radv_GetDeviceQueue(VkDevice _device, uint32_t queueFamilyIndex, uint32_t queueIndex,3385VkQueue *pQueue)3386{3387const VkDeviceQueueInfo2 info =3388(VkDeviceQueueInfo2){.sType = VK_STRUCTURE_TYPE_DEVICE_QUEUE_INFO_2,3389.queueFamilyIndex = queueFamilyIndex,3390.queueIndex = queueIndex};33913392radv_GetDeviceQueue2(_device, &info, pQueue);3393}33943395static void3396fill_geom_tess_rings(struct radv_queue *queue, uint32_t *map, bool add_sample_positions,3397uint32_t esgs_ring_size, struct radeon_winsys_bo *esgs_ring_bo,3398uint32_t gsvs_ring_size, struct radeon_winsys_bo *gsvs_ring_bo,3399uint32_t tess_factor_ring_size, uint32_t tess_offchip_ring_offset,3400uint32_t tess_offchip_ring_size, struct radeon_winsys_bo *tess_rings_bo)3401{3402uint32_t *desc = &map[4];34033404if (esgs_ring_bo) {3405uint64_t esgs_va = radv_buffer_get_va(esgs_ring_bo);34063407/* stride 0, num records - size, add tid, swizzle, elsize4,3408index stride 64 */3409desc[0] = esgs_va;3410desc[1] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32) | S_008F04_SWIZZLE_ENABLE(true);3411desc[2] = esgs_ring_size;3412desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |3413S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |3414S_008F0C_INDEX_STRIDE(3) | S_008F0C_ADD_TID_ENABLE(1);34153416if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3417desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |3418S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);3419} else {3420desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |3421S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);3422}34233424/* GS entry for ES->GS ring */3425/* stride 0, num records - size, elsize0,3426index stride 0 */3427desc[4] = esgs_va;3428desc[5] = S_008F04_BASE_ADDRESS_HI(esgs_va >> 32);3429desc[6] = esgs_ring_size;3430desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |3431S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);34323433if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3434desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |3435S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);3436} else {3437desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |3438S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);3439}3440}34413442desc += 8;34433444if (gsvs_ring_bo) {3445uint64_t gsvs_va = radv_buffer_get_va(gsvs_ring_bo);34463447/* VS entry for GS->VS ring */3448/* stride 0, num records - size, elsize0,3449index stride 0 */3450desc[0] = gsvs_va;3451desc[1] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32);3452desc[2] = gsvs_ring_size;3453desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |3454S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);34553456if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3457desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |3458S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);3459} else {3460desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |3461S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);3462}34633464/* stride gsvs_itemsize, num records 643465elsize 4, index stride 16 */3466/* shader will patch stride and desc[2] */3467desc[4] = gsvs_va;3468desc[5] = S_008F04_BASE_ADDRESS_HI(gsvs_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);3469desc[6] = 0;3470desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |3471S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |3472S_008F0C_INDEX_STRIDE(1) | S_008F0C_ADD_TID_ENABLE(true);34733474if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3475desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |3476S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_DISABLED) | S_008F0C_RESOURCE_LEVEL(1);3477} else {3478desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |3479S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32) | S_008F0C_ELEMENT_SIZE(1);3480}3481}34823483desc += 8;34843485if (tess_rings_bo) {3486uint64_t tess_va = radv_buffer_get_va(tess_rings_bo);3487uint64_t tess_offchip_va = tess_va + tess_offchip_ring_offset;34883489desc[0] = tess_va;3490desc[1] = S_008F04_BASE_ADDRESS_HI(tess_va >> 32);3491desc[2] = tess_factor_ring_size;3492desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |3493S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);34943495if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3496desc[3] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |3497S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);3498} else {3499desc[3] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |3500S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);3501}35023503desc[4] = tess_offchip_va;3504desc[5] = S_008F04_BASE_ADDRESS_HI(tess_offchip_va >> 32);3505desc[6] = tess_offchip_ring_size;3506desc[7] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |3507S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W);35083509if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3510desc[7] |= S_008F0C_FORMAT(V_008F0C_GFX10_FORMAT_32_FLOAT) |3511S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_RAW) | S_008F0C_RESOURCE_LEVEL(1);3512} else {3513desc[7] |= S_008F0C_NUM_FORMAT(V_008F0C_BUF_NUM_FORMAT_FLOAT) |3514S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);3515}3516}35173518desc += 8;35193520if (add_sample_positions) {3521/* add sample positions after all rings */3522memcpy(desc, queue->device->sample_locations_1x, 8);3523desc += 2;3524memcpy(desc, queue->device->sample_locations_2x, 16);3525desc += 4;3526memcpy(desc, queue->device->sample_locations_4x, 32);3527desc += 8;3528memcpy(desc, queue->device->sample_locations_8x, 64);3529}3530}35313532static unsigned3533radv_get_hs_offchip_param(struct radv_device *device, uint32_t *max_offchip_buffers_p)3534{3535bool double_offchip_buffers = device->physical_device->rad_info.chip_class >= GFX7 &&3536device->physical_device->rad_info.family != CHIP_CARRIZO &&3537device->physical_device->rad_info.family != CHIP_STONEY;3538unsigned max_offchip_buffers_per_se = double_offchip_buffers ? 128 : 64;3539unsigned max_offchip_buffers;3540unsigned offchip_granularity;3541unsigned hs_offchip_param;35423543/*3544* Per RadeonSI:3545* This must be one less than the maximum number due to a hw limitation.3546* Various hardware bugs need thGFX73547*3548* Per AMDVLK:3549* Vega10 should limit max_offchip_buffers to 508 (4 * 127).3550* Gfx7 should limit max_offchip_buffers to 5083551* Gfx6 should limit max_offchip_buffers to 126 (2 * 63)3552*3553* Follow AMDVLK here.3554*/3555if (device->physical_device->rad_info.chip_class >= GFX10) {3556max_offchip_buffers_per_se = 128;3557} else if (device->physical_device->rad_info.family == CHIP_VEGA10 ||3558device->physical_device->rad_info.chip_class == GFX7 ||3559device->physical_device->rad_info.chip_class == GFX6)3560--max_offchip_buffers_per_se;35613562max_offchip_buffers = max_offchip_buffers_per_se * device->physical_device->rad_info.max_se;35633564/* Hawaii has a bug with offchip buffers > 256 that can be worked3565* around by setting 4K granularity.3566*/3567if (device->tess_offchip_block_dw_size == 4096) {3568assert(device->physical_device->rad_info.family == CHIP_HAWAII);3569offchip_granularity = V_03093C_X_4K_DWORDS;3570} else {3571assert(device->tess_offchip_block_dw_size == 8192);3572offchip_granularity = V_03093C_X_8K_DWORDS;3573}35743575switch (device->physical_device->rad_info.chip_class) {3576case GFX6:3577max_offchip_buffers = MIN2(max_offchip_buffers, 126);3578break;3579case GFX7:3580case GFX8:3581case GFX9:3582max_offchip_buffers = MIN2(max_offchip_buffers, 508);3583break;3584case GFX10:3585break;3586default:3587break;3588}35893590*max_offchip_buffers_p = max_offchip_buffers;3591if (device->physical_device->rad_info.chip_class >= GFX10_3) {3592hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX103(max_offchip_buffers - 1) |3593S_03093C_OFFCHIP_GRANULARITY_GFX103(offchip_granularity);3594} else if (device->physical_device->rad_info.chip_class >= GFX7) {3595if (device->physical_device->rad_info.chip_class >= GFX8)3596--max_offchip_buffers;3597hs_offchip_param = S_03093C_OFFCHIP_BUFFERING_GFX7(max_offchip_buffers) |3598S_03093C_OFFCHIP_GRANULARITY_GFX7(offchip_granularity);3599} else {3600hs_offchip_param = S_0089B0_OFFCHIP_BUFFERING(max_offchip_buffers);3601}3602return hs_offchip_param;3603}36043605static void3606radv_emit_gs_ring_sizes(struct radv_queue *queue, struct radeon_cmdbuf *cs,3607struct radeon_winsys_bo *esgs_ring_bo, uint32_t esgs_ring_size,3608struct radeon_winsys_bo *gsvs_ring_bo, uint32_t gsvs_ring_size)3609{3610if (!esgs_ring_bo && !gsvs_ring_bo)3611return;36123613if (esgs_ring_bo)3614radv_cs_add_buffer(queue->device->ws, cs, esgs_ring_bo);36153616if (gsvs_ring_bo)3617radv_cs_add_buffer(queue->device->ws, cs, gsvs_ring_bo);36183619if (queue->device->physical_device->rad_info.chip_class >= GFX7) {3620radeon_set_uconfig_reg_seq(cs, R_030900_VGT_ESGS_RING_SIZE, 2);3621radeon_emit(cs, esgs_ring_size >> 8);3622radeon_emit(cs, gsvs_ring_size >> 8);3623} else {3624radeon_set_config_reg_seq(cs, R_0088C8_VGT_ESGS_RING_SIZE, 2);3625radeon_emit(cs, esgs_ring_size >> 8);3626radeon_emit(cs, gsvs_ring_size >> 8);3627}3628}36293630static void3631radv_emit_tess_factor_ring(struct radv_queue *queue, struct radeon_cmdbuf *cs,3632unsigned hs_offchip_param, unsigned tf_ring_size,3633struct radeon_winsys_bo *tess_rings_bo)3634{3635uint64_t tf_va;36363637if (!tess_rings_bo)3638return;36393640tf_va = radv_buffer_get_va(tess_rings_bo);36413642radv_cs_add_buffer(queue->device->ws, cs, tess_rings_bo);36433644if (queue->device->physical_device->rad_info.chip_class >= GFX7) {3645radeon_set_uconfig_reg(cs, R_030938_VGT_TF_RING_SIZE, S_030938_SIZE(tf_ring_size / 4));3646radeon_set_uconfig_reg(cs, R_030940_VGT_TF_MEMORY_BASE, tf_va >> 8);36473648if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3649radeon_set_uconfig_reg(cs, R_030984_VGT_TF_MEMORY_BASE_HI_UMD,3650S_030984_BASE_HI(tf_va >> 40));3651} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {3652radeon_set_uconfig_reg(cs, R_030944_VGT_TF_MEMORY_BASE_HI, S_030944_BASE_HI(tf_va >> 40));3653}3654radeon_set_uconfig_reg(cs, R_03093C_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);3655} else {3656radeon_set_config_reg(cs, R_008988_VGT_TF_RING_SIZE, S_008988_SIZE(tf_ring_size / 4));3657radeon_set_config_reg(cs, R_0089B8_VGT_TF_MEMORY_BASE, tf_va >> 8);3658radeon_set_config_reg(cs, R_0089B0_VGT_HS_OFFCHIP_PARAM, hs_offchip_param);3659}3660}36613662static void3663radv_emit_graphics_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,3664uint32_t size_per_wave, uint32_t waves,3665struct radeon_winsys_bo *scratch_bo)3666{3667if (queue->queue_family_index != RADV_QUEUE_GENERAL)3668return;36693670if (!scratch_bo)3671return;36723673radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);36743675radeon_set_context_reg(3676cs, R_0286E8_SPI_TMPRING_SIZE,3677S_0286E8_WAVES(waves) | S_0286E8_WAVESIZE(round_up_u32(size_per_wave, 1024)));3678}36793680static void3681radv_emit_compute_scratch(struct radv_queue *queue, struct radeon_cmdbuf *cs,3682uint32_t size_per_wave, uint32_t waves,3683struct radeon_winsys_bo *compute_scratch_bo)3684{3685uint64_t scratch_va;36863687if (!compute_scratch_bo)3688return;36893690scratch_va = radv_buffer_get_va(compute_scratch_bo);36913692radv_cs_add_buffer(queue->device->ws, cs, compute_scratch_bo);36933694radeon_set_sh_reg_seq(cs, R_00B900_COMPUTE_USER_DATA_0, 2);3695radeon_emit(cs, scratch_va);3696radeon_emit(cs, S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1));36973698radeon_set_sh_reg(cs, R_00B860_COMPUTE_TMPRING_SIZE,3699S_00B860_WAVES(waves) | S_00B860_WAVESIZE(round_up_u32(size_per_wave, 1024)));3700}37013702static void3703radv_emit_global_shader_pointers(struct radv_queue *queue, struct radeon_cmdbuf *cs,3704struct radeon_winsys_bo *descriptor_bo)3705{3706uint64_t va;37073708if (!descriptor_bo)3709return;37103711va = radv_buffer_get_va(descriptor_bo);37123713radv_cs_add_buffer(queue->device->ws, cs, descriptor_bo);37143715if (queue->device->physical_device->rad_info.chip_class >= GFX10) {3716uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,3717R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,3718R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};37193720for (int i = 0; i < ARRAY_SIZE(regs); ++i) {3721radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);3722}3723} else if (queue->device->physical_device->rad_info.chip_class == GFX9) {3724uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,3725R_00B208_SPI_SHADER_USER_DATA_ADDR_LO_GS,3726R_00B408_SPI_SHADER_USER_DATA_ADDR_LO_HS};37273728for (int i = 0; i < ARRAY_SIZE(regs); ++i) {3729radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);3730}3731} else {3732uint32_t regs[] = {R_00B030_SPI_SHADER_USER_DATA_PS_0, R_00B130_SPI_SHADER_USER_DATA_VS_0,3733R_00B230_SPI_SHADER_USER_DATA_GS_0, R_00B330_SPI_SHADER_USER_DATA_ES_0,3734R_00B430_SPI_SHADER_USER_DATA_HS_0, R_00B530_SPI_SHADER_USER_DATA_LS_0};37353736for (int i = 0; i < ARRAY_SIZE(regs); ++i) {3737radv_emit_shader_pointer(queue->device, cs, regs[i], va, true);3738}3739}3740}37413742static void3743radv_init_graphics_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)3744{3745struct radv_device *device = queue->device;37463747if (device->gfx_init) {3748uint64_t va = radv_buffer_get_va(device->gfx_init);37493750radeon_emit(cs, PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0));3751radeon_emit(cs, va);3752radeon_emit(cs, va >> 32);3753radeon_emit(cs, device->gfx_init_size_dw & 0xffff);37543755radv_cs_add_buffer(device->ws, cs, device->gfx_init);3756} else {3757si_emit_graphics(device, cs);3758}3759}37603761static void3762radv_init_compute_state(struct radeon_cmdbuf *cs, struct radv_queue *queue)3763{3764si_emit_compute(queue->device, cs);3765}37663767static VkResult3768radv_get_preamble_cs(struct radv_queue *queue, uint32_t scratch_size_per_wave,3769uint32_t scratch_waves, uint32_t compute_scratch_size_per_wave,3770uint32_t compute_scratch_waves, uint32_t esgs_ring_size,3771uint32_t gsvs_ring_size, bool needs_tess_rings, bool needs_gds,3772bool needs_gds_oa, bool needs_sample_positions,3773struct radeon_cmdbuf **initial_full_flush_preamble_cs,3774struct radeon_cmdbuf **initial_preamble_cs,3775struct radeon_cmdbuf **continue_preamble_cs)3776{3777struct radeon_winsys_bo *scratch_bo = NULL;3778struct radeon_winsys_bo *descriptor_bo = NULL;3779struct radeon_winsys_bo *compute_scratch_bo = NULL;3780struct radeon_winsys_bo *esgs_ring_bo = NULL;3781struct radeon_winsys_bo *gsvs_ring_bo = NULL;3782struct radeon_winsys_bo *tess_rings_bo = NULL;3783struct radeon_winsys_bo *gds_bo = NULL;3784struct radeon_winsys_bo *gds_oa_bo = NULL;3785struct radeon_cmdbuf *dest_cs[3] = {0};3786bool add_tess_rings = false, add_gds = false, add_gds_oa = false, add_sample_positions = false;3787unsigned tess_factor_ring_size = 0, tess_offchip_ring_size = 0;3788unsigned max_offchip_buffers;3789unsigned hs_offchip_param = 0;3790unsigned tess_offchip_ring_offset;3791uint32_t ring_bo_flags = RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING;3792VkResult result = VK_SUCCESS;3793if (!queue->has_tess_rings) {3794if (needs_tess_rings)3795add_tess_rings = true;3796}3797if (!queue->has_gds) {3798if (needs_gds)3799add_gds = true;3800}3801if (!queue->has_gds_oa) {3802if (needs_gds_oa)3803add_gds_oa = true;3804}3805if (!queue->has_sample_positions) {3806if (needs_sample_positions)3807add_sample_positions = true;3808}3809tess_factor_ring_size = 32768 * queue->device->physical_device->rad_info.max_se;3810hs_offchip_param = radv_get_hs_offchip_param(queue->device, &max_offchip_buffers);3811tess_offchip_ring_offset = align(tess_factor_ring_size, 64 * 1024);3812tess_offchip_ring_size = max_offchip_buffers * queue->device->tess_offchip_block_dw_size * 4;38133814scratch_size_per_wave = MAX2(scratch_size_per_wave, queue->scratch_size_per_wave);3815if (scratch_size_per_wave)3816scratch_waves = MIN2(scratch_waves, UINT32_MAX / scratch_size_per_wave);3817else3818scratch_waves = 0;38193820compute_scratch_size_per_wave =3821MAX2(compute_scratch_size_per_wave, queue->compute_scratch_size_per_wave);3822if (compute_scratch_size_per_wave)3823compute_scratch_waves =3824MIN2(compute_scratch_waves, UINT32_MAX / compute_scratch_size_per_wave);3825else3826compute_scratch_waves = 0;38273828if (scratch_size_per_wave <= queue->scratch_size_per_wave &&3829scratch_waves <= queue->scratch_waves &&3830compute_scratch_size_per_wave <= queue->compute_scratch_size_per_wave &&3831compute_scratch_waves <= queue->compute_scratch_waves &&3832esgs_ring_size <= queue->esgs_ring_size && gsvs_ring_size <= queue->gsvs_ring_size &&3833!add_tess_rings && !add_gds && !add_gds_oa && !add_sample_positions &&3834queue->initial_preamble_cs) {3835*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;3836*initial_preamble_cs = queue->initial_preamble_cs;3837*continue_preamble_cs = queue->continue_preamble_cs;3838if (!scratch_size_per_wave && !compute_scratch_size_per_wave && !esgs_ring_size &&3839!gsvs_ring_size && !needs_tess_rings && !needs_gds && !needs_gds_oa &&3840!needs_sample_positions)3841*continue_preamble_cs = NULL;3842return VK_SUCCESS;3843}38443845uint32_t scratch_size = scratch_size_per_wave * scratch_waves;3846uint32_t queue_scratch_size = queue->scratch_size_per_wave * queue->scratch_waves;3847if (scratch_size > queue_scratch_size) {3848result =3849queue->device->ws->buffer_create(queue->device->ws, scratch_size, 4096, RADEON_DOMAIN_VRAM,3850ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &scratch_bo);3851if (result != VK_SUCCESS)3852goto fail;3853} else3854scratch_bo = queue->scratch_bo;38553856uint32_t compute_scratch_size = compute_scratch_size_per_wave * compute_scratch_waves;3857uint32_t compute_queue_scratch_size =3858queue->compute_scratch_size_per_wave * queue->compute_scratch_waves;3859if (compute_scratch_size > compute_queue_scratch_size) {3860result = queue->device->ws->buffer_create(queue->device->ws, compute_scratch_size, 4096,3861RADEON_DOMAIN_VRAM, ring_bo_flags,3862RADV_BO_PRIORITY_SCRATCH, 0, &compute_scratch_bo);3863if (result != VK_SUCCESS)3864goto fail;38653866} else3867compute_scratch_bo = queue->compute_scratch_bo;38683869if (esgs_ring_size > queue->esgs_ring_size) {3870result = queue->device->ws->buffer_create(queue->device->ws, esgs_ring_size, 4096,3871RADEON_DOMAIN_VRAM, ring_bo_flags,3872RADV_BO_PRIORITY_SCRATCH, 0, &esgs_ring_bo);3873if (result != VK_SUCCESS)3874goto fail;3875} else {3876esgs_ring_bo = queue->esgs_ring_bo;3877esgs_ring_size = queue->esgs_ring_size;3878}38793880if (gsvs_ring_size > queue->gsvs_ring_size) {3881result = queue->device->ws->buffer_create(queue->device->ws, gsvs_ring_size, 4096,3882RADEON_DOMAIN_VRAM, ring_bo_flags,3883RADV_BO_PRIORITY_SCRATCH, 0, &gsvs_ring_bo);3884if (result != VK_SUCCESS)3885goto fail;3886} else {3887gsvs_ring_bo = queue->gsvs_ring_bo;3888gsvs_ring_size = queue->gsvs_ring_size;3889}38903891if (add_tess_rings) {3892result = queue->device->ws->buffer_create(3893queue->device->ws, tess_offchip_ring_offset + tess_offchip_ring_size, 256,3894RADEON_DOMAIN_VRAM, ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &tess_rings_bo);3895if (result != VK_SUCCESS)3896goto fail;3897} else {3898tess_rings_bo = queue->tess_rings_bo;3899}39003901if (add_gds) {3902assert(queue->device->physical_device->rad_info.chip_class >= GFX10);39033904/* 4 streamout GDS counters.3905* We need 256B (64 dw) of GDS, otherwise streamout hangs.3906*/3907result =3908queue->device->ws->buffer_create(queue->device->ws, 256, 4, RADEON_DOMAIN_GDS,3909ring_bo_flags, RADV_BO_PRIORITY_SCRATCH, 0, &gds_bo);3910if (result != VK_SUCCESS)3911goto fail;3912} else {3913gds_bo = queue->gds_bo;3914}39153916if (add_gds_oa) {3917assert(queue->device->physical_device->rad_info.chip_class >= GFX10);39183919result =3920queue->device->ws->buffer_create(queue->device->ws, 4, 1, RADEON_DOMAIN_OA, ring_bo_flags,3921RADV_BO_PRIORITY_SCRATCH, 0, &gds_oa_bo);3922if (result != VK_SUCCESS)3923goto fail;3924} else {3925gds_oa_bo = queue->gds_oa_bo;3926}39273928if (scratch_bo != queue->scratch_bo || esgs_ring_bo != queue->esgs_ring_bo ||3929gsvs_ring_bo != queue->gsvs_ring_bo || tess_rings_bo != queue->tess_rings_bo ||3930add_sample_positions) {3931uint32_t size = 0;3932if (gsvs_ring_bo || esgs_ring_bo || tess_rings_bo || add_sample_positions) {3933size = 112; /* 2 dword + 2 padding + 4 dword * 6 */3934if (add_sample_positions)3935size += 128; /* 64+32+16+8 = 120 bytes */3936} else if (scratch_bo)3937size = 8; /* 2 dword */39383939result = queue->device->ws->buffer_create(3940queue->device->ws, size, 4096, RADEON_DOMAIN_VRAM,3941RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_READ_ONLY,3942RADV_BO_PRIORITY_DESCRIPTOR, 0, &descriptor_bo);3943if (result != VK_SUCCESS)3944goto fail;3945} else3946descriptor_bo = queue->descriptor_bo;39473948if (descriptor_bo != queue->descriptor_bo) {3949uint32_t *map = (uint32_t *)queue->device->ws->buffer_map(descriptor_bo);3950if (!map)3951goto fail;39523953if (scratch_bo) {3954uint64_t scratch_va = radv_buffer_get_va(scratch_bo);3955uint32_t rsrc1 = S_008F04_BASE_ADDRESS_HI(scratch_va >> 32) | S_008F04_SWIZZLE_ENABLE(1);3956map[0] = scratch_va;3957map[1] = rsrc1;3958}39593960if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo || add_sample_positions)3961fill_geom_tess_rings(queue, map, add_sample_positions, esgs_ring_size, esgs_ring_bo,3962gsvs_ring_size, gsvs_ring_bo, tess_factor_ring_size,3963tess_offchip_ring_offset, tess_offchip_ring_size, tess_rings_bo);39643965queue->device->ws->buffer_unmap(descriptor_bo);3966}39673968for (int i = 0; i < 3; ++i) {3969enum rgp_flush_bits sqtt_flush_bits = 0;3970struct radeon_cmdbuf *cs = NULL;3971cs = queue->device->ws->cs_create(queue->device->ws,3972queue->queue_family_index ? RING_COMPUTE : RING_GFX);3973if (!cs) {3974result = VK_ERROR_OUT_OF_HOST_MEMORY;3975goto fail;3976}39773978dest_cs[i] = cs;39793980if (scratch_bo)3981radv_cs_add_buffer(queue->device->ws, cs, scratch_bo);39823983/* Emit initial configuration. */3984switch (queue->queue_family_index) {3985case RADV_QUEUE_GENERAL:3986radv_init_graphics_state(cs, queue);3987break;3988case RADV_QUEUE_COMPUTE:3989radv_init_compute_state(cs, queue);3990break;3991case RADV_QUEUE_TRANSFER:3992break;3993}39943995if (esgs_ring_bo || gsvs_ring_bo || tess_rings_bo) {3996radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));3997radeon_emit(cs, EVENT_TYPE(V_028A90_VS_PARTIAL_FLUSH) | EVENT_INDEX(4));39983999radeon_emit(cs, PKT3(PKT3_EVENT_WRITE, 0, 0));4000radeon_emit(cs, EVENT_TYPE(V_028A90_VGT_FLUSH) | EVENT_INDEX(0));4001}40024003radv_emit_gs_ring_sizes(queue, cs, esgs_ring_bo, esgs_ring_size, gsvs_ring_bo,4004gsvs_ring_size);4005radv_emit_tess_factor_ring(queue, cs, hs_offchip_param, tess_factor_ring_size, tess_rings_bo);4006radv_emit_global_shader_pointers(queue, cs, descriptor_bo);4007radv_emit_compute_scratch(queue, cs, compute_scratch_size_per_wave, compute_scratch_waves,4008compute_scratch_bo);4009radv_emit_graphics_scratch(queue, cs, scratch_size_per_wave, scratch_waves, scratch_bo);40104011if (gds_bo)4012radv_cs_add_buffer(queue->device->ws, cs, gds_bo);4013if (gds_oa_bo)4014radv_cs_add_buffer(queue->device->ws, cs, gds_oa_bo);40154016if (i == 0) {4017si_cs_emit_cache_flush(4018cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,4019queue->queue_family_index == RING_COMPUTE &&4020queue->device->physical_device->rad_info.chip_class >= GFX7,4021(queue->queue_family_index == RADV_QUEUE_COMPUTE4022? RADV_CMD_FLAG_CS_PARTIAL_FLUSH4023: (RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_PS_PARTIAL_FLUSH)) |4024RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE | RADV_CMD_FLAG_INV_VCACHE |4025RADV_CMD_FLAG_INV_L2 | RADV_CMD_FLAG_START_PIPELINE_STATS,4026&sqtt_flush_bits, 0);4027} else if (i == 1) {4028si_cs_emit_cache_flush(cs, queue->device->physical_device->rad_info.chip_class, NULL, 0,4029queue->queue_family_index == RING_COMPUTE &&4030queue->device->physical_device->rad_info.chip_class >= GFX7,4031RADV_CMD_FLAG_INV_ICACHE | RADV_CMD_FLAG_INV_SCACHE |4032RADV_CMD_FLAG_INV_VCACHE | RADV_CMD_FLAG_INV_L2 |4033RADV_CMD_FLAG_START_PIPELINE_STATS,4034&sqtt_flush_bits, 0);4035}40364037result = queue->device->ws->cs_finalize(cs);4038if (result != VK_SUCCESS)4039goto fail;4040}40414042if (queue->initial_full_flush_preamble_cs)4043queue->device->ws->cs_destroy(queue->initial_full_flush_preamble_cs);40444045if (queue->initial_preamble_cs)4046queue->device->ws->cs_destroy(queue->initial_preamble_cs);40474048if (queue->continue_preamble_cs)4049queue->device->ws->cs_destroy(queue->continue_preamble_cs);40504051queue->initial_full_flush_preamble_cs = dest_cs[0];4052queue->initial_preamble_cs = dest_cs[1];4053queue->continue_preamble_cs = dest_cs[2];40544055if (scratch_bo != queue->scratch_bo) {4056if (queue->scratch_bo)4057queue->device->ws->buffer_destroy(queue->device->ws, queue->scratch_bo);4058queue->scratch_bo = scratch_bo;4059}4060queue->scratch_size_per_wave = scratch_size_per_wave;4061queue->scratch_waves = scratch_waves;40624063if (compute_scratch_bo != queue->compute_scratch_bo) {4064if (queue->compute_scratch_bo)4065queue->device->ws->buffer_destroy(queue->device->ws, queue->compute_scratch_bo);4066queue->compute_scratch_bo = compute_scratch_bo;4067}4068queue->compute_scratch_size_per_wave = compute_scratch_size_per_wave;4069queue->compute_scratch_waves = compute_scratch_waves;40704071if (esgs_ring_bo != queue->esgs_ring_bo) {4072if (queue->esgs_ring_bo)4073queue->device->ws->buffer_destroy(queue->device->ws, queue->esgs_ring_bo);4074queue->esgs_ring_bo = esgs_ring_bo;4075queue->esgs_ring_size = esgs_ring_size;4076}40774078if (gsvs_ring_bo != queue->gsvs_ring_bo) {4079if (queue->gsvs_ring_bo)4080queue->device->ws->buffer_destroy(queue->device->ws, queue->gsvs_ring_bo);4081queue->gsvs_ring_bo = gsvs_ring_bo;4082queue->gsvs_ring_size = gsvs_ring_size;4083}40844085if (tess_rings_bo != queue->tess_rings_bo) {4086queue->tess_rings_bo = tess_rings_bo;4087queue->has_tess_rings = true;4088}40894090if (gds_bo != queue->gds_bo) {4091queue->gds_bo = gds_bo;4092queue->has_gds = true;4093}40944095if (gds_oa_bo != queue->gds_oa_bo) {4096queue->gds_oa_bo = gds_oa_bo;4097queue->has_gds_oa = true;4098}40994100if (descriptor_bo != queue->descriptor_bo) {4101if (queue->descriptor_bo)4102queue->device->ws->buffer_destroy(queue->device->ws, queue->descriptor_bo);41034104queue->descriptor_bo = descriptor_bo;4105}41064107if (add_sample_positions)4108queue->has_sample_positions = true;41094110*initial_full_flush_preamble_cs = queue->initial_full_flush_preamble_cs;4111*initial_preamble_cs = queue->initial_preamble_cs;4112*continue_preamble_cs = queue->continue_preamble_cs;4113if (!scratch_size && !compute_scratch_size && !esgs_ring_size && !gsvs_ring_size)4114*continue_preamble_cs = NULL;4115return VK_SUCCESS;4116fail:4117for (int i = 0; i < ARRAY_SIZE(dest_cs); ++i)4118if (dest_cs[i])4119queue->device->ws->cs_destroy(dest_cs[i]);4120if (descriptor_bo && descriptor_bo != queue->descriptor_bo)4121queue->device->ws->buffer_destroy(queue->device->ws, descriptor_bo);4122if (scratch_bo && scratch_bo != queue->scratch_bo)4123queue->device->ws->buffer_destroy(queue->device->ws, scratch_bo);4124if (compute_scratch_bo && compute_scratch_bo != queue->compute_scratch_bo)4125queue->device->ws->buffer_destroy(queue->device->ws, compute_scratch_bo);4126if (esgs_ring_bo && esgs_ring_bo != queue->esgs_ring_bo)4127queue->device->ws->buffer_destroy(queue->device->ws, esgs_ring_bo);4128if (gsvs_ring_bo && gsvs_ring_bo != queue->gsvs_ring_bo)4129queue->device->ws->buffer_destroy(queue->device->ws, gsvs_ring_bo);4130if (tess_rings_bo && tess_rings_bo != queue->tess_rings_bo)4131queue->device->ws->buffer_destroy(queue->device->ws, tess_rings_bo);4132if (gds_bo && gds_bo != queue->gds_bo)4133queue->device->ws->buffer_destroy(queue->device->ws, gds_bo);4134if (gds_oa_bo && gds_oa_bo != queue->gds_oa_bo)4135queue->device->ws->buffer_destroy(queue->device->ws, gds_oa_bo);41364137return vk_error(queue->device->instance, result);4138}41394140static VkResult4141radv_alloc_sem_counts(struct radv_device *device, struct radv_winsys_sem_counts *counts,4142int num_sems, struct radv_semaphore_part **sems,4143const uint64_t *timeline_values, VkFence _fence, bool is_signal)4144{4145int syncobj_idx = 0, non_reset_idx = 0, timeline_idx = 0;41464147if (num_sems == 0 && _fence == VK_NULL_HANDLE)4148return VK_SUCCESS;41494150for (uint32_t i = 0; i < num_sems; i++) {4151switch (sems[i]->kind) {4152case RADV_SEMAPHORE_SYNCOBJ:4153counts->syncobj_count++;4154counts->syncobj_reset_count++;4155break;4156case RADV_SEMAPHORE_NONE:4157break;4158case RADV_SEMAPHORE_TIMELINE:4159counts->syncobj_count++;4160break;4161case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:4162counts->timeline_syncobj_count++;4163break;4164}4165}41664167if (_fence != VK_NULL_HANDLE)4168counts->syncobj_count++;41694170if (counts->syncobj_count || counts->timeline_syncobj_count) {4171counts->points = (uint64_t *)malloc(sizeof(*counts->syncobj) * counts->syncobj_count +4172(sizeof(*counts->syncobj) + sizeof(*counts->points)) *4173counts->timeline_syncobj_count);4174if (!counts->points)4175return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);4176counts->syncobj = (uint32_t *)(counts->points + counts->timeline_syncobj_count);4177}41784179non_reset_idx = counts->syncobj_reset_count;41804181for (uint32_t i = 0; i < num_sems; i++) {4182switch (sems[i]->kind) {4183case RADV_SEMAPHORE_NONE:4184unreachable("Empty semaphore");4185break;4186case RADV_SEMAPHORE_SYNCOBJ:4187counts->syncobj[syncobj_idx++] = sems[i]->syncobj;4188break;4189case RADV_SEMAPHORE_TIMELINE: {4190mtx_lock(&sems[i]->timeline.mutex);4191struct radv_timeline_point *point = NULL;4192if (is_signal) {4193point = radv_timeline_add_point_locked(device, &sems[i]->timeline, timeline_values[i]);4194} else {4195point = radv_timeline_find_point_at_least_locked(device, &sems[i]->timeline,4196timeline_values[i]);4197}41984199mtx_unlock(&sems[i]->timeline.mutex);42004201if (point) {4202counts->syncobj[non_reset_idx++] = point->syncobj;4203} else {4204/* Explicitly remove the semaphore so we might not find4205* a point later post-submit. */4206sems[i] = NULL;4207}4208break;4209}4210case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:4211counts->syncobj[counts->syncobj_count + timeline_idx] = sems[i]->syncobj;4212counts->points[timeline_idx] = timeline_values[i];4213++timeline_idx;4214break;4215}4216}42174218if (_fence != VK_NULL_HANDLE) {4219RADV_FROM_HANDLE(radv_fence, fence, _fence);42204221struct radv_fence_part *part =4222fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;4223counts->syncobj[non_reset_idx++] = part->syncobj;4224}42254226assert(MAX2(syncobj_idx, non_reset_idx) <= counts->syncobj_count);4227counts->syncobj_count = MAX2(syncobj_idx, non_reset_idx);42284229return VK_SUCCESS;4230}42314232static void4233radv_free_sem_info(struct radv_winsys_sem_info *sem_info)4234{4235free(sem_info->wait.points);4236free(sem_info->signal.points);4237}42384239static void4240radv_free_temp_syncobjs(struct radv_device *device, int num_sems, struct radv_semaphore_part *sems)4241{4242for (uint32_t i = 0; i < num_sems; i++) {4243radv_destroy_semaphore_part(device, sems + i);4244}4245}42464247static VkResult4248radv_alloc_sem_info(struct radv_device *device, struct radv_winsys_sem_info *sem_info,4249int num_wait_sems, struct radv_semaphore_part **wait_sems,4250const uint64_t *wait_values, int num_signal_sems,4251struct radv_semaphore_part **signal_sems, const uint64_t *signal_values,4252VkFence fence)4253{4254VkResult ret;42554256ret = radv_alloc_sem_counts(device, &sem_info->wait, num_wait_sems, wait_sems, wait_values,4257VK_NULL_HANDLE, false);4258if (ret)4259return ret;4260ret = radv_alloc_sem_counts(device, &sem_info->signal, num_signal_sems, signal_sems,4261signal_values, fence, true);4262if (ret)4263radv_free_sem_info(sem_info);42644265/* caller can override these */4266sem_info->cs_emit_wait = true;4267sem_info->cs_emit_signal = true;4268return ret;4269}42704271static void4272radv_finalize_timelines(struct radv_device *device, uint32_t num_wait_sems,4273struct radv_semaphore_part **wait_sems, const uint64_t *wait_values,4274uint32_t num_signal_sems, struct radv_semaphore_part **signal_sems,4275const uint64_t *signal_values, struct list_head *processing_list)4276{4277for (uint32_t i = 0; i < num_wait_sems; ++i) {4278if (wait_sems[i] && wait_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {4279mtx_lock(&wait_sems[i]->timeline.mutex);4280struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(4281device, &wait_sems[i]->timeline, wait_values[i]);4282point->wait_count -= 2;4283mtx_unlock(&wait_sems[i]->timeline.mutex);4284}4285}4286for (uint32_t i = 0; i < num_signal_sems; ++i) {4287if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE) {4288mtx_lock(&signal_sems[i]->timeline.mutex);4289struct radv_timeline_point *point = radv_timeline_find_point_at_least_locked(4290device, &signal_sems[i]->timeline, signal_values[i]);4291signal_sems[i]->timeline.highest_submitted =4292MAX2(signal_sems[i]->timeline.highest_submitted, point->value);4293point->wait_count -= 2;4294radv_timeline_trigger_waiters_locked(&signal_sems[i]->timeline, processing_list);4295mtx_unlock(&signal_sems[i]->timeline.mutex);4296} else if (signal_sems[i] && signal_sems[i]->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ) {4297signal_sems[i]->timeline_syncobj.max_point =4298MAX2(signal_sems[i]->timeline_syncobj.max_point, signal_values[i]);4299}4300}4301}43024303static VkResult4304radv_sparse_buffer_bind_memory(struct radv_device *device, const VkSparseBufferMemoryBindInfo *bind)4305{4306RADV_FROM_HANDLE(radv_buffer, buffer, bind->buffer);4307VkResult result;43084309for (uint32_t i = 0; i < bind->bindCount; ++i) {4310struct radv_device_memory *mem = NULL;43114312if (bind->pBinds[i].memory != VK_NULL_HANDLE)4313mem = radv_device_memory_from_handle(bind->pBinds[i].memory);43144315result = device->ws->buffer_virtual_bind(device->ws, buffer->bo,4316bind->pBinds[i].resourceOffset, bind->pBinds[i].size,4317mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);4318if (result != VK_SUCCESS)4319return result;4320}43214322return VK_SUCCESS;4323}43244325static VkResult4326radv_sparse_image_opaque_bind_memory(struct radv_device *device,4327const VkSparseImageOpaqueMemoryBindInfo *bind)4328{4329RADV_FROM_HANDLE(radv_image, image, bind->image);4330VkResult result;43314332for (uint32_t i = 0; i < bind->bindCount; ++i) {4333struct radv_device_memory *mem = NULL;43344335if (bind->pBinds[i].memory != VK_NULL_HANDLE)4336mem = radv_device_memory_from_handle(bind->pBinds[i].memory);43374338result = device->ws->buffer_virtual_bind(device->ws, image->bo,4339bind->pBinds[i].resourceOffset, bind->pBinds[i].size,4340mem ? mem->bo : NULL, bind->pBinds[i].memoryOffset);4341if (result != VK_SUCCESS)4342return result;4343}43444345return VK_SUCCESS;4346}43474348static VkResult4349radv_sparse_image_bind_memory(struct radv_device *device, const VkSparseImageMemoryBindInfo *bind)4350{4351RADV_FROM_HANDLE(radv_image, image, bind->image);4352struct radeon_surf *surface = &image->planes[0].surface;4353uint32_t bs = vk_format_get_blocksize(image->vk_format);4354VkResult result;43554356for (uint32_t i = 0; i < bind->bindCount; ++i) {4357struct radv_device_memory *mem = NULL;4358uint32_t offset, pitch;4359uint32_t mem_offset = bind->pBinds[i].memoryOffset;4360const uint32_t layer = bind->pBinds[i].subresource.arrayLayer;4361const uint32_t level = bind->pBinds[i].subresource.mipLevel;43624363VkExtent3D bind_extent = bind->pBinds[i].extent;4364bind_extent.width =4365DIV_ROUND_UP(bind_extent.width, vk_format_get_blockwidth(image->vk_format));4366bind_extent.height =4367DIV_ROUND_UP(bind_extent.height, vk_format_get_blockheight(image->vk_format));43684369VkOffset3D bind_offset = bind->pBinds[i].offset;4370bind_offset.x /= vk_format_get_blockwidth(image->vk_format);4371bind_offset.y /= vk_format_get_blockheight(image->vk_format);43724373if (bind->pBinds[i].memory != VK_NULL_HANDLE)4374mem = radv_device_memory_from_handle(bind->pBinds[i].memory);43754376if (device->physical_device->rad_info.chip_class >= GFX9) {4377offset = surface->u.gfx9.surf_slice_size * layer + surface->u.gfx9.prt_level_offset[level];4378pitch = surface->u.gfx9.prt_level_pitch[level];4379} else {4380offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +4381surface->u.legacy.level[level].slice_size_dw * 4 * layer;4382pitch = surface->u.legacy.level[level].nblk_x;4383}43844385offset += (bind_offset.y * pitch * bs) + (bind_offset.x * surface->prt_tile_height * bs);43864387uint32_t aligned_extent_width = ALIGN(bind_extent.width, surface->prt_tile_width);43884389bool whole_subres = bind_offset.x == 0 && aligned_extent_width == pitch;43904391if (whole_subres) {4392uint32_t aligned_extent_height = ALIGN(bind_extent.height, surface->prt_tile_height);43934394uint32_t size = aligned_extent_width * aligned_extent_height * bs;4395result = device->ws->buffer_virtual_bind(device->ws, image->bo, offset, size,4396mem ? mem->bo : NULL, mem_offset);4397if (result != VK_SUCCESS)4398return result;4399} else {4400uint32_t img_increment = pitch * bs;4401uint32_t mem_increment = aligned_extent_width * bs;4402uint32_t size = mem_increment * surface->prt_tile_height;4403for (unsigned y = 0; y < bind_extent.height; y += surface->prt_tile_height) {4404result = device->ws->buffer_virtual_bind(4405device->ws, image->bo, offset + img_increment * y, size, mem ? mem->bo : NULL,4406mem_offset + mem_increment * y);4407if (result != VK_SUCCESS)4408return result;4409}4410}4411}44124413return VK_SUCCESS;4414}44154416static VkResult4417radv_get_preambles(struct radv_queue *queue, const VkCommandBuffer *cmd_buffers,4418uint32_t cmd_buffer_count, struct radeon_cmdbuf **initial_full_flush_preamble_cs,4419struct radeon_cmdbuf **initial_preamble_cs,4420struct radeon_cmdbuf **continue_preamble_cs)4421{4422uint32_t scratch_size_per_wave = 0, waves_wanted = 0;4423uint32_t compute_scratch_size_per_wave = 0, compute_waves_wanted = 0;4424uint32_t esgs_ring_size = 0, gsvs_ring_size = 0;4425bool tess_rings_needed = false;4426bool gds_needed = false;4427bool gds_oa_needed = false;4428bool sample_positions_needed = false;44294430for (uint32_t j = 0; j < cmd_buffer_count; j++) {4431RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, cmd_buffers[j]);44324433scratch_size_per_wave = MAX2(scratch_size_per_wave, cmd_buffer->scratch_size_per_wave_needed);4434waves_wanted = MAX2(waves_wanted, cmd_buffer->scratch_waves_wanted);4435compute_scratch_size_per_wave =4436MAX2(compute_scratch_size_per_wave, cmd_buffer->compute_scratch_size_per_wave_needed);4437compute_waves_wanted = MAX2(compute_waves_wanted, cmd_buffer->compute_scratch_waves_wanted);4438esgs_ring_size = MAX2(esgs_ring_size, cmd_buffer->esgs_ring_size_needed);4439gsvs_ring_size = MAX2(gsvs_ring_size, cmd_buffer->gsvs_ring_size_needed);4440tess_rings_needed |= cmd_buffer->tess_rings_needed;4441gds_needed |= cmd_buffer->gds_needed;4442gds_oa_needed |= cmd_buffer->gds_oa_needed;4443sample_positions_needed |= cmd_buffer->sample_positions_needed;4444}44454446return radv_get_preamble_cs(queue, scratch_size_per_wave, waves_wanted,4447compute_scratch_size_per_wave, compute_waves_wanted, esgs_ring_size,4448gsvs_ring_size, tess_rings_needed, gds_needed, gds_oa_needed,4449sample_positions_needed, initial_full_flush_preamble_cs,4450initial_preamble_cs, continue_preamble_cs);4451}44524453struct radv_deferred_queue_submission {4454struct radv_queue *queue;4455VkCommandBuffer *cmd_buffers;4456uint32_t cmd_buffer_count;44574458/* Sparse bindings that happen on a queue. */4459VkSparseBufferMemoryBindInfo *buffer_binds;4460uint32_t buffer_bind_count;4461VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;4462uint32_t image_opaque_bind_count;4463VkSparseImageMemoryBindInfo *image_binds;4464uint32_t image_bind_count;44654466bool flush_caches;4467VkShaderStageFlags wait_dst_stage_mask;4468struct radv_semaphore_part **wait_semaphores;4469uint32_t wait_semaphore_count;4470struct radv_semaphore_part **signal_semaphores;4471uint32_t signal_semaphore_count;4472VkFence fence;44734474uint64_t *wait_values;4475uint64_t *signal_values;44764477struct radv_semaphore_part *temporary_semaphore_parts;4478uint32_t temporary_semaphore_part_count;44794480struct list_head queue_pending_list;4481uint32_t submission_wait_count;4482struct radv_timeline_waiter *wait_nodes;44834484struct list_head processing_list;4485};44864487struct radv_queue_submission {4488const VkCommandBuffer *cmd_buffers;4489uint32_t cmd_buffer_count;44904491/* Sparse bindings that happen on a queue. */4492const VkSparseBufferMemoryBindInfo *buffer_binds;4493uint32_t buffer_bind_count;4494const VkSparseImageOpaqueMemoryBindInfo *image_opaque_binds;4495uint32_t image_opaque_bind_count;4496const VkSparseImageMemoryBindInfo *image_binds;4497uint32_t image_bind_count;44984499bool flush_caches;4500VkPipelineStageFlags wait_dst_stage_mask;4501const VkSemaphore *wait_semaphores;4502uint32_t wait_semaphore_count;4503const VkSemaphore *signal_semaphores;4504uint32_t signal_semaphore_count;4505VkFence fence;45064507const uint64_t *wait_values;4508uint32_t wait_value_count;4509const uint64_t *signal_values;4510uint32_t signal_value_count;4511};45124513static VkResult radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission,4514uint32_t decrement,4515struct list_head *processing_list);45164517static VkResult4518radv_create_deferred_submission(struct radv_queue *queue,4519const struct radv_queue_submission *submission,4520struct radv_deferred_queue_submission **out)4521{4522struct radv_deferred_queue_submission *deferred = NULL;4523size_t size = sizeof(struct radv_deferred_queue_submission);45244525uint32_t temporary_count = 0;4526for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {4527RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);4528if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE)4529++temporary_count;4530}45314532size += submission->cmd_buffer_count * sizeof(VkCommandBuffer);4533size += submission->buffer_bind_count * sizeof(VkSparseBufferMemoryBindInfo);4534size += submission->image_opaque_bind_count * sizeof(VkSparseImageOpaqueMemoryBindInfo);4535size += submission->image_bind_count * sizeof(VkSparseImageMemoryBindInfo);45364537for (uint32_t i = 0; i < submission->image_bind_count; ++i)4538size += submission->image_binds[i].bindCount * sizeof(VkSparseImageMemoryBind);45394540size += submission->wait_semaphore_count * sizeof(struct radv_semaphore_part *);4541size += temporary_count * sizeof(struct radv_semaphore_part);4542size += submission->signal_semaphore_count * sizeof(struct radv_semaphore_part *);4543size += submission->wait_value_count * sizeof(uint64_t);4544size += submission->signal_value_count * sizeof(uint64_t);4545size += submission->wait_semaphore_count * sizeof(struct radv_timeline_waiter);45464547deferred = calloc(1, size);4548if (!deferred)4549return VK_ERROR_OUT_OF_HOST_MEMORY;45504551deferred->queue = queue;45524553deferred->cmd_buffers = (void *)(deferred + 1);4554deferred->cmd_buffer_count = submission->cmd_buffer_count;4555if (submission->cmd_buffer_count) {4556memcpy(deferred->cmd_buffers, submission->cmd_buffers,4557submission->cmd_buffer_count * sizeof(*deferred->cmd_buffers));4558}45594560deferred->buffer_binds = (void *)(deferred->cmd_buffers + submission->cmd_buffer_count);4561deferred->buffer_bind_count = submission->buffer_bind_count;4562if (submission->buffer_bind_count) {4563memcpy(deferred->buffer_binds, submission->buffer_binds,4564submission->buffer_bind_count * sizeof(*deferred->buffer_binds));4565}45664567deferred->image_opaque_binds = (void *)(deferred->buffer_binds + submission->buffer_bind_count);4568deferred->image_opaque_bind_count = submission->image_opaque_bind_count;4569if (submission->image_opaque_bind_count) {4570memcpy(deferred->image_opaque_binds, submission->image_opaque_binds,4571submission->image_opaque_bind_count * sizeof(*deferred->image_opaque_binds));4572}45734574deferred->image_binds =4575(void *)(deferred->image_opaque_binds + deferred->image_opaque_bind_count);4576deferred->image_bind_count = submission->image_bind_count;45774578VkSparseImageMemoryBind *sparse_image_binds =4579(void *)(deferred->image_binds + deferred->image_bind_count);4580for (uint32_t i = 0; i < deferred->image_bind_count; ++i) {4581deferred->image_binds[i] = submission->image_binds[i];4582deferred->image_binds[i].pBinds = sparse_image_binds;45834584for (uint32_t j = 0; j < deferred->image_binds[i].bindCount; ++j)4585*sparse_image_binds++ = submission->image_binds[i].pBinds[j];4586}45874588deferred->flush_caches = submission->flush_caches;4589deferred->wait_dst_stage_mask = submission->wait_dst_stage_mask;45904591deferred->wait_semaphores = (void *)sparse_image_binds;4592deferred->wait_semaphore_count = submission->wait_semaphore_count;45934594deferred->signal_semaphores =4595(void *)(deferred->wait_semaphores + deferred->wait_semaphore_count);4596deferred->signal_semaphore_count = submission->signal_semaphore_count;45974598deferred->fence = submission->fence;45994600deferred->temporary_semaphore_parts =4601(void *)(deferred->signal_semaphores + deferred->signal_semaphore_count);4602deferred->temporary_semaphore_part_count = temporary_count;46034604uint32_t temporary_idx = 0;4605for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {4606RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->wait_semaphores[i]);4607if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {4608deferred->wait_semaphores[i] = &deferred->temporary_semaphore_parts[temporary_idx];4609deferred->temporary_semaphore_parts[temporary_idx] = semaphore->temporary;4610semaphore->temporary.kind = RADV_SEMAPHORE_NONE;4611++temporary_idx;4612} else4613deferred->wait_semaphores[i] = &semaphore->permanent;4614}46154616for (uint32_t i = 0; i < submission->signal_semaphore_count; ++i) {4617RADV_FROM_HANDLE(radv_semaphore, semaphore, submission->signal_semaphores[i]);4618if (semaphore->temporary.kind != RADV_SEMAPHORE_NONE) {4619deferred->signal_semaphores[i] = &semaphore->temporary;4620} else {4621deferred->signal_semaphores[i] = &semaphore->permanent;4622}4623}46244625deferred->wait_values = (void *)(deferred->temporary_semaphore_parts + temporary_count);4626if (submission->wait_value_count) {4627memcpy(deferred->wait_values, submission->wait_values,4628submission->wait_value_count * sizeof(uint64_t));4629}4630deferred->signal_values = deferred->wait_values + submission->wait_value_count;4631if (submission->signal_value_count) {4632memcpy(deferred->signal_values, submission->signal_values,4633submission->signal_value_count * sizeof(uint64_t));4634}46354636deferred->wait_nodes = (void *)(deferred->signal_values + submission->signal_value_count);4637/* This is worst-case. radv_queue_enqueue_submission will fill in further, but this4638* ensure the submission is not accidentally triggered early when adding wait timelines. */4639deferred->submission_wait_count = 1 + submission->wait_semaphore_count;46404641*out = deferred;4642return VK_SUCCESS;4643}46444645static VkResult4646radv_queue_enqueue_submission(struct radv_deferred_queue_submission *submission,4647struct list_head *processing_list)4648{4649uint32_t wait_cnt = 0;4650struct radv_timeline_waiter *waiter = submission->wait_nodes;4651for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {4652if (submission->wait_semaphores[i]->kind == RADV_SEMAPHORE_TIMELINE) {4653mtx_lock(&submission->wait_semaphores[i]->timeline.mutex);4654if (submission->wait_semaphores[i]->timeline.highest_submitted <4655submission->wait_values[i]) {4656++wait_cnt;4657waiter->value = submission->wait_values[i];4658waiter->submission = submission;4659list_addtail(&waiter->list, &submission->wait_semaphores[i]->timeline.waiters);4660++waiter;4661}4662mtx_unlock(&submission->wait_semaphores[i]->timeline.mutex);4663}4664}46654666mtx_lock(&submission->queue->pending_mutex);46674668bool is_first = list_is_empty(&submission->queue->pending_submissions);4669list_addtail(&submission->queue_pending_list, &submission->queue->pending_submissions);46704671mtx_unlock(&submission->queue->pending_mutex);46724673/* If there is already a submission in the queue, that will decrement the counter by 1 when4674* submitted, but if the queue was empty, we decrement ourselves as there is no previous4675* submission. */4676uint32_t decrement = submission->wait_semaphore_count - wait_cnt + (is_first ? 1 : 0);46774678/* if decrement is zero, then we don't have a refcounted reference to the4679* submission anymore, so it is not safe to access the submission. */4680if (!decrement)4681return VK_SUCCESS;46824683return radv_queue_trigger_submission(submission, decrement, processing_list);4684}46854686static void4687radv_queue_submission_update_queue(struct radv_deferred_queue_submission *submission,4688struct list_head *processing_list)4689{4690mtx_lock(&submission->queue->pending_mutex);4691list_del(&submission->queue_pending_list);46924693/* trigger the next submission in the queue. */4694if (!list_is_empty(&submission->queue->pending_submissions)) {4695struct radv_deferred_queue_submission *next_submission =4696list_first_entry(&submission->queue->pending_submissions,4697struct radv_deferred_queue_submission, queue_pending_list);4698radv_queue_trigger_submission(next_submission, 1, processing_list);4699}4700mtx_unlock(&submission->queue->pending_mutex);47014702u_cnd_monotonic_broadcast(&submission->queue->device->timeline_cond);4703}47044705static VkResult4706radv_queue_submit_deferred(struct radv_deferred_queue_submission *submission,4707struct list_head *processing_list)4708{4709struct radv_queue *queue = submission->queue;4710struct radeon_winsys_ctx *ctx = queue->hw_ctx;4711uint32_t max_cs_submission = queue->device->trace_bo ? 1 : RADV_MAX_IBS_PER_SUBMIT;4712bool do_flush = submission->flush_caches || submission->wait_dst_stage_mask;4713bool can_patch = true;4714uint32_t advance;4715struct radv_winsys_sem_info sem_info = {0};4716VkResult result;4717struct radeon_cmdbuf *initial_preamble_cs = NULL;4718struct radeon_cmdbuf *initial_flush_preamble_cs = NULL;4719struct radeon_cmdbuf *continue_preamble_cs = NULL;47204721result =4722radv_get_preambles(queue, submission->cmd_buffers, submission->cmd_buffer_count,4723&initial_preamble_cs, &initial_flush_preamble_cs, &continue_preamble_cs);4724if (result != VK_SUCCESS)4725goto fail;47264727result = radv_alloc_sem_info(queue->device, &sem_info, submission->wait_semaphore_count,4728submission->wait_semaphores, submission->wait_values,4729submission->signal_semaphore_count, submission->signal_semaphores,4730submission->signal_values, submission->fence);4731if (result != VK_SUCCESS)4732goto fail;47334734for (uint32_t i = 0; i < submission->buffer_bind_count; ++i) {4735result = radv_sparse_buffer_bind_memory(queue->device, submission->buffer_binds + i);4736if (result != VK_SUCCESS)4737goto fail;4738}47394740for (uint32_t i = 0; i < submission->image_opaque_bind_count; ++i) {4741result =4742radv_sparse_image_opaque_bind_memory(queue->device, submission->image_opaque_binds + i);4743if (result != VK_SUCCESS)4744goto fail;4745}47464747for (uint32_t i = 0; i < submission->image_bind_count; ++i) {4748result = radv_sparse_image_bind_memory(queue->device, submission->image_binds + i);4749if (result != VK_SUCCESS)4750goto fail;4751}47524753if (!submission->cmd_buffer_count) {4754result = queue->device->ws->cs_submit(ctx, queue->queue_idx,4755&queue->device->empty_cs[queue->queue_family_index], 1,4756NULL, NULL, &sem_info, false);4757if (result != VK_SUCCESS)4758goto fail;4759} else {4760struct radeon_cmdbuf **cs_array =4761malloc(sizeof(struct radeon_cmdbuf *) * (submission->cmd_buffer_count));47624763for (uint32_t j = 0; j < submission->cmd_buffer_count; j++) {4764RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, submission->cmd_buffers[j]);4765assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);47664767cs_array[j] = cmd_buffer->cs;4768if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT))4769can_patch = false;47704771cmd_buffer->status = RADV_CMD_BUFFER_STATUS_PENDING;4772}47734774for (uint32_t j = 0; j < submission->cmd_buffer_count; j += advance) {4775struct radeon_cmdbuf *initial_preamble =4776(do_flush && !j) ? initial_flush_preamble_cs : initial_preamble_cs;4777advance = MIN2(max_cs_submission, submission->cmd_buffer_count - j);47784779if (queue->device->trace_bo)4780*queue->device->trace_id_ptr = 0;47814782sem_info.cs_emit_wait = j == 0;4783sem_info.cs_emit_signal = j + advance == submission->cmd_buffer_count;47844785result = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance,4786initial_preamble, continue_preamble_cs, &sem_info,4787can_patch);4788if (result != VK_SUCCESS) {4789free(cs_array);4790goto fail;4791}47924793if (queue->device->trace_bo) {4794radv_check_gpu_hangs(queue, cs_array[j]);4795}47964797if (queue->device->tma_bo) {4798radv_check_trap_handler(queue);4799}4800}48014802free(cs_array);4803}48044805radv_finalize_timelines(queue->device, submission->wait_semaphore_count,4806submission->wait_semaphores, submission->wait_values,4807submission->signal_semaphore_count, submission->signal_semaphores,4808submission->signal_values, processing_list);4809/* Has to happen after timeline finalization to make sure the4810* condition variable is only triggered when timelines and queue have4811* been updated. */4812radv_queue_submission_update_queue(submission, processing_list);48134814fail:4815if (result != VK_SUCCESS && result != VK_ERROR_DEVICE_LOST) {4816/* When something bad happened during the submission, such as4817* an out of memory issue, it might be hard to recover from4818* this inconsistent state. To avoid this sort of problem, we4819* assume that we are in a really bad situation and return4820* VK_ERROR_DEVICE_LOST to ensure the clients do not attempt4821* to submit the same job again to this device.4822*/4823result = radv_device_set_lost(queue->device, "vkQueueSubmit() failed");4824}48254826radv_free_temp_syncobjs(queue->device, submission->temporary_semaphore_part_count,4827submission->temporary_semaphore_parts);4828radv_free_sem_info(&sem_info);4829free(submission);4830return result;4831}48324833static VkResult4834radv_process_submissions(struct list_head *processing_list)4835{4836while (!list_is_empty(processing_list)) {4837struct radv_deferred_queue_submission *submission =4838list_first_entry(processing_list, struct radv_deferred_queue_submission, processing_list);4839list_del(&submission->processing_list);48404841VkResult result = radv_queue_submit_deferred(submission, processing_list);4842if (result != VK_SUCCESS)4843return result;4844}4845return VK_SUCCESS;4846}48474848static VkResult4849wait_for_submission_timelines_available(struct radv_deferred_queue_submission *submission,4850uint64_t timeout)4851{4852struct radv_device *device = submission->queue->device;4853uint32_t syncobj_count = 0;4854uint32_t syncobj_idx = 0;48554856for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {4857if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)4858continue;48594860if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])4861continue;4862++syncobj_count;4863}48644865if (!syncobj_count)4866return VK_SUCCESS;48674868uint64_t *points = malloc((sizeof(uint64_t) + sizeof(uint32_t)) * syncobj_count);4869if (!points)4870return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);48714872uint32_t *syncobj = (uint32_t *)(points + syncobj_count);48734874for (uint32_t i = 0; i < submission->wait_semaphore_count; ++i) {4875if (submission->wait_semaphores[i]->kind != RADV_SEMAPHORE_TIMELINE_SYNCOBJ)4876continue;48774878if (submission->wait_semaphores[i]->timeline_syncobj.max_point >= submission->wait_values[i])4879continue;48804881syncobj[syncobj_idx] = submission->wait_semaphores[i]->syncobj;4882points[syncobj_idx] = submission->wait_values[i];4883++syncobj_idx;4884}4885bool success = device->ws->wait_timeline_syncobj(device->ws, syncobj, points, syncobj_idx, true,4886true, timeout);48874888free(points);4889return success ? VK_SUCCESS : VK_TIMEOUT;4890}48914892static int4893radv_queue_submission_thread_run(void *q)4894{4895struct radv_queue *queue = q;48964897mtx_lock(&queue->thread_mutex);4898while (!p_atomic_read(&queue->thread_exit)) {4899struct radv_deferred_queue_submission *submission = queue->thread_submission;4900struct list_head processing_list;4901VkResult result = VK_SUCCESS;4902if (!submission) {4903u_cnd_monotonic_wait(&queue->thread_cond, &queue->thread_mutex);4904continue;4905}4906mtx_unlock(&queue->thread_mutex);49074908/* Wait at most 5 seconds so we have a chance to notice shutdown when4909* a semaphore never gets signaled. If it takes longer we just retry4910* the wait next iteration. */4911result =4912wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(5000000000));4913if (result != VK_SUCCESS) {4914mtx_lock(&queue->thread_mutex);4915continue;4916}49174918/* The lock isn't held but nobody will add one until we finish4919* the current submission. */4920p_atomic_set(&queue->thread_submission, NULL);49214922list_inithead(&processing_list);4923list_addtail(&submission->processing_list, &processing_list);4924result = radv_process_submissions(&processing_list);49254926mtx_lock(&queue->thread_mutex);4927}4928mtx_unlock(&queue->thread_mutex);4929return 0;4930}49314932static VkResult4933radv_queue_trigger_submission(struct radv_deferred_queue_submission *submission, uint32_t decrement,4934struct list_head *processing_list)4935{4936struct radv_queue *queue = submission->queue;4937int ret;4938if (p_atomic_add_return(&submission->submission_wait_count, -decrement))4939return VK_SUCCESS;49404941if (wait_for_submission_timelines_available(submission, radv_get_absolute_timeout(0)) ==4942VK_SUCCESS) {4943list_addtail(&submission->processing_list, processing_list);4944return VK_SUCCESS;4945}49464947mtx_lock(&queue->thread_mutex);49484949/* A submission can only be ready for the thread if it doesn't have4950* any predecessors in the same queue, so there can only be one such4951* submission at a time. */4952assert(queue->thread_submission == NULL);49534954/* Only start the thread on demand to save resources for the many games4955* which only use binary semaphores. */4956if (!queue->thread_running) {4957ret = thrd_create(&queue->submission_thread, radv_queue_submission_thread_run, queue);4958if (ret) {4959mtx_unlock(&queue->thread_mutex);4960return vk_errorf(queue->device->instance, VK_ERROR_DEVICE_LOST,4961"Failed to start submission thread");4962}4963queue->thread_running = true;4964}49654966queue->thread_submission = submission;4967mtx_unlock(&queue->thread_mutex);49684969u_cnd_monotonic_signal(&queue->thread_cond);4970return VK_SUCCESS;4971}49724973static VkResult4974radv_queue_submit(struct radv_queue *queue, const struct radv_queue_submission *submission)4975{4976struct radv_deferred_queue_submission *deferred = NULL;49774978VkResult result = radv_create_deferred_submission(queue, submission, &deferred);4979if (result != VK_SUCCESS)4980return result;49814982struct list_head processing_list;4983list_inithead(&processing_list);49844985result = radv_queue_enqueue_submission(deferred, &processing_list);4986if (result != VK_SUCCESS) {4987/* If anything is in the list we leak. */4988assert(list_is_empty(&processing_list));4989return result;4990}4991return radv_process_submissions(&processing_list);4992}49934994bool4995radv_queue_internal_submit(struct radv_queue *queue, struct radeon_cmdbuf *cs)4996{4997struct radeon_winsys_ctx *ctx = queue->hw_ctx;4998struct radv_winsys_sem_info sem_info = {0};4999VkResult result;50005001result = radv_alloc_sem_info(queue->device, &sem_info, 0, NULL, 0, 0, 0, NULL, VK_NULL_HANDLE);5002if (result != VK_SUCCESS)5003return false;50045005result =5006queue->device->ws->cs_submit(ctx, queue->queue_idx, &cs, 1, NULL, NULL, &sem_info, false);5007radv_free_sem_info(&sem_info);5008if (result != VK_SUCCESS)5009return false;50105011return true;5012}50135014/* Signals fence as soon as all the work currently put on queue is done. */5015static VkResult5016radv_signal_fence(struct radv_queue *queue, VkFence fence)5017{5018return radv_queue_submit(queue, &(struct radv_queue_submission){.fence = fence});5019}50205021static bool5022radv_submit_has_effects(const VkSubmitInfo *info)5023{5024return info->commandBufferCount || info->waitSemaphoreCount || info->signalSemaphoreCount;5025}50265027VkResult5028radv_QueueSubmit(VkQueue _queue, uint32_t submitCount, const VkSubmitInfo *pSubmits, VkFence fence)5029{5030RADV_FROM_HANDLE(radv_queue, queue, _queue);5031VkResult result;5032uint32_t fence_idx = 0;5033bool flushed_caches = false;50345035if (radv_device_is_lost(queue->device))5036return VK_ERROR_DEVICE_LOST;50375038if (fence != VK_NULL_HANDLE) {5039for (uint32_t i = 0; i < submitCount; ++i)5040if (radv_submit_has_effects(pSubmits + i))5041fence_idx = i;5042} else5043fence_idx = UINT32_MAX;50445045for (uint32_t i = 0; i < submitCount; i++) {5046if (!radv_submit_has_effects(pSubmits + i) && fence_idx != i)5047continue;50485049VkPipelineStageFlags wait_dst_stage_mask = 0;5050for (unsigned j = 0; j < pSubmits[i].waitSemaphoreCount; ++j) {5051wait_dst_stage_mask |= pSubmits[i].pWaitDstStageMask[j];5052}50535054const VkTimelineSemaphoreSubmitInfo *timeline_info =5055vk_find_struct_const(pSubmits[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);50565057result = radv_queue_submit(5058queue, &(struct radv_queue_submission){5059.cmd_buffers = pSubmits[i].pCommandBuffers,5060.cmd_buffer_count = pSubmits[i].commandBufferCount,5061.wait_dst_stage_mask = wait_dst_stage_mask,5062.flush_caches = !flushed_caches,5063.wait_semaphores = pSubmits[i].pWaitSemaphores,5064.wait_semaphore_count = pSubmits[i].waitSemaphoreCount,5065.signal_semaphores = pSubmits[i].pSignalSemaphores,5066.signal_semaphore_count = pSubmits[i].signalSemaphoreCount,5067.fence = i == fence_idx ? fence : VK_NULL_HANDLE,5068.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,5069.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues5070? timeline_info->waitSemaphoreValueCount5071: 0,5072.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,5073.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues5074? timeline_info->signalSemaphoreValueCount5075: 0,5076});5077if (result != VK_SUCCESS)5078return result;50795080flushed_caches = true;5081}50825083if (fence != VK_NULL_HANDLE && !submitCount) {5084result = radv_signal_fence(queue, fence);5085if (result != VK_SUCCESS)5086return result;5087}50885089return VK_SUCCESS;5090}50915092static const char *5093radv_get_queue_family_name(struct radv_queue *queue)5094{5095switch (queue->queue_family_index) {5096case RADV_QUEUE_GENERAL:5097return "graphics";5098case RADV_QUEUE_COMPUTE:5099return "compute";5100case RADV_QUEUE_TRANSFER:5101return "transfer";5102default:5103unreachable("Unknown queue family");5104}5105}51065107VkResult5108radv_QueueWaitIdle(VkQueue _queue)5109{5110RADV_FROM_HANDLE(radv_queue, queue, _queue);51115112if (radv_device_is_lost(queue->device))5113return VK_ERROR_DEVICE_LOST;51145115mtx_lock(&queue->pending_mutex);5116while (!list_is_empty(&queue->pending_submissions)) {5117u_cnd_monotonic_wait(&queue->device->timeline_cond, &queue->pending_mutex);5118}5119mtx_unlock(&queue->pending_mutex);51205121if (!queue->device->ws->ctx_wait_idle(5122queue->hw_ctx, radv_queue_family_to_ring(queue->queue_family_index), queue->queue_idx)) {5123return radv_device_set_lost(queue->device,5124"Failed to wait for a '%s' queue "5125"to be idle. GPU hang ?",5126radv_get_queue_family_name(queue));5127}51285129return VK_SUCCESS;5130}51315132VkResult5133radv_DeviceWaitIdle(VkDevice _device)5134{5135RADV_FROM_HANDLE(radv_device, device, _device);51365137for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) {5138for (unsigned q = 0; q < device->queue_count[i]; q++) {5139VkResult result = radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q]));51405141if (result != VK_SUCCESS)5142return result;5143}5144}5145return VK_SUCCESS;5146}51475148VkResult5149radv_EnumerateInstanceExtensionProperties(const char *pLayerName, uint32_t *pPropertyCount,5150VkExtensionProperties *pProperties)5151{5152if (pLayerName)5153return vk_error(NULL, VK_ERROR_LAYER_NOT_PRESENT);51545155return vk_enumerate_instance_extension_properties(&radv_instance_extensions_supported,5156pPropertyCount, pProperties);5157}51585159PFN_vkVoidFunction5160radv_GetInstanceProcAddr(VkInstance _instance, const char *pName)5161{5162RADV_FROM_HANDLE(radv_instance, instance, _instance);51635164/* The Vulkan 1.0 spec for vkGetInstanceProcAddr has a table of exactly5165* when we have to return valid function pointers, NULL, or it's left5166* undefined. See the table for exact details.5167*/5168if (pName == NULL)5169return NULL;51705171#define LOOKUP_RADV_ENTRYPOINT(entrypoint) \5172if (strcmp(pName, "vk" #entrypoint) == 0) \5173return (PFN_vkVoidFunction)radv_##entrypoint51745175LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceExtensionProperties);5176LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceLayerProperties);5177LOOKUP_RADV_ENTRYPOINT(EnumerateInstanceVersion);5178LOOKUP_RADV_ENTRYPOINT(CreateInstance);51795180/* GetInstanceProcAddr() can also be called with a NULL instance.5181* See https://gitlab.khronos.org/vulkan/vulkan/issues/20575182*/5183LOOKUP_RADV_ENTRYPOINT(GetInstanceProcAddr);51845185#undef LOOKUP_RADV_ENTRYPOINT51865187if (instance == NULL)5188return NULL;51895190return vk_instance_get_proc_addr(&instance->vk, &radv_instance_entrypoints, pName);5191}51925193/* Windows will use a dll definition file to avoid build errors. */5194#ifdef _WIN325195#undef PUBLIC5196#define PUBLIC5197#endif51985199/* The loader wants us to expose a second GetInstanceProcAddr function5200* to work around certain LD_PRELOAD issues seen in apps.5201*/5202PUBLIC5203VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL5204vk_icdGetInstanceProcAddr(VkInstance instance, const char *pName)5205{5206return radv_GetInstanceProcAddr(instance, pName);5207}52085209PUBLIC5210VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL5211vk_icdGetPhysicalDeviceProcAddr(VkInstance _instance, const char *pName)5212{5213RADV_FROM_HANDLE(radv_instance, instance, _instance);5214return vk_instance_get_physical_device_proc_addr(&instance->vk, pName);5215}52165217bool5218radv_get_memory_fd(struct radv_device *device, struct radv_device_memory *memory, int *pFD)5219{5220/* Only set BO metadata for the first plane */5221if (memory->image && memory->image->offset == 0) {5222struct radeon_bo_metadata metadata;5223radv_init_metadata(device, memory->image, &metadata);5224device->ws->buffer_set_metadata(device->ws, memory->bo, &metadata);5225}52265227return device->ws->buffer_get_fd(device->ws, memory->bo, pFD);5228}52295230void5231radv_free_memory(struct radv_device *device, const VkAllocationCallbacks *pAllocator,5232struct radv_device_memory *mem)5233{5234if (mem == NULL)5235return;52365237#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER5238if (mem->android_hardware_buffer)5239AHardwareBuffer_release(mem->android_hardware_buffer);5240#endif52415242if (mem->bo) {5243if (device->overallocation_disallowed) {5244mtx_lock(&device->overallocation_mutex);5245device->allocated_memory_size[mem->heap_index] -= mem->alloc_size;5246mtx_unlock(&device->overallocation_mutex);5247}52485249if (device->use_global_bo_list)5250device->ws->buffer_make_resident(device->ws, mem->bo, false);5251device->ws->buffer_destroy(device->ws, mem->bo);5252mem->bo = NULL;5253}52545255vk_object_base_finish(&mem->base);5256vk_free2(&device->vk.alloc, pAllocator, mem);5257}52585259static VkResult5260radv_alloc_memory(struct radv_device *device, const VkMemoryAllocateInfo *pAllocateInfo,5261const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)5262{5263struct radv_device_memory *mem;5264VkResult result;5265enum radeon_bo_domain domain;5266uint32_t flags = 0;52675268assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);52695270const VkImportMemoryFdInfoKHR *import_info =5271vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_FD_INFO_KHR);5272const VkMemoryDedicatedAllocateInfo *dedicate_info =5273vk_find_struct_const(pAllocateInfo->pNext, MEMORY_DEDICATED_ALLOCATE_INFO);5274const VkExportMemoryAllocateInfo *export_info =5275vk_find_struct_const(pAllocateInfo->pNext, EXPORT_MEMORY_ALLOCATE_INFO);5276const struct VkImportAndroidHardwareBufferInfoANDROID *ahb_import_info =5277vk_find_struct_const(pAllocateInfo->pNext, IMPORT_ANDROID_HARDWARE_BUFFER_INFO_ANDROID);5278const VkImportMemoryHostPointerInfoEXT *host_ptr_info =5279vk_find_struct_const(pAllocateInfo->pNext, IMPORT_MEMORY_HOST_POINTER_INFO_EXT);52805281const struct wsi_memory_allocate_info *wsi_info =5282vk_find_struct_const(pAllocateInfo->pNext, WSI_MEMORY_ALLOCATE_INFO_MESA);52835284if (pAllocateInfo->allocationSize == 0 && !ahb_import_info &&5285!(export_info && (export_info->handleTypes &5286VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID))) {5287/* Apparently, this is allowed */5288*pMem = VK_NULL_HANDLE;5289return VK_SUCCESS;5290}52915292mem =5293vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*mem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);5294if (mem == NULL)5295return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);52965297vk_object_base_init(&device->vk, &mem->base, VK_OBJECT_TYPE_DEVICE_MEMORY);52985299if (wsi_info) {5300if(wsi_info->implicit_sync)5301flags |= RADEON_FLAG_IMPLICIT_SYNC;53025303/* In case of prime, linear buffer is allocated in default heap which is VRAM.5304* Due to this when display is connected to iGPU and render on dGPU, ddx5305* function amdgpu_present_check_flip() fails due to which there is blit5306* instead of flip. Setting the flag RADEON_FLAG_GTT_WC allows kernel to5307* allocate GTT memory in supported hardware where GTT can be directly scanout.5308* Using wsi_info variable check to set the flag RADEON_FLAG_GTT_WC so that5309* only for memory allocated by driver this flag is set.5310*/5311flags |= RADEON_FLAG_GTT_WC;5312}53135314if (dedicate_info) {5315mem->image = radv_image_from_handle(dedicate_info->image);5316mem->buffer = radv_buffer_from_handle(dedicate_info->buffer);5317} else {5318mem->image = NULL;5319mem->buffer = NULL;5320}53215322float priority_float = 0.5;5323const struct VkMemoryPriorityAllocateInfoEXT *priority_ext =5324vk_find_struct_const(pAllocateInfo->pNext, MEMORY_PRIORITY_ALLOCATE_INFO_EXT);5325if (priority_ext)5326priority_float = priority_ext->priority;53275328uint64_t replay_address = 0;5329const VkMemoryOpaqueCaptureAddressAllocateInfo *replay_info =5330vk_find_struct_const(pAllocateInfo->pNext, MEMORY_OPAQUE_CAPTURE_ADDRESS_ALLOCATE_INFO);5331if (replay_info && replay_info->opaqueCaptureAddress)5332replay_address = replay_info->opaqueCaptureAddress;53335334unsigned priority = MIN2(RADV_BO_PRIORITY_APPLICATION_MAX - 1,5335(int)(priority_float * RADV_BO_PRIORITY_APPLICATION_MAX));53365337mem->user_ptr = NULL;5338mem->bo = NULL;53395340#if RADV_SUPPORT_ANDROID_HARDWARE_BUFFER5341mem->android_hardware_buffer = NULL;5342#endif53435344if (ahb_import_info) {5345result = radv_import_ahb_memory(device, mem, priority, ahb_import_info);5346if (result != VK_SUCCESS)5347goto fail;5348} else if (export_info && (export_info->handleTypes &5349VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID)) {5350result = radv_create_ahb_memory(device, mem, priority, pAllocateInfo);5351if (result != VK_SUCCESS)5352goto fail;5353} else if (import_info) {5354assert(import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||5355import_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);5356result = device->ws->buffer_from_fd(device->ws, import_info->fd, priority, &mem->bo, NULL);5357if (result != VK_SUCCESS) {5358goto fail;5359} else {5360close(import_info->fd);5361}53625363if (mem->image && mem->image->plane_count == 1 &&5364!vk_format_is_depth_or_stencil(mem->image->vk_format) && mem->image->info.samples == 1 &&5365mem->image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {5366struct radeon_bo_metadata metadata;5367device->ws->buffer_get_metadata(device->ws, mem->bo, &metadata);53685369struct radv_image_create_info create_info = {.no_metadata_planes = true,5370.bo_metadata = &metadata};53715372/* This gives a basic ability to import radeonsi images5373* that don't have DCC. This is not guaranteed by any5374* spec and can be removed after we support modifiers. */5375result = radv_image_create_layout(device, create_info, NULL, mem->image);5376if (result != VK_SUCCESS) {5377device->ws->buffer_destroy(device->ws, mem->bo);5378goto fail;5379}5380}5381} else if (host_ptr_info) {5382assert(host_ptr_info->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_HOST_ALLOCATION_BIT_EXT);5383result = device->ws->buffer_from_ptr(device->ws, host_ptr_info->pHostPointer,5384pAllocateInfo->allocationSize, priority, &mem->bo);5385if (result != VK_SUCCESS) {5386goto fail;5387} else {5388mem->user_ptr = host_ptr_info->pHostPointer;5389}5390} else {5391uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);5392uint32_t heap_index;53935394heap_index =5395device->physical_device->memory_properties.memoryTypes[pAllocateInfo->memoryTypeIndex]5396.heapIndex;5397domain = device->physical_device->memory_domains[pAllocateInfo->memoryTypeIndex];5398flags |= device->physical_device->memory_flags[pAllocateInfo->memoryTypeIndex];53995400if (!import_info && (!export_info || !export_info->handleTypes)) {5401flags |= RADEON_FLAG_NO_INTERPROCESS_SHARING;5402if (device->use_global_bo_list) {5403flags |= RADEON_FLAG_PREFER_LOCAL_BO;5404}5405}54065407const VkMemoryAllocateFlagsInfo *flags_info = vk_find_struct_const(pAllocateInfo->pNext, MEMORY_ALLOCATE_FLAGS_INFO);5408if (flags_info && flags_info->flags & VK_MEMORY_ALLOCATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)5409flags |= RADEON_FLAG_REPLAYABLE;54105411if (device->overallocation_disallowed) {5412uint64_t total_size =5413device->physical_device->memory_properties.memoryHeaps[heap_index].size;54145415mtx_lock(&device->overallocation_mutex);5416if (device->allocated_memory_size[heap_index] + alloc_size > total_size) {5417mtx_unlock(&device->overallocation_mutex);5418result = VK_ERROR_OUT_OF_DEVICE_MEMORY;5419goto fail;5420}5421device->allocated_memory_size[heap_index] += alloc_size;5422mtx_unlock(&device->overallocation_mutex);5423}54245425result = device->ws->buffer_create(device->ws, alloc_size,5426device->physical_device->rad_info.max_alignment, domain,5427flags, priority, replay_address, &mem->bo);54285429if (result != VK_SUCCESS) {5430if (device->overallocation_disallowed) {5431mtx_lock(&device->overallocation_mutex);5432device->allocated_memory_size[heap_index] -= alloc_size;5433mtx_unlock(&device->overallocation_mutex);5434}5435goto fail;5436}54375438mem->heap_index = heap_index;5439mem->alloc_size = alloc_size;5440}54415442if (!wsi_info) {5443if (device->use_global_bo_list) {5444result = device->ws->buffer_make_resident(device->ws, mem->bo, true);5445if (result != VK_SUCCESS)5446goto fail;5447}5448}54495450*pMem = radv_device_memory_to_handle(mem);54515452return VK_SUCCESS;54535454fail:5455radv_free_memory(device, pAllocator, mem);54565457return result;5458}54595460VkResult5461radv_AllocateMemory(VkDevice _device, const VkMemoryAllocateInfo *pAllocateInfo,5462const VkAllocationCallbacks *pAllocator, VkDeviceMemory *pMem)5463{5464RADV_FROM_HANDLE(radv_device, device, _device);5465return radv_alloc_memory(device, pAllocateInfo, pAllocator, pMem);5466}54675468void5469radv_FreeMemory(VkDevice _device, VkDeviceMemory _mem, const VkAllocationCallbacks *pAllocator)5470{5471RADV_FROM_HANDLE(radv_device, device, _device);5472RADV_FROM_HANDLE(radv_device_memory, mem, _mem);54735474radv_free_memory(device, pAllocator, mem);5475}54765477VkResult5478radv_MapMemory(VkDevice _device, VkDeviceMemory _memory, VkDeviceSize offset, VkDeviceSize size,5479VkMemoryMapFlags flags, void **ppData)5480{5481RADV_FROM_HANDLE(radv_device, device, _device);5482RADV_FROM_HANDLE(radv_device_memory, mem, _memory);54835484if (mem == NULL) {5485*ppData = NULL;5486return VK_SUCCESS;5487}54885489if (mem->user_ptr)5490*ppData = mem->user_ptr;5491else5492*ppData = device->ws->buffer_map(mem->bo);54935494if (*ppData) {5495*ppData = (uint8_t *)*ppData + offset;5496return VK_SUCCESS;5497}54985499return vk_error(device->instance, VK_ERROR_MEMORY_MAP_FAILED);5500}55015502void5503radv_UnmapMemory(VkDevice _device, VkDeviceMemory _memory)5504{5505RADV_FROM_HANDLE(radv_device, device, _device);5506RADV_FROM_HANDLE(radv_device_memory, mem, _memory);55075508if (mem == NULL)5509return;55105511if (mem->user_ptr == NULL)5512device->ws->buffer_unmap(mem->bo);5513}55145515VkResult5516radv_FlushMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,5517const VkMappedMemoryRange *pMemoryRanges)5518{5519return VK_SUCCESS;5520}55215522VkResult5523radv_InvalidateMappedMemoryRanges(VkDevice _device, uint32_t memoryRangeCount,5524const VkMappedMemoryRange *pMemoryRanges)5525{5526return VK_SUCCESS;5527}55285529void5530radv_GetBufferMemoryRequirements(VkDevice _device, VkBuffer _buffer,5531VkMemoryRequirements *pMemoryRequirements)5532{5533RADV_FROM_HANDLE(radv_device, device, _device);5534RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);55355536pMemoryRequirements->memoryTypeBits =5537(1u << device->physical_device->memory_properties.memoryTypeCount) - 1;55385539if (buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT)5540pMemoryRequirements->alignment = 4096;5541else5542pMemoryRequirements->alignment = 16;55435544pMemoryRequirements->size = align64(buffer->size, pMemoryRequirements->alignment);5545}55465547void5548radv_GetBufferMemoryRequirements2(VkDevice device, const VkBufferMemoryRequirementsInfo2 *pInfo,5549VkMemoryRequirements2 *pMemoryRequirements)5550{5551radv_GetBufferMemoryRequirements(device, pInfo->buffer,5552&pMemoryRequirements->memoryRequirements);5553vk_foreach_struct(ext, pMemoryRequirements->pNext)5554{5555switch (ext->sType) {5556case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {5557VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;5558req->requiresDedicatedAllocation = false;5559req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;5560break;5561}5562default:5563break;5564}5565}5566}55675568void5569radv_GetImageMemoryRequirements(VkDevice _device, VkImage _image,5570VkMemoryRequirements *pMemoryRequirements)5571{5572RADV_FROM_HANDLE(radv_device, device, _device);5573RADV_FROM_HANDLE(radv_image, image, _image);55745575pMemoryRequirements->memoryTypeBits =5576(1u << device->physical_device->memory_properties.memoryTypeCount) - 1;55775578pMemoryRequirements->size = image->size;5579pMemoryRequirements->alignment = image->alignment;5580}55815582void5583radv_GetImageMemoryRequirements2(VkDevice device, const VkImageMemoryRequirementsInfo2 *pInfo,5584VkMemoryRequirements2 *pMemoryRequirements)5585{5586radv_GetImageMemoryRequirements(device, pInfo->image, &pMemoryRequirements->memoryRequirements);55875588RADV_FROM_HANDLE(radv_image, image, pInfo->image);55895590vk_foreach_struct(ext, pMemoryRequirements->pNext)5591{5592switch (ext->sType) {5593case VK_STRUCTURE_TYPE_MEMORY_DEDICATED_REQUIREMENTS: {5594VkMemoryDedicatedRequirements *req = (VkMemoryDedicatedRequirements *)ext;5595req->requiresDedicatedAllocation =5596image->shareable && image->tiling != VK_IMAGE_TILING_LINEAR;5597req->prefersDedicatedAllocation = req->requiresDedicatedAllocation;5598break;5599}5600default:5601break;5602}5603}5604}56055606void5607radv_GetDeviceMemoryCommitment(VkDevice device, VkDeviceMemory memory,5608VkDeviceSize *pCommittedMemoryInBytes)5609{5610*pCommittedMemoryInBytes = 0;5611}56125613VkResult5614radv_BindBufferMemory2(VkDevice _device, uint32_t bindInfoCount,5615const VkBindBufferMemoryInfo *pBindInfos)5616{5617RADV_FROM_HANDLE(radv_device, device, _device);56185619for (uint32_t i = 0; i < bindInfoCount; ++i) {5620RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);5621RADV_FROM_HANDLE(radv_buffer, buffer, pBindInfos[i].buffer);56225623if (mem) {5624if (mem->alloc_size) {5625VkMemoryRequirements req;56265627radv_GetBufferMemoryRequirements(_device, pBindInfos[i].buffer, &req);56285629if (pBindInfos[i].memoryOffset + req.size > mem->alloc_size) {5630return vk_errorf(device->instance, VK_ERROR_UNKNOWN,5631"Device memory object too small for the buffer.\n");5632}5633}56345635buffer->bo = mem->bo;5636buffer->offset = pBindInfos[i].memoryOffset;5637} else {5638buffer->bo = NULL;5639}5640}5641return VK_SUCCESS;5642}56435644VkResult5645radv_BindBufferMemory(VkDevice device, VkBuffer buffer, VkDeviceMemory memory,5646VkDeviceSize memoryOffset)5647{5648const VkBindBufferMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,5649.buffer = buffer,5650.memory = memory,5651.memoryOffset = memoryOffset};56525653return radv_BindBufferMemory2(device, 1, &info);5654}56555656VkResult5657radv_BindImageMemory2(VkDevice _device, uint32_t bindInfoCount,5658const VkBindImageMemoryInfo *pBindInfos)5659{5660RADV_FROM_HANDLE(radv_device, device, _device);56615662for (uint32_t i = 0; i < bindInfoCount; ++i) {5663RADV_FROM_HANDLE(radv_device_memory, mem, pBindInfos[i].memory);5664RADV_FROM_HANDLE(radv_image, image, pBindInfos[i].image);56655666if (mem) {5667if (mem->alloc_size) {5668VkMemoryRequirements req;56695670radv_GetImageMemoryRequirements(_device, pBindInfos[i].image, &req);56715672if (pBindInfos[i].memoryOffset + req.size > mem->alloc_size) {5673return vk_errorf(device->instance, VK_ERROR_UNKNOWN,5674"Device memory object too small for the image.\n");5675}5676}56775678image->bo = mem->bo;5679image->offset = pBindInfos[i].memoryOffset;5680} else {5681image->bo = NULL;5682image->offset = 0;5683}5684}5685return VK_SUCCESS;5686}56875688VkResult5689radv_BindImageMemory(VkDevice device, VkImage image, VkDeviceMemory memory,5690VkDeviceSize memoryOffset)5691{5692const VkBindImageMemoryInfo info = {.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,5693.image = image,5694.memory = memory,5695.memoryOffset = memoryOffset};56965697return radv_BindImageMemory2(device, 1, &info);5698}56995700static bool5701radv_sparse_bind_has_effects(const VkBindSparseInfo *info)5702{5703return info->bufferBindCount || info->imageOpaqueBindCount || info->imageBindCount ||5704info->waitSemaphoreCount || info->signalSemaphoreCount;5705}57065707VkResult5708radv_QueueBindSparse(VkQueue _queue, uint32_t bindInfoCount, const VkBindSparseInfo *pBindInfo,5709VkFence fence)5710{5711RADV_FROM_HANDLE(radv_queue, queue, _queue);5712uint32_t fence_idx = 0;57135714if (radv_device_is_lost(queue->device))5715return VK_ERROR_DEVICE_LOST;57165717if (fence != VK_NULL_HANDLE) {5718for (uint32_t i = 0; i < bindInfoCount; ++i)5719if (radv_sparse_bind_has_effects(pBindInfo + i))5720fence_idx = i;5721} else5722fence_idx = UINT32_MAX;57235724for (uint32_t i = 0; i < bindInfoCount; ++i) {5725if (i != fence_idx && !radv_sparse_bind_has_effects(pBindInfo + i))5726continue;57275728const VkTimelineSemaphoreSubmitInfo *timeline_info =5729vk_find_struct_const(pBindInfo[i].pNext, TIMELINE_SEMAPHORE_SUBMIT_INFO);57305731VkResult result = radv_queue_submit(5732queue, &(struct radv_queue_submission){5733.buffer_binds = pBindInfo[i].pBufferBinds,5734.buffer_bind_count = pBindInfo[i].bufferBindCount,5735.image_opaque_binds = pBindInfo[i].pImageOpaqueBinds,5736.image_opaque_bind_count = pBindInfo[i].imageOpaqueBindCount,5737.image_binds = pBindInfo[i].pImageBinds,5738.image_bind_count = pBindInfo[i].imageBindCount,5739.wait_semaphores = pBindInfo[i].pWaitSemaphores,5740.wait_semaphore_count = pBindInfo[i].waitSemaphoreCount,5741.signal_semaphores = pBindInfo[i].pSignalSemaphores,5742.signal_semaphore_count = pBindInfo[i].signalSemaphoreCount,5743.fence = i == fence_idx ? fence : VK_NULL_HANDLE,5744.wait_values = timeline_info ? timeline_info->pWaitSemaphoreValues : NULL,5745.wait_value_count = timeline_info && timeline_info->pWaitSemaphoreValues5746? timeline_info->waitSemaphoreValueCount5747: 0,5748.signal_values = timeline_info ? timeline_info->pSignalSemaphoreValues : NULL,5749.signal_value_count = timeline_info && timeline_info->pSignalSemaphoreValues5750? timeline_info->signalSemaphoreValueCount5751: 0,5752});57535754if (result != VK_SUCCESS)5755return result;5756}57575758if (fence != VK_NULL_HANDLE && !bindInfoCount) {5759VkResult result = radv_signal_fence(queue, fence);5760if (result != VK_SUCCESS)5761return result;5762}57635764return VK_SUCCESS;5765}57665767static void5768radv_destroy_fence_part(struct radv_device *device, struct radv_fence_part *part)5769{5770if (part->kind != RADV_FENCE_NONE)5771device->ws->destroy_syncobj(device->ws, part->syncobj);5772part->kind = RADV_FENCE_NONE;5773}57745775static void5776radv_destroy_fence(struct radv_device *device, const VkAllocationCallbacks *pAllocator,5777struct radv_fence *fence)5778{5779radv_destroy_fence_part(device, &fence->temporary);5780radv_destroy_fence_part(device, &fence->permanent);57815782vk_object_base_finish(&fence->base);5783vk_free2(&device->vk.alloc, pAllocator, fence);5784}57855786VkResult5787radv_CreateFence(VkDevice _device, const VkFenceCreateInfo *pCreateInfo,5788const VkAllocationCallbacks *pAllocator, VkFence *pFence)5789{5790RADV_FROM_HANDLE(radv_device, device, _device);5791bool create_signaled = false;5792struct radv_fence *fence;5793int ret;57945795fence = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*fence), 8,5796VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);5797if (!fence)5798return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);57995800vk_object_base_init(&device->vk, &fence->base, VK_OBJECT_TYPE_FENCE);58015802fence->permanent.kind = RADV_FENCE_SYNCOBJ;58035804if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT)5805create_signaled = true;58065807ret = device->ws->create_syncobj(device->ws, create_signaled, &fence->permanent.syncobj);5808if (ret) {5809radv_destroy_fence(device, pAllocator, fence);5810return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);5811}58125813*pFence = radv_fence_to_handle(fence);58145815return VK_SUCCESS;5816}58175818void5819radv_DestroyFence(VkDevice _device, VkFence _fence, const VkAllocationCallbacks *pAllocator)5820{5821RADV_FROM_HANDLE(radv_device, device, _device);5822RADV_FROM_HANDLE(radv_fence, fence, _fence);58235824if (!fence)5825return;58265827radv_destroy_fence(device, pAllocator, fence);5828}58295830VkResult5831radv_WaitForFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences, VkBool32 waitAll,5832uint64_t timeout)5833{5834RADV_FROM_HANDLE(radv_device, device, _device);5835uint32_t *handles;58365837if (radv_device_is_lost(device))5838return VK_ERROR_DEVICE_LOST;58395840timeout = radv_get_absolute_timeout(timeout);58415842handles = malloc(sizeof(uint32_t) * fenceCount);5843if (!handles)5844return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);58455846for (uint32_t i = 0; i < fenceCount; ++i) {5847RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);58485849struct radv_fence_part *part =5850fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;58515852assert(part->kind == RADV_FENCE_SYNCOBJ);5853handles[i] = part->syncobj;5854}58555856bool success = device->ws->wait_syncobj(device->ws, handles, fenceCount, waitAll, timeout);5857free(handles);5858return success ? VK_SUCCESS : VK_TIMEOUT;5859}58605861VkResult5862radv_ResetFences(VkDevice _device, uint32_t fenceCount, const VkFence *pFences)5863{5864RADV_FROM_HANDLE(radv_device, device, _device);58655866for (unsigned i = 0; i < fenceCount; ++i) {5867RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);58685869/* From the Vulkan 1.0.53 spec:5870*5871* "If any member of pFences currently has its payload5872* imported with temporary permanence, that fence’s prior5873* permanent payload is irst restored. The remaining5874* operations described therefore operate on the restored5875* payload."5876*/5877if (fence->temporary.kind != RADV_FENCE_NONE)5878radv_destroy_fence_part(device, &fence->temporary);58795880device->ws->reset_syncobj(device->ws, fence->permanent.syncobj);5881}58825883return VK_SUCCESS;5884}58855886VkResult5887radv_GetFenceStatus(VkDevice _device, VkFence _fence)5888{5889RADV_FROM_HANDLE(radv_device, device, _device);5890RADV_FROM_HANDLE(radv_fence, fence, _fence);58915892struct radv_fence_part *part =5893fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;58945895if (radv_device_is_lost(device))5896return VK_ERROR_DEVICE_LOST;58975898bool success = device->ws->wait_syncobj(device->ws, &part->syncobj, 1, true, 0);5899return success ? VK_SUCCESS : VK_NOT_READY;5900}59015902// Queue semaphore functions59035904static void5905radv_create_timeline(struct radv_timeline *timeline, uint64_t value)5906{5907timeline->highest_signaled = value;5908timeline->highest_submitted = value;5909list_inithead(&timeline->points);5910list_inithead(&timeline->free_points);5911list_inithead(&timeline->waiters);5912mtx_init(&timeline->mutex, mtx_plain);5913}59145915static void5916radv_destroy_timeline(struct radv_device *device, struct radv_timeline *timeline)5917{5918list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->free_points, list)5919{5920list_del(&point->list);5921device->ws->destroy_syncobj(device->ws, point->syncobj);5922free(point);5923}5924list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)5925{5926list_del(&point->list);5927device->ws->destroy_syncobj(device->ws, point->syncobj);5928free(point);5929}5930mtx_destroy(&timeline->mutex);5931}59325933static void5934radv_timeline_gc_locked(struct radv_device *device, struct radv_timeline *timeline)5935{5936list_for_each_entry_safe(struct radv_timeline_point, point, &timeline->points, list)5937{5938if (point->wait_count || point->value > timeline->highest_submitted)5939return;59405941if (device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, 0)) {5942timeline->highest_signaled = point->value;5943list_del(&point->list);5944list_add(&point->list, &timeline->free_points);5945}5946}5947}59485949static struct radv_timeline_point *5950radv_timeline_find_point_at_least_locked(struct radv_device *device, struct radv_timeline *timeline,5951uint64_t p)5952{5953radv_timeline_gc_locked(device, timeline);59545955if (p <= timeline->highest_signaled)5956return NULL;59575958list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)5959{5960if (point->value >= p) {5961++point->wait_count;5962return point;5963}5964}5965return NULL;5966}59675968static struct radv_timeline_point *5969radv_timeline_add_point_locked(struct radv_device *device, struct radv_timeline *timeline,5970uint64_t p)5971{5972radv_timeline_gc_locked(device, timeline);59735974struct radv_timeline_point *ret = NULL;5975struct radv_timeline_point *prev = NULL;5976int r;59775978if (p <= timeline->highest_signaled)5979return NULL;59805981list_for_each_entry(struct radv_timeline_point, point, &timeline->points, list)5982{5983if (point->value == p) {5984return NULL;5985}59865987if (point->value < p)5988prev = point;5989}59905991if (list_is_empty(&timeline->free_points)) {5992ret = malloc(sizeof(struct radv_timeline_point));5993r = device->ws->create_syncobj(device->ws, false, &ret->syncobj);5994if (r) {5995free(ret);5996return NULL;5997}5998} else {5999ret = list_first_entry(&timeline->free_points, struct radv_timeline_point, list);6000list_del(&ret->list);60016002device->ws->reset_syncobj(device->ws, ret->syncobj);6003}60046005ret->value = p;6006ret->wait_count = 1;60076008if (prev) {6009list_add(&ret->list, &prev->list);6010} else {6011list_addtail(&ret->list, &timeline->points);6012}6013return ret;6014}60156016static VkResult6017radv_timeline_wait(struct radv_device *device, struct radv_timeline *timeline, uint64_t value,6018uint64_t abs_timeout)6019{6020mtx_lock(&timeline->mutex);60216022while (timeline->highest_submitted < value) {6023struct timespec abstime;6024timespec_from_nsec(&abstime, abs_timeout);60256026u_cnd_monotonic_timedwait(&device->timeline_cond, &timeline->mutex, &abstime);60276028if (radv_get_current_time() >= abs_timeout && timeline->highest_submitted < value) {6029mtx_unlock(&timeline->mutex);6030return VK_TIMEOUT;6031}6032}60336034struct radv_timeline_point *point =6035radv_timeline_find_point_at_least_locked(device, timeline, value);6036mtx_unlock(&timeline->mutex);6037if (!point)6038return VK_SUCCESS;60396040bool success = device->ws->wait_syncobj(device->ws, &point->syncobj, 1, true, abs_timeout);60416042mtx_lock(&timeline->mutex);6043point->wait_count--;6044mtx_unlock(&timeline->mutex);6045return success ? VK_SUCCESS : VK_TIMEOUT;6046}60476048static void6049radv_timeline_trigger_waiters_locked(struct radv_timeline *timeline,6050struct list_head *processing_list)6051{6052list_for_each_entry_safe(struct radv_timeline_waiter, waiter, &timeline->waiters, list)6053{6054if (waiter->value > timeline->highest_submitted)6055continue;60566057radv_queue_trigger_submission(waiter->submission, 1, processing_list);6058list_del(&waiter->list);6059}6060}60616062static void6063radv_destroy_semaphore_part(struct radv_device *device, struct radv_semaphore_part *part)6064{6065switch (part->kind) {6066case RADV_SEMAPHORE_NONE:6067break;6068case RADV_SEMAPHORE_TIMELINE:6069radv_destroy_timeline(device, &part->timeline);6070break;6071case RADV_SEMAPHORE_SYNCOBJ:6072case RADV_SEMAPHORE_TIMELINE_SYNCOBJ:6073device->ws->destroy_syncobj(device->ws, part->syncobj);6074break;6075}6076part->kind = RADV_SEMAPHORE_NONE;6077}60786079static VkSemaphoreTypeKHR6080radv_get_semaphore_type(const void *pNext, uint64_t *initial_value)6081{6082const VkSemaphoreTypeCreateInfo *type_info =6083vk_find_struct_const(pNext, SEMAPHORE_TYPE_CREATE_INFO);60846085if (!type_info)6086return VK_SEMAPHORE_TYPE_BINARY;60876088if (initial_value)6089*initial_value = type_info->initialValue;6090return type_info->semaphoreType;6091}60926093static void6094radv_destroy_semaphore(struct radv_device *device, const VkAllocationCallbacks *pAllocator,6095struct radv_semaphore *sem)6096{6097radv_destroy_semaphore_part(device, &sem->temporary);6098radv_destroy_semaphore_part(device, &sem->permanent);6099vk_object_base_finish(&sem->base);6100vk_free2(&device->vk.alloc, pAllocator, sem);6101}61026103VkResult6104radv_CreateSemaphore(VkDevice _device, const VkSemaphoreCreateInfo *pCreateInfo,6105const VkAllocationCallbacks *pAllocator, VkSemaphore *pSemaphore)6106{6107RADV_FROM_HANDLE(radv_device, device, _device);6108uint64_t initial_value = 0;6109VkSemaphoreTypeKHR type = radv_get_semaphore_type(pCreateInfo->pNext, &initial_value);61106111struct radv_semaphore *sem =6112vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sem), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);6113if (!sem)6114return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);61156116vk_object_base_init(&device->vk, &sem->base, VK_OBJECT_TYPE_SEMAPHORE);61176118sem->temporary.kind = RADV_SEMAPHORE_NONE;6119sem->permanent.kind = RADV_SEMAPHORE_NONE;61206121if (type == VK_SEMAPHORE_TYPE_TIMELINE &&6122device->physical_device->rad_info.has_timeline_syncobj) {6123int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);6124if (ret) {6125radv_destroy_semaphore(device, pAllocator, sem);6126return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);6127}6128device->ws->signal_syncobj(device->ws, sem->permanent.syncobj, initial_value);6129sem->permanent.timeline_syncobj.max_point = initial_value;6130sem->permanent.kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;6131} else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {6132radv_create_timeline(&sem->permanent.timeline, initial_value);6133sem->permanent.kind = RADV_SEMAPHORE_TIMELINE;6134} else {6135int ret = device->ws->create_syncobj(device->ws, false, &sem->permanent.syncobj);6136if (ret) {6137radv_destroy_semaphore(device, pAllocator, sem);6138return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);6139}6140sem->permanent.kind = RADV_SEMAPHORE_SYNCOBJ;6141}61426143*pSemaphore = radv_semaphore_to_handle(sem);6144return VK_SUCCESS;6145}61466147void6148radv_DestroySemaphore(VkDevice _device, VkSemaphore _semaphore,6149const VkAllocationCallbacks *pAllocator)6150{6151RADV_FROM_HANDLE(radv_device, device, _device);6152RADV_FROM_HANDLE(radv_semaphore, sem, _semaphore);6153if (!_semaphore)6154return;61556156radv_destroy_semaphore(device, pAllocator, sem);6157}61586159VkResult6160radv_GetSemaphoreCounterValue(VkDevice _device, VkSemaphore _semaphore, uint64_t *pValue)6161{6162RADV_FROM_HANDLE(radv_device, device, _device);6163RADV_FROM_HANDLE(radv_semaphore, semaphore, _semaphore);61646165if (radv_device_is_lost(device))6166return VK_ERROR_DEVICE_LOST;61676168struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE6169? &semaphore->temporary6170: &semaphore->permanent;61716172switch (part->kind) {6173case RADV_SEMAPHORE_TIMELINE: {6174mtx_lock(&part->timeline.mutex);6175radv_timeline_gc_locked(device, &part->timeline);6176*pValue = part->timeline.highest_signaled;6177mtx_unlock(&part->timeline.mutex);6178return VK_SUCCESS;6179}6180case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {6181return device->ws->query_syncobj(device->ws, part->syncobj, pValue);6182}6183case RADV_SEMAPHORE_NONE:6184case RADV_SEMAPHORE_SYNCOBJ:6185unreachable("Invalid semaphore type");6186}6187unreachable("Unhandled semaphore type");6188}61896190static VkResult6191radv_wait_timelines(struct radv_device *device, const VkSemaphoreWaitInfo *pWaitInfo,6192uint64_t abs_timeout)6193{6194if ((pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR) && pWaitInfo->semaphoreCount > 1) {6195for (;;) {6196for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {6197RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);6198VkResult result =6199radv_timeline_wait(device, &semaphore->permanent.timeline, pWaitInfo->pValues[i], 0);62006201if (result == VK_SUCCESS)6202return VK_SUCCESS;6203}6204if (radv_get_current_time() > abs_timeout)6205return VK_TIMEOUT;6206}6207}62086209for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {6210RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);6211VkResult result = radv_timeline_wait(device, &semaphore->permanent.timeline,6212pWaitInfo->pValues[i], abs_timeout);62136214if (result != VK_SUCCESS)6215return result;6216}6217return VK_SUCCESS;6218}6219VkResult6220radv_WaitSemaphores(VkDevice _device, const VkSemaphoreWaitInfo *pWaitInfo, uint64_t timeout)6221{6222RADV_FROM_HANDLE(radv_device, device, _device);62236224if (radv_device_is_lost(device))6225return VK_ERROR_DEVICE_LOST;62266227uint64_t abs_timeout = radv_get_absolute_timeout(timeout);62286229if (radv_semaphore_from_handle(pWaitInfo->pSemaphores[0])->permanent.kind ==6230RADV_SEMAPHORE_TIMELINE)6231return radv_wait_timelines(device, pWaitInfo, abs_timeout);62326233if (pWaitInfo->semaphoreCount > UINT32_MAX / sizeof(uint32_t))6234return vk_errorf(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY,6235"semaphoreCount integer overflow");62366237bool wait_all = !(pWaitInfo->flags & VK_SEMAPHORE_WAIT_ANY_BIT_KHR);6238uint32_t *handles = malloc(sizeof(*handles) * pWaitInfo->semaphoreCount);6239if (!handles)6240return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);62416242for (uint32_t i = 0; i < pWaitInfo->semaphoreCount; ++i) {6243RADV_FROM_HANDLE(radv_semaphore, semaphore, pWaitInfo->pSemaphores[i]);6244handles[i] = semaphore->permanent.syncobj;6245}62466247bool success =6248device->ws->wait_timeline_syncobj(device->ws, handles, pWaitInfo->pValues,6249pWaitInfo->semaphoreCount, wait_all, false, abs_timeout);6250free(handles);6251return success ? VK_SUCCESS : VK_TIMEOUT;6252}62536254VkResult6255radv_SignalSemaphore(VkDevice _device, const VkSemaphoreSignalInfo *pSignalInfo)6256{6257RADV_FROM_HANDLE(radv_device, device, _device);6258RADV_FROM_HANDLE(radv_semaphore, semaphore, pSignalInfo->semaphore);62596260struct radv_semaphore_part *part = semaphore->temporary.kind != RADV_SEMAPHORE_NONE6261? &semaphore->temporary6262: &semaphore->permanent;62636264switch (part->kind) {6265case RADV_SEMAPHORE_TIMELINE: {6266mtx_lock(&part->timeline.mutex);6267radv_timeline_gc_locked(device, &part->timeline);6268part->timeline.highest_submitted = MAX2(part->timeline.highest_submitted, pSignalInfo->value);6269part->timeline.highest_signaled = MAX2(part->timeline.highest_signaled, pSignalInfo->value);62706271struct list_head processing_list;6272list_inithead(&processing_list);6273radv_timeline_trigger_waiters_locked(&part->timeline, &processing_list);6274mtx_unlock(&part->timeline.mutex);62756276VkResult result = radv_process_submissions(&processing_list);62776278/* This needs to happen after radv_process_submissions, so6279* that any submitted submissions that are now unblocked get6280* processed before we wake the application. This way we6281* ensure that any binary semaphores that are now unblocked6282* are usable by the application. */6283u_cnd_monotonic_broadcast(&device->timeline_cond);62846285return result;6286}6287case RADV_SEMAPHORE_TIMELINE_SYNCOBJ: {6288part->timeline_syncobj.max_point = MAX2(part->timeline_syncobj.max_point, pSignalInfo->value);6289device->ws->signal_syncobj(device->ws, part->syncobj, pSignalInfo->value);6290break;6291}6292case RADV_SEMAPHORE_NONE:6293case RADV_SEMAPHORE_SYNCOBJ:6294unreachable("Invalid semaphore type");6295}6296return VK_SUCCESS;6297}62986299static void6300radv_destroy_event(struct radv_device *device, const VkAllocationCallbacks *pAllocator,6301struct radv_event *event)6302{6303if (event->bo)6304device->ws->buffer_destroy(device->ws, event->bo);63056306vk_object_base_finish(&event->base);6307vk_free2(&device->vk.alloc, pAllocator, event);6308}63096310VkResult6311radv_CreateEvent(VkDevice _device, const VkEventCreateInfo *pCreateInfo,6312const VkAllocationCallbacks *pAllocator, VkEvent *pEvent)6313{6314RADV_FROM_HANDLE(radv_device, device, _device);6315struct radv_event *event = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*event), 8,6316VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);63176318if (!event)6319return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);63206321vk_object_base_init(&device->vk, &event->base, VK_OBJECT_TYPE_EVENT);63226323VkResult result = device->ws->buffer_create(6324device->ws, 8, 8, RADEON_DOMAIN_GTT,6325RADEON_FLAG_VA_UNCACHED | RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING,6326RADV_BO_PRIORITY_FENCE, 0, &event->bo);6327if (result != VK_SUCCESS) {6328radv_destroy_event(device, pAllocator, event);6329return vk_error(device->instance, result);6330}63316332event->map = (uint64_t *)device->ws->buffer_map(event->bo);6333if (!event->map) {6334radv_destroy_event(device, pAllocator, event);6335return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);6336}63376338*pEvent = radv_event_to_handle(event);63396340return VK_SUCCESS;6341}63426343void6344radv_DestroyEvent(VkDevice _device, VkEvent _event, const VkAllocationCallbacks *pAllocator)6345{6346RADV_FROM_HANDLE(radv_device, device, _device);6347RADV_FROM_HANDLE(radv_event, event, _event);63486349if (!event)6350return;63516352radv_destroy_event(device, pAllocator, event);6353}63546355VkResult6356radv_GetEventStatus(VkDevice _device, VkEvent _event)6357{6358RADV_FROM_HANDLE(radv_device, device, _device);6359RADV_FROM_HANDLE(radv_event, event, _event);63606361if (radv_device_is_lost(device))6362return VK_ERROR_DEVICE_LOST;63636364if (*event->map == 1)6365return VK_EVENT_SET;6366return VK_EVENT_RESET;6367}63686369VkResult6370radv_SetEvent(VkDevice _device, VkEvent _event)6371{6372RADV_FROM_HANDLE(radv_event, event, _event);6373*event->map = 1;63746375return VK_SUCCESS;6376}63776378VkResult6379radv_ResetEvent(VkDevice _device, VkEvent _event)6380{6381RADV_FROM_HANDLE(radv_event, event, _event);6382*event->map = 0;63836384return VK_SUCCESS;6385}63866387static void6388radv_destroy_buffer(struct radv_device *device, const VkAllocationCallbacks *pAllocator,6389struct radv_buffer *buffer)6390{6391if ((buffer->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) && buffer->bo)6392device->ws->buffer_destroy(device->ws, buffer->bo);63936394vk_object_base_finish(&buffer->base);6395vk_free2(&device->vk.alloc, pAllocator, buffer);6396}63976398VkResult6399radv_CreateBuffer(VkDevice _device, const VkBufferCreateInfo *pCreateInfo,6400const VkAllocationCallbacks *pAllocator, VkBuffer *pBuffer)6401{6402RADV_FROM_HANDLE(radv_device, device, _device);6403struct radv_buffer *buffer;64046405if (pCreateInfo->size > RADV_MAX_MEMORY_ALLOCATION_SIZE)6406return VK_ERROR_OUT_OF_DEVICE_MEMORY;64076408assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);64096410buffer = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*buffer), 8,6411VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);6412if (buffer == NULL)6413return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);64146415vk_object_base_init(&device->vk, &buffer->base, VK_OBJECT_TYPE_BUFFER);64166417buffer->size = pCreateInfo->size;6418buffer->usage = pCreateInfo->usage;6419buffer->bo = NULL;6420buffer->offset = 0;6421buffer->flags = pCreateInfo->flags;64226423buffer->shareable =6424vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_BUFFER_CREATE_INFO) != NULL;64256426if (pCreateInfo->flags & VK_BUFFER_CREATE_SPARSE_BINDING_BIT) {6427enum radeon_bo_flag flags = RADEON_FLAG_VIRTUAL;6428if (pCreateInfo->flags & VK_BUFFER_CREATE_DEVICE_ADDRESS_CAPTURE_REPLAY_BIT)6429flags |= RADEON_FLAG_REPLAYABLE;64306431uint64_t replay_address = 0;6432const VkBufferOpaqueCaptureAddressCreateInfo *replay_info =6433vk_find_struct_const(pCreateInfo->pNext, BUFFER_OPAQUE_CAPTURE_ADDRESS_CREATE_INFO);6434if (replay_info && replay_info->opaqueCaptureAddress)6435replay_address = replay_info->opaqueCaptureAddress;64366437VkResult result = device->ws->buffer_create(device->ws, align64(buffer->size, 4096), 4096, 0,6438flags, RADV_BO_PRIORITY_VIRTUAL,6439replay_address, &buffer->bo);6440if (result != VK_SUCCESS) {6441radv_destroy_buffer(device, pAllocator, buffer);6442return vk_error(device->instance, result);6443}6444}64456446*pBuffer = radv_buffer_to_handle(buffer);64476448return VK_SUCCESS;6449}64506451void6452radv_DestroyBuffer(VkDevice _device, VkBuffer _buffer, const VkAllocationCallbacks *pAllocator)6453{6454RADV_FROM_HANDLE(radv_device, device, _device);6455RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);64566457if (!buffer)6458return;64596460radv_destroy_buffer(device, pAllocator, buffer);6461}64626463VkDeviceAddress6464radv_GetBufferDeviceAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)6465{6466RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);6467return radv_buffer_get_va(buffer->bo) + buffer->offset;6468}64696470uint64_t6471radv_GetBufferOpaqueCaptureAddress(VkDevice device, const VkBufferDeviceAddressInfo *pInfo)6472{6473RADV_FROM_HANDLE(radv_buffer, buffer, pInfo->buffer);6474return buffer->bo ? radv_buffer_get_va(buffer->bo) + buffer->offset : 0;6475}64766477uint64_t6478radv_GetDeviceMemoryOpaqueCaptureAddress(VkDevice device,6479const VkDeviceMemoryOpaqueCaptureAddressInfo *pInfo)6480{6481RADV_FROM_HANDLE(radv_device_memory, mem, pInfo->memory);6482return radv_buffer_get_va(mem->bo);6483}64846485static inline unsigned6486si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)6487{6488if (stencil)6489return plane->surface.u.legacy.zs.stencil_tiling_index[level];6490else6491return plane->surface.u.legacy.tiling_index[level];6492}64936494static uint32_t6495radv_surface_max_layer_count(struct radv_image_view *iview)6496{6497return iview->type == VK_IMAGE_VIEW_TYPE_3D ? iview->extent.depth6498: (iview->base_layer + iview->layer_count);6499}65006501static unsigned6502get_dcc_max_uncompressed_block_size(const struct radv_device *device,6503const struct radv_image_view *iview)6504{6505if (device->physical_device->rad_info.chip_class < GFX10 && iview->image->info.samples > 1) {6506if (iview->image->planes[0].surface.bpe == 1)6507return V_028C78_MAX_BLOCK_SIZE_64B;6508else if (iview->image->planes[0].surface.bpe == 2)6509return V_028C78_MAX_BLOCK_SIZE_128B;6510}65116512return V_028C78_MAX_BLOCK_SIZE_256B;6513}65146515static unsigned6516get_dcc_min_compressed_block_size(const struct radv_device *device)6517{6518if (!device->physical_device->rad_info.has_dedicated_vram) {6519/* amdvlk: [min-compressed-block-size] should be set to 32 for6520* dGPU and 64 for APU because all of our APUs to date use6521* DIMMs which have a request granularity size of 64B while all6522* other chips have a 32B request size.6523*/6524return V_028C78_MIN_BLOCK_SIZE_64B;6525}65266527return V_028C78_MIN_BLOCK_SIZE_32B;6528}65296530static uint32_t6531radv_init_dcc_control_reg(struct radv_device *device, struct radv_image_view *iview)6532{6533unsigned max_uncompressed_block_size = get_dcc_max_uncompressed_block_size(device, iview);6534unsigned min_compressed_block_size = get_dcc_min_compressed_block_size(device);6535unsigned max_compressed_block_size;6536unsigned independent_128b_blocks;6537unsigned independent_64b_blocks;65386539if (!radv_dcc_enabled(iview->image, iview->base_mip))6540return 0;65416542/* For GFX9+ ac_surface computes values for us (except min_compressed6543* and max_uncompressed) */6544if (device->physical_device->rad_info.chip_class >= GFX9) {6545max_compressed_block_size =6546iview->image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size;6547independent_128b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_128B_blocks;6548independent_64b_blocks = iview->image->planes[0].surface.u.gfx9.color.dcc.independent_64B_blocks;6549} else {6550independent_128b_blocks = 0;65516552if (iview->image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT |6553VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)) {6554/* If this DCC image is potentially going to be used in texture6555* fetches, we need some special settings.6556*/6557independent_64b_blocks = 1;6558max_compressed_block_size = V_028C78_MAX_BLOCK_SIZE_64B;6559} else {6560/* MAX_UNCOMPRESSED_BLOCK_SIZE must be >=6561* MAX_COMPRESSED_BLOCK_SIZE. Set MAX_COMPRESSED_BLOCK_SIZE as6562* big as possible for better compression state.6563*/6564independent_64b_blocks = 0;6565max_compressed_block_size = max_uncompressed_block_size;6566}6567}65686569return S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |6570S_028C78_MAX_COMPRESSED_BLOCK_SIZE(max_compressed_block_size) |6571S_028C78_MIN_COMPRESSED_BLOCK_SIZE(min_compressed_block_size) |6572S_028C78_INDEPENDENT_64B_BLOCKS(independent_64b_blocks) |6573S_028C78_INDEPENDENT_128B_BLOCKS(independent_128b_blocks);6574}65756576void6577radv_initialise_color_surface(struct radv_device *device, struct radv_color_buffer_info *cb,6578struct radv_image_view *iview)6579{6580const struct util_format_description *desc;6581unsigned ntype, format, swap, endian;6582unsigned blend_clamp = 0, blend_bypass = 0;6583uint64_t va;6584const struct radv_image_plane *plane = &iview->image->planes[iview->plane_id];6585const struct radeon_surf *surf = &plane->surface;65866587desc = vk_format_description(iview->vk_format);65886589memset(cb, 0, sizeof(*cb));65906591/* Intensity is implemented as Red, so treat it that way. */6592cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == PIPE_SWIZZLE_1);65936594va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;65956596cb->cb_color_base = va >> 8;65976598if (device->physical_device->rad_info.chip_class >= GFX9) {6599if (device->physical_device->rad_info.chip_class >= GFX10) {6600cb->cb_color_attrib3 |= S_028EE0_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |6601S_028EE0_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |6602S_028EE0_CMASK_PIPE_ALIGNED(1) |6603S_028EE0_DCC_PIPE_ALIGNED(surf->u.gfx9.color.dcc.pipe_aligned);6604} else {6605struct gfx9_surf_meta_flags meta = {6606.rb_aligned = 1,6607.pipe_aligned = 1,6608};66096610if (surf->meta_offset)6611meta = surf->u.gfx9.color.dcc;66126613cb->cb_color_attrib |= S_028C74_COLOR_SW_MODE(surf->u.gfx9.swizzle_mode) |6614S_028C74_FMASK_SW_MODE(surf->u.gfx9.color.fmask_swizzle_mode) |6615S_028C74_RB_ALIGNED(meta.rb_aligned) |6616S_028C74_PIPE_ALIGNED(meta.pipe_aligned);6617cb->cb_mrt_epitch = S_0287A0_EPITCH(surf->u.gfx9.epitch);6618}66196620cb->cb_color_base += surf->u.gfx9.surf_offset >> 8;6621cb->cb_color_base |= surf->tile_swizzle;6622} else {6623const struct legacy_surf_level *level_info = &surf->u.legacy.level[iview->base_mip];6624unsigned pitch_tile_max, slice_tile_max, tile_mode_index;66256626cb->cb_color_base += level_info->offset_256B;6627if (level_info->mode == RADEON_SURF_MODE_2D)6628cb->cb_color_base |= surf->tile_swizzle;66296630pitch_tile_max = level_info->nblk_x / 8 - 1;6631slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;6632tile_mode_index = si_tile_mode_index(plane, iview->base_mip, false);66336634cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);6635cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);6636cb->cb_color_cmask_slice = surf->u.legacy.color.cmask_slice_tile_max;66376638cb->cb_color_attrib |= S_028C74_TILE_MODE_INDEX(tile_mode_index);66396640if (radv_image_has_fmask(iview->image)) {6641if (device->physical_device->rad_info.chip_class >= GFX7)6642cb->cb_color_pitch |=6643S_028C64_FMASK_TILE_MAX(surf->u.legacy.color.fmask.pitch_in_pixels / 8 - 1);6644cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(surf->u.legacy.color.fmask.tiling_index);6645cb->cb_color_fmask_slice = S_028C88_TILE_MAX(surf->u.legacy.color.fmask.slice_tile_max);6646} else {6647/* This must be set for fast clear to work without FMASK. */6648if (device->physical_device->rad_info.chip_class >= GFX7)6649cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);6650cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);6651cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);6652}6653}66546655/* CMASK variables */6656va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;6657va += surf->cmask_offset;6658cb->cb_color_cmask = va >> 8;66596660va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;6661va += surf->meta_offset;66626663if (radv_dcc_enabled(iview->image, iview->base_mip) &&6664device->physical_device->rad_info.chip_class <= GFX8)6665va += plane->surface.u.legacy.color.dcc_level[iview->base_mip].dcc_offset;66666667unsigned dcc_tile_swizzle = surf->tile_swizzle;6668dcc_tile_swizzle &= ((1 << surf->meta_alignment_log2) - 1) >> 8;66696670cb->cb_dcc_base = va >> 8;6671cb->cb_dcc_base |= dcc_tile_swizzle;66726673/* GFX10 field has the same base shift as the GFX6 field. */6674uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;6675cb->cb_color_view =6676S_028C6C_SLICE_START(iview->base_layer) | S_028C6C_SLICE_MAX_GFX10(max_slice);66776678if (iview->image->info.samples > 1) {6679unsigned log_samples = util_logbase2(iview->image->info.samples);66806681cb->cb_color_attrib |=6682S_028C74_NUM_SAMPLES(log_samples) | S_028C74_NUM_FRAGMENTS(log_samples);6683}66846685if (radv_image_has_fmask(iview->image)) {6686va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->fmask_offset;6687cb->cb_color_fmask = va >> 8;6688cb->cb_color_fmask |= surf->fmask_tile_swizzle;6689} else {6690cb->cb_color_fmask = cb->cb_color_base;6691}66926693ntype = radv_translate_color_numformat(iview->vk_format, desc,6694vk_format_get_first_non_void_channel(iview->vk_format));6695format = radv_translate_colorformat(iview->vk_format);6696if (format == V_028C70_COLOR_INVALID || ntype == ~0u)6697radv_finishme("Illegal color\n");6698swap = radv_translate_colorswap(iview->vk_format, false);6699endian = radv_colorformat_endian_swap(format);67006701/* blend clamp should be set for all NORM/SRGB types */6702if (ntype == V_028C70_NUMBER_UNORM || ntype == V_028C70_NUMBER_SNORM ||6703ntype == V_028C70_NUMBER_SRGB)6704blend_clamp = 1;67056706/* set blend bypass according to docs if SINT/UINT or67078/24 COLOR variants */6708if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||6709format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||6710format == V_028C70_COLOR_X24_8_32_FLOAT) {6711blend_clamp = 0;6712blend_bypass = 1;6713}6714#if 06715if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&6716(format == V_028C70_COLOR_8 ||6717format == V_028C70_COLOR_8_8 ||6718format == V_028C70_COLOR_8_8_8_8))6719->color_is_int8 = true;6720#endif6721cb->cb_color_info =6722S_028C70_FORMAT(format) | S_028C70_COMP_SWAP(swap) | S_028C70_BLEND_CLAMP(blend_clamp) |6723S_028C70_BLEND_BYPASS(blend_bypass) | S_028C70_SIMPLE_FLOAT(1) |6724S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM && ntype != V_028C70_NUMBER_SNORM &&6725ntype != V_028C70_NUMBER_SRGB && format != V_028C70_COLOR_8_24 &&6726format != V_028C70_COLOR_24_8) |6727S_028C70_NUMBER_TYPE(ntype) | S_028C70_ENDIAN(endian);6728if (radv_image_has_fmask(iview->image)) {6729cb->cb_color_info |= S_028C70_COMPRESSION(1);6730if (device->physical_device->rad_info.chip_class == GFX6) {6731unsigned fmask_bankh = util_logbase2(surf->u.legacy.color.fmask.bankh);6732cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(fmask_bankh);6733}67346735if (radv_image_is_tc_compat_cmask(iview->image)) {6736/* Allow the texture block to read FMASK directly6737* without decompressing it. This bit must be cleared6738* when performing FMASK_DECOMPRESS or DCC_COMPRESS,6739* otherwise the operation doesn't happen.6740*/6741cb->cb_color_info |= S_028C70_FMASK_COMPRESS_1FRAG_ONLY(1);67426743if (device->physical_device->rad_info.chip_class == GFX8) {6744/* Set CMASK into a tiling format that allows6745* the texture block to read it.6746*/6747cb->cb_color_info |= S_028C70_CMASK_ADDR_TYPE(2);6748}6749}6750}67516752if (radv_image_has_cmask(iview->image) &&6753!(device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS))6754cb->cb_color_info |= S_028C70_FAST_CLEAR(1);67556756if (radv_dcc_enabled(iview->image, iview->base_mip))6757cb->cb_color_info |= S_028C70_DCC_ENABLE(1);67586759cb->cb_dcc_control = radv_init_dcc_control_reg(device, iview);67606761/* This must be set for fast clear to work without FMASK. */6762if (!radv_image_has_fmask(iview->image) &&6763device->physical_device->rad_info.chip_class == GFX6) {6764unsigned bankh = util_logbase2(surf->u.legacy.bankh);6765cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);6766}67676768if (device->physical_device->rad_info.chip_class >= GFX9) {6769unsigned mip0_depth = iview->image->type == VK_IMAGE_TYPE_3D6770? (iview->extent.depth - 1)6771: (iview->image->info.array_size - 1);6772unsigned width =6773vk_format_get_plane_width(iview->image->vk_format, iview->plane_id, iview->extent.width);6774unsigned height =6775vk_format_get_plane_height(iview->image->vk_format, iview->plane_id, iview->extent.height);67766777if (device->physical_device->rad_info.chip_class >= GFX10) {6778cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX10(iview->base_mip);67796780cb->cb_color_attrib3 |= S_028EE0_MIP0_DEPTH(mip0_depth) |6781S_028EE0_RESOURCE_TYPE(surf->u.gfx9.resource_type) |6782S_028EE0_RESOURCE_LEVEL(1);6783} else {6784cb->cb_color_view |= S_028C6C_MIP_LEVEL_GFX9(iview->base_mip);6785cb->cb_color_attrib |=6786S_028C74_MIP0_DEPTH(mip0_depth) | S_028C74_RESOURCE_TYPE(surf->u.gfx9.resource_type);6787}67886789cb->cb_color_attrib2 = S_028C68_MIP0_WIDTH(width - 1) | S_028C68_MIP0_HEIGHT(height - 1) |6790S_028C68_MAX_MIP(iview->image->info.levels - 1);6791}6792}67936794static unsigned6795radv_calc_decompress_on_z_planes(struct radv_device *device, struct radv_image_view *iview)6796{6797unsigned max_zplanes = 0;67986799assert(radv_image_is_tc_compat_htile(iview->image));68006801if (device->physical_device->rad_info.chip_class >= GFX9) {6802/* Default value for 32-bit depth surfaces. */6803max_zplanes = 4;68046805if (iview->vk_format == VK_FORMAT_D16_UNORM && iview->image->info.samples > 1)6806max_zplanes = 2;68076808/* Workaround for a DB hang when ITERATE_256 is set to 1. Only affects 4X MSAA D/S images. */6809if (device->physical_device->rad_info.has_two_planes_iterate256_bug &&6810radv_image_get_iterate256(device, iview->image) &&6811!radv_image_tile_stencil_disabled(device, iview->image) &&6812iview->image->info.samples == 4) {6813max_zplanes = 1;6814}68156816max_zplanes = max_zplanes + 1;6817} else {6818if (iview->vk_format == VK_FORMAT_D16_UNORM) {6819/* Do not enable Z plane compression for 16-bit depth6820* surfaces because isn't supported on GFX8. Only6821* 32-bit depth surfaces are supported by the hardware.6822* This allows to maintain shader compatibility and to6823* reduce the number of depth decompressions.6824*/6825max_zplanes = 1;6826} else {6827if (iview->image->info.samples <= 1)6828max_zplanes = 5;6829else if (iview->image->info.samples <= 4)6830max_zplanes = 3;6831else6832max_zplanes = 2;6833}6834}68356836return max_zplanes;6837}68386839void6840radv_initialise_ds_surface(struct radv_device *device, struct radv_ds_buffer_info *ds,6841struct radv_image_view *iview)6842{6843unsigned level = iview->base_mip;6844unsigned format, stencil_format;6845uint64_t va, s_offs, z_offs;6846bool stencil_only = iview->image->vk_format == VK_FORMAT_S8_UINT;6847const struct radv_image_plane *plane = &iview->image->planes[0];6848const struct radeon_surf *surf = &plane->surface;68496850assert(vk_format_get_plane_count(iview->image->vk_format) == 1);68516852memset(ds, 0, sizeof(*ds));6853if (!device->instance->absolute_depth_bias) {6854switch (iview->image->vk_format) {6855case VK_FORMAT_D24_UNORM_S8_UINT:6856case VK_FORMAT_X8_D24_UNORM_PACK32:6857ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);6858break;6859case VK_FORMAT_D16_UNORM:6860case VK_FORMAT_D16_UNORM_S8_UINT:6861ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);6862break;6863case VK_FORMAT_D32_SFLOAT:6864case VK_FORMAT_D32_SFLOAT_S8_UINT:6865ds->pa_su_poly_offset_db_fmt_cntl =6866S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) | S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);6867break;6868default:6869break;6870}6871}68726873format = radv_translate_dbformat(iview->image->vk_format);6874stencil_format = surf->has_stencil ? V_028044_STENCIL_8 : V_028044_STENCIL_INVALID;68756876uint32_t max_slice = radv_surface_max_layer_count(iview) - 1;6877ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) | S_028008_SLICE_MAX(max_slice);6878if (device->physical_device->rad_info.chip_class >= GFX10) {6879ds->db_depth_view |=6880S_028008_SLICE_START_HI(iview->base_layer >> 11) | S_028008_SLICE_MAX_HI(max_slice >> 11);6881}68826883ds->db_htile_data_base = 0;6884ds->db_htile_surface = 0;68856886va = radv_buffer_get_va(iview->image->bo) + iview->image->offset;6887s_offs = z_offs = va;68886889if (device->physical_device->rad_info.chip_class >= GFX9) {6890assert(surf->u.gfx9.surf_offset == 0);6891s_offs += surf->u.gfx9.zs.stencil_offset;68926893ds->db_z_info = S_028038_FORMAT(format) |6894S_028038_NUM_SAMPLES(util_logbase2(iview->image->info.samples)) |6895S_028038_SW_MODE(surf->u.gfx9.swizzle_mode) |6896S_028038_MAXMIP(iview->image->info.levels - 1) | S_028038_ZRANGE_PRECISION(1);6897ds->db_stencil_info =6898S_02803C_FORMAT(stencil_format) | S_02803C_SW_MODE(surf->u.gfx9.zs.stencil_swizzle_mode);68996900if (device->physical_device->rad_info.chip_class == GFX9) {6901ds->db_z_info2 = S_028068_EPITCH(surf->u.gfx9.epitch);6902ds->db_stencil_info2 = S_02806C_EPITCH(surf->u.gfx9.zs.stencil_epitch);6903}69046905ds->db_depth_view |= S_028008_MIPID(level);6906ds->db_depth_size = S_02801C_X_MAX(iview->image->info.width - 1) |6907S_02801C_Y_MAX(iview->image->info.height - 1);69086909if (radv_htile_enabled(iview->image, level)) {6910ds->db_z_info |= S_028038_TILE_SURFACE_ENABLE(1);69116912if (radv_image_is_tc_compat_htile(iview->image)) {6913unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);69146915ds->db_z_info |= S_028038_DECOMPRESS_ON_N_ZPLANES(max_zplanes);69166917if (device->physical_device->rad_info.chip_class >= GFX10) {6918bool iterate256 = radv_image_get_iterate256(device, iview->image);69196920ds->db_z_info |= S_028040_ITERATE_FLUSH(1);6921ds->db_stencil_info |= S_028044_ITERATE_FLUSH(1);6922ds->db_z_info |= S_028040_ITERATE_256(iterate256);6923ds->db_stencil_info |= S_028044_ITERATE_256(iterate256);6924} else {6925ds->db_z_info |= S_028038_ITERATE_FLUSH(1);6926ds->db_stencil_info |= S_02803C_ITERATE_FLUSH(1);6927}6928}69296930if (radv_image_tile_stencil_disabled(device, iview->image)) {6931ds->db_stencil_info |= S_02803C_TILE_STENCIL_DISABLE(1);6932}69336934va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;6935ds->db_htile_data_base = va >> 8;6936ds->db_htile_surface = S_028ABC_FULL_CACHE(1) | S_028ABC_PIPE_ALIGNED(1);69376938if (device->physical_device->rad_info.chip_class == GFX9) {6939ds->db_htile_surface |= S_028ABC_RB_ALIGNED(1);6940}69416942if (radv_image_has_vrs_htile(device, iview->image)) {6943ds->db_htile_surface |= S_028ABC_VRS_HTILE_ENCODING(V_028ABC_VRS_HTILE_4BIT_ENCODING);6944}6945}6946} else {6947const struct legacy_surf_level *level_info = &surf->u.legacy.level[level];69486949if (stencil_only)6950level_info = &surf->u.legacy.zs.stencil_level[level];69516952z_offs += (uint64_t)surf->u.legacy.level[level].offset_256B * 256;6953s_offs += (uint64_t)surf->u.legacy.zs.stencil_level[level].offset_256B * 256;69546955ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(!radv_image_is_tc_compat_htile(iview->image));6956ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);6957ds->db_stencil_info = S_028044_FORMAT(stencil_format);69586959if (iview->image->info.samples > 1)6960ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->info.samples));69616962if (device->physical_device->rad_info.chip_class >= GFX7) {6963struct radeon_info *info = &device->physical_device->rad_info;6964unsigned tiling_index = surf->u.legacy.tiling_index[level];6965unsigned stencil_index = surf->u.legacy.zs.stencil_tiling_index[level];6966unsigned macro_index = surf->u.legacy.macro_tile_index;6967unsigned tile_mode = info->si_tile_mode_array[tiling_index];6968unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];6969unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];69706971if (stencil_only)6972tile_mode = stencil_tile_mode;69736974ds->db_depth_info |= S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |6975S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |6976S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |6977S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |6978S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |6979S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));6980ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));6981ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));6982} else {6983unsigned tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, false);6984ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);6985tile_mode_index = si_tile_mode_index(&iview->image->planes[0], level, true);6986ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);6987if (stencil_only)6988ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);6989}69906991ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |6992S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);6993ds->db_depth_slice =6994S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);69956996if (radv_htile_enabled(iview->image, level)) {6997ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1);69986999if (radv_image_tile_stencil_disabled(device, iview->image)) {7000ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);7001}70027003va = radv_buffer_get_va(iview->image->bo) + iview->image->offset + surf->meta_offset;7004ds->db_htile_data_base = va >> 8;7005ds->db_htile_surface = S_028ABC_FULL_CACHE(1);70067007if (radv_image_is_tc_compat_htile(iview->image)) {7008unsigned max_zplanes = radv_calc_decompress_on_z_planes(device, iview);70097010ds->db_htile_surface |= S_028ABC_TC_COMPATIBLE(1);7011ds->db_z_info |= S_028040_DECOMPRESS_ON_N_ZPLANES(max_zplanes);7012}7013}7014}70157016ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;7017ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;7018}70197020VkResult7021radv_CreateFramebuffer(VkDevice _device, const VkFramebufferCreateInfo *pCreateInfo,7022const VkAllocationCallbacks *pAllocator, VkFramebuffer *pFramebuffer)7023{7024RADV_FROM_HANDLE(radv_device, device, _device);7025struct radv_framebuffer *framebuffer;7026const VkFramebufferAttachmentsCreateInfo *imageless_create_info =7027vk_find_struct_const(pCreateInfo->pNext, FRAMEBUFFER_ATTACHMENTS_CREATE_INFO);70287029assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);70307031size_t size = sizeof(*framebuffer);7032if (!imageless_create_info)7033size += sizeof(struct radv_image_view *) * pCreateInfo->attachmentCount;7034framebuffer =7035vk_alloc2(&device->vk.alloc, pAllocator, size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);7036if (framebuffer == NULL)7037return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);70387039vk_object_base_init(&device->vk, &framebuffer->base, VK_OBJECT_TYPE_FRAMEBUFFER);70407041framebuffer->attachment_count = pCreateInfo->attachmentCount;7042framebuffer->width = pCreateInfo->width;7043framebuffer->height = pCreateInfo->height;7044framebuffer->layers = pCreateInfo->layers;7045framebuffer->imageless = !!imageless_create_info;70467047if (!imageless_create_info) {7048for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {7049VkImageView _iview = pCreateInfo->pAttachments[i];7050struct radv_image_view *iview = radv_image_view_from_handle(_iview);7051framebuffer->attachments[i] = iview;7052}7053}70547055*pFramebuffer = radv_framebuffer_to_handle(framebuffer);7056return VK_SUCCESS;7057}70587059void7060radv_DestroyFramebuffer(VkDevice _device, VkFramebuffer _fb,7061const VkAllocationCallbacks *pAllocator)7062{7063RADV_FROM_HANDLE(radv_device, device, _device);7064RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);70657066if (!fb)7067return;7068vk_object_base_finish(&fb->base);7069vk_free2(&device->vk.alloc, pAllocator, fb);7070}70717072static unsigned7073radv_tex_wrap(VkSamplerAddressMode address_mode)7074{7075switch (address_mode) {7076case VK_SAMPLER_ADDRESS_MODE_REPEAT:7077return V_008F30_SQ_TEX_WRAP;7078case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:7079return V_008F30_SQ_TEX_MIRROR;7080case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:7081return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;7082case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:7083return V_008F30_SQ_TEX_CLAMP_BORDER;7084case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:7085return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;7086default:7087unreachable("illegal tex wrap mode");7088break;7089}7090}70917092static unsigned7093radv_tex_compare(VkCompareOp op)7094{7095switch (op) {7096case VK_COMPARE_OP_NEVER:7097return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;7098case VK_COMPARE_OP_LESS:7099return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;7100case VK_COMPARE_OP_EQUAL:7101return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;7102case VK_COMPARE_OP_LESS_OR_EQUAL:7103return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;7104case VK_COMPARE_OP_GREATER:7105return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;7106case VK_COMPARE_OP_NOT_EQUAL:7107return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;7108case VK_COMPARE_OP_GREATER_OR_EQUAL:7109return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;7110case VK_COMPARE_OP_ALWAYS:7111return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;7112default:7113unreachable("illegal compare mode");7114break;7115}7116}71177118static unsigned7119radv_tex_filter(VkFilter filter, unsigned max_ansio)7120{7121switch (filter) {7122case VK_FILTER_NEAREST:7123return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT7124: V_008F38_SQ_TEX_XY_FILTER_POINT);7125case VK_FILTER_LINEAR:7126return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR7127: V_008F38_SQ_TEX_XY_FILTER_BILINEAR);7128case VK_FILTER_CUBIC_IMG:7129default:7130fprintf(stderr, "illegal texture filter");7131return 0;7132}7133}71347135static unsigned7136radv_tex_mipfilter(VkSamplerMipmapMode mode)7137{7138switch (mode) {7139case VK_SAMPLER_MIPMAP_MODE_NEAREST:7140return V_008F38_SQ_TEX_Z_FILTER_POINT;7141case VK_SAMPLER_MIPMAP_MODE_LINEAR:7142return V_008F38_SQ_TEX_Z_FILTER_LINEAR;7143default:7144return V_008F38_SQ_TEX_Z_FILTER_NONE;7145}7146}71477148static unsigned7149radv_tex_bordercolor(VkBorderColor bcolor)7150{7151switch (bcolor) {7152case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:7153case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:7154return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;7155case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:7156case VK_BORDER_COLOR_INT_OPAQUE_BLACK:7157return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;7158case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:7159case VK_BORDER_COLOR_INT_OPAQUE_WHITE:7160return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;7161case VK_BORDER_COLOR_FLOAT_CUSTOM_EXT:7162case VK_BORDER_COLOR_INT_CUSTOM_EXT:7163return V_008F3C_SQ_TEX_BORDER_COLOR_REGISTER;7164default:7165break;7166}7167return 0;7168}71697170static unsigned7171radv_tex_aniso_filter(unsigned filter)7172{7173if (filter < 2)7174return 0;7175if (filter < 4)7176return 1;7177if (filter < 8)7178return 2;7179if (filter < 16)7180return 3;7181return 4;7182}71837184static unsigned7185radv_tex_filter_mode(VkSamplerReductionMode mode)7186{7187switch (mode) {7188case VK_SAMPLER_REDUCTION_MODE_WEIGHTED_AVERAGE_EXT:7189return V_008F30_SQ_IMG_FILTER_MODE_BLEND;7190case VK_SAMPLER_REDUCTION_MODE_MIN_EXT:7191return V_008F30_SQ_IMG_FILTER_MODE_MIN;7192case VK_SAMPLER_REDUCTION_MODE_MAX_EXT:7193return V_008F30_SQ_IMG_FILTER_MODE_MAX;7194default:7195break;7196}7197return 0;7198}71997200static uint32_t7201radv_get_max_anisotropy(struct radv_device *device, const VkSamplerCreateInfo *pCreateInfo)7202{7203if (device->force_aniso >= 0)7204return device->force_aniso;72057206if (pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0f)7207return (uint32_t)pCreateInfo->maxAnisotropy;72087209return 0;7210}72117212static inline int7213S_FIXED(float value, unsigned frac_bits)7214{7215return value * (1 << frac_bits);7216}72177218static uint32_t7219radv_register_border_color(struct radv_device *device, VkClearColorValue value)7220{7221uint32_t slot;72227223mtx_lock(&device->border_color_data.mutex);72247225for (slot = 0; slot < RADV_BORDER_COLOR_COUNT; slot++) {7226if (!device->border_color_data.used[slot]) {7227/* Copy to the GPU wrt endian-ness. */7228util_memcpy_cpu_to_le32(&device->border_color_data.colors_gpu_ptr[slot], &value,7229sizeof(VkClearColorValue));72307231device->border_color_data.used[slot] = true;7232break;7233}7234}72357236mtx_unlock(&device->border_color_data.mutex);72377238return slot;7239}72407241static void7242radv_unregister_border_color(struct radv_device *device, uint32_t slot)7243{7244mtx_lock(&device->border_color_data.mutex);72457246device->border_color_data.used[slot] = false;72477248mtx_unlock(&device->border_color_data.mutex);7249}72507251static void7252radv_init_sampler(struct radv_device *device, struct radv_sampler *sampler,7253const VkSamplerCreateInfo *pCreateInfo)7254{7255uint32_t max_aniso = radv_get_max_anisotropy(device, pCreateInfo);7256uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);7257bool compat_mode = device->physical_device->rad_info.chip_class == GFX8 ||7258device->physical_device->rad_info.chip_class == GFX9;7259unsigned filter_mode = V_008F30_SQ_IMG_FILTER_MODE_BLEND;7260unsigned depth_compare_func = V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;7261bool trunc_coord =7262pCreateInfo->minFilter == VK_FILTER_NEAREST && pCreateInfo->magFilter == VK_FILTER_NEAREST;7263bool uses_border_color = pCreateInfo->addressModeU == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||7264pCreateInfo->addressModeV == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER ||7265pCreateInfo->addressModeW == VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER;7266VkBorderColor border_color =7267uses_border_color ? pCreateInfo->borderColor : VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;7268uint32_t border_color_ptr;72697270const struct VkSamplerReductionModeCreateInfo *sampler_reduction =7271vk_find_struct_const(pCreateInfo->pNext, SAMPLER_REDUCTION_MODE_CREATE_INFO);7272if (sampler_reduction)7273filter_mode = radv_tex_filter_mode(sampler_reduction->reductionMode);72747275if (pCreateInfo->compareEnable)7276depth_compare_func = radv_tex_compare(pCreateInfo->compareOp);72777278sampler->border_color_slot = RADV_BORDER_COLOR_COUNT;72797280if (border_color == VK_BORDER_COLOR_FLOAT_CUSTOM_EXT ||7281border_color == VK_BORDER_COLOR_INT_CUSTOM_EXT) {7282const VkSamplerCustomBorderColorCreateInfoEXT *custom_border_color =7283vk_find_struct_const(pCreateInfo->pNext, SAMPLER_CUSTOM_BORDER_COLOR_CREATE_INFO_EXT);72847285assert(custom_border_color);72867287sampler->border_color_slot =7288radv_register_border_color(device, custom_border_color->customBorderColor);72897290/* Did we fail to find a slot? */7291if (sampler->border_color_slot == RADV_BORDER_COLOR_COUNT) {7292fprintf(stderr, "WARNING: no free border color slots, defaulting to TRANS_BLACK.\n");7293border_color = VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK;7294}7295}72967297/* If we don't have a custom color, set the ptr to 0 */7298border_color_ptr =7299sampler->border_color_slot != RADV_BORDER_COLOR_COUNT ? sampler->border_color_slot : 0;73007301sampler->state[0] =7302(S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |7303S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |7304S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |7305S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) | S_008F30_DEPTH_COMPARE_FUNC(depth_compare_func) |7306S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |7307S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) | S_008F30_ANISO_BIAS(max_aniso_ratio) |7308S_008F30_DISABLE_CUBE_WRAP(0) | S_008F30_COMPAT_MODE(compat_mode) |7309S_008F30_FILTER_MODE(filter_mode) | S_008F30_TRUNC_COORD(trunc_coord));7310sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |7311S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |7312S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));7313sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |7314S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |7315S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |7316S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |7317S_008F38_MIP_POINT_PRECLAMP(0));7318sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(border_color_ptr) |7319S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(border_color)));73207321if (device->physical_device->rad_info.chip_class >= GFX10) {7322sampler->state[2] |= S_008F38_ANISO_OVERRIDE_GFX10(1);7323} else {7324sampler->state[2] |=7325S_008F38_DISABLE_LSB_CEIL(device->physical_device->rad_info.chip_class <= GFX8) |7326S_008F38_FILTER_PREC_FIX(1) |7327S_008F38_ANISO_OVERRIDE_GFX8(device->physical_device->rad_info.chip_class >= GFX8);7328}7329}73307331VkResult7332radv_CreateSampler(VkDevice _device, const VkSamplerCreateInfo *pCreateInfo,7333const VkAllocationCallbacks *pAllocator, VkSampler *pSampler)7334{7335RADV_FROM_HANDLE(radv_device, device, _device);7336struct radv_sampler *sampler;73377338const struct VkSamplerYcbcrConversionInfo *ycbcr_conversion =7339vk_find_struct_const(pCreateInfo->pNext, SAMPLER_YCBCR_CONVERSION_INFO);73407341assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);73427343sampler = vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*sampler), 8,7344VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);7345if (!sampler)7346return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);73477348vk_object_base_init(&device->vk, &sampler->base, VK_OBJECT_TYPE_SAMPLER);73497350radv_init_sampler(device, sampler, pCreateInfo);73517352sampler->ycbcr_sampler =7353ycbcr_conversion ? radv_sampler_ycbcr_conversion_from_handle(ycbcr_conversion->conversion)7354: NULL;7355*pSampler = radv_sampler_to_handle(sampler);73567357return VK_SUCCESS;7358}73597360void7361radv_DestroySampler(VkDevice _device, VkSampler _sampler, const VkAllocationCallbacks *pAllocator)7362{7363RADV_FROM_HANDLE(radv_device, device, _device);7364RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);73657366if (!sampler)7367return;73687369if (sampler->border_color_slot != RADV_BORDER_COLOR_COUNT)7370radv_unregister_border_color(device, sampler->border_color_slot);73717372vk_object_base_finish(&sampler->base);7373vk_free2(&device->vk.alloc, pAllocator, sampler);7374}73757376PUBLIC VKAPI_ATTR VkResult VKAPI_CALL7377vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)7378{7379/* For the full details on loader interface versioning, see7380* <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.7381* What follows is a condensed summary, to help you navigate the large and7382* confusing official doc.7383*7384* - Loader interface v0 is incompatible with later versions. We don't7385* support it.7386*7387* - In loader interface v1:7388* - The first ICD entrypoint called by the loader is7389* vk_icdGetInstanceProcAddr(). The ICD must statically expose this7390* entrypoint.7391* - The ICD must statically expose no other Vulkan symbol unless it is7392* linked with -Bsymbolic.7393* - Each dispatchable Vulkan handle created by the ICD must be7394* a pointer to a struct whose first member is VK_LOADER_DATA. The7395* ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.7396* - The loader implements vkCreate{PLATFORM}SurfaceKHR() and7397* vkDestroySurfaceKHR(). The ICD must be capable of working with7398* such loader-managed surfaces.7399*7400* - Loader interface v2 differs from v1 in:7401* - The first ICD entrypoint called by the loader is7402* vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must7403* statically expose this entrypoint.7404*7405* - Loader interface v3 differs from v2 in:7406* - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),7407* vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,7408* because the loader no longer does so.7409*/7410*pSupportedVersion = MIN2(*pSupportedVersion, 4u);7411return VK_SUCCESS;7412}74137414VkResult7415radv_GetMemoryFdKHR(VkDevice _device, const VkMemoryGetFdInfoKHR *pGetFdInfo, int *pFD)7416{7417RADV_FROM_HANDLE(radv_device, device, _device);7418RADV_FROM_HANDLE(radv_device_memory, memory, pGetFdInfo->memory);74197420assert(pGetFdInfo->sType == VK_STRUCTURE_TYPE_MEMORY_GET_FD_INFO_KHR);74217422/* At the moment, we support only the below handle types. */7423assert(pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_OPAQUE_FD_BIT ||7424pGetFdInfo->handleType == VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT);74257426bool ret = radv_get_memory_fd(device, memory, pFD);7427if (ret == false)7428return vk_error(device->instance, VK_ERROR_OUT_OF_DEVICE_MEMORY);7429return VK_SUCCESS;7430}74317432static uint32_t7433radv_compute_valid_memory_types_attempt(struct radv_physical_device *dev,7434enum radeon_bo_domain domains, enum radeon_bo_flag flags,7435enum radeon_bo_flag ignore_flags)7436{7437/* Don't count GTT/CPU as relevant:7438*7439* - We're not fully consistent between the two.7440* - Sometimes VRAM gets VRAM|GTT.7441*/7442const enum radeon_bo_domain relevant_domains =7443RADEON_DOMAIN_VRAM | RADEON_DOMAIN_GDS | RADEON_DOMAIN_OA;7444uint32_t bits = 0;7445for (unsigned i = 0; i < dev->memory_properties.memoryTypeCount; ++i) {7446if ((domains & relevant_domains) != (dev->memory_domains[i] & relevant_domains))7447continue;74487449if ((flags & ~ignore_flags) != (dev->memory_flags[i] & ~ignore_flags))7450continue;74517452bits |= 1u << i;7453}74547455return bits;7456}74577458static uint32_t7459radv_compute_valid_memory_types(struct radv_physical_device *dev, enum radeon_bo_domain domains,7460enum radeon_bo_flag flags)7461{7462enum radeon_bo_flag ignore_flags = ~(RADEON_FLAG_NO_CPU_ACCESS | RADEON_FLAG_GTT_WC);7463uint32_t bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);74647465if (!bits) {7466ignore_flags |= RADEON_FLAG_GTT_WC;7467bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);7468}74697470if (!bits) {7471ignore_flags |= RADEON_FLAG_NO_CPU_ACCESS;7472bits = radv_compute_valid_memory_types_attempt(dev, domains, flags, ignore_flags);7473}74747475return bits;7476}7477VkResult7478radv_GetMemoryFdPropertiesKHR(VkDevice _device, VkExternalMemoryHandleTypeFlagBits handleType,7479int fd, VkMemoryFdPropertiesKHR *pMemoryFdProperties)7480{7481RADV_FROM_HANDLE(radv_device, device, _device);74827483switch (handleType) {7484case VK_EXTERNAL_MEMORY_HANDLE_TYPE_DMA_BUF_BIT_EXT: {7485enum radeon_bo_domain domains;7486enum radeon_bo_flag flags;7487if (!device->ws->buffer_get_flags_from_fd(device->ws, fd, &domains, &flags))7488return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);74897490pMemoryFdProperties->memoryTypeBits =7491radv_compute_valid_memory_types(device->physical_device, domains, flags);7492return VK_SUCCESS;7493}7494default:7495/* The valid usage section for this function says:7496*7497* "handleType must not be one of the handle types defined as7498* opaque."7499*7500* So opaque handle types fall into the default "unsupported" case.7501*/7502return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);7503}7504}75057506static VkResult7507radv_import_opaque_fd(struct radv_device *device, int fd, uint32_t *syncobj)7508{7509uint32_t syncobj_handle = 0;7510int ret = device->ws->import_syncobj(device->ws, fd, &syncobj_handle);7511if (ret != 0)7512return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);75137514if (*syncobj)7515device->ws->destroy_syncobj(device->ws, *syncobj);75167517*syncobj = syncobj_handle;7518close(fd);75197520return VK_SUCCESS;7521}75227523static VkResult7524radv_import_sync_fd(struct radv_device *device, int fd, uint32_t *syncobj)7525{7526/* If we create a syncobj we do it locally so that if we have an error, we don't7527* leave a syncobj in an undetermined state in the fence. */7528uint32_t syncobj_handle = *syncobj;7529if (!syncobj_handle) {7530bool create_signaled = fd == -1 ? true : false;75317532int ret = device->ws->create_syncobj(device->ws, create_signaled, &syncobj_handle);7533if (ret) {7534return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);7535}7536} else {7537if (fd == -1)7538device->ws->signal_syncobj(device->ws, syncobj_handle, 0);7539}75407541if (fd != -1) {7542int ret = device->ws->import_syncobj_from_sync_file(device->ws, syncobj_handle, fd);7543if (ret)7544return vk_error(device->instance, VK_ERROR_INVALID_EXTERNAL_HANDLE);7545close(fd);7546}75477548*syncobj = syncobj_handle;75497550return VK_SUCCESS;7551}75527553VkResult7554radv_ImportSemaphoreFdKHR(VkDevice _device,7555const VkImportSemaphoreFdInfoKHR *pImportSemaphoreFdInfo)7556{7557RADV_FROM_HANDLE(radv_device, device, _device);7558RADV_FROM_HANDLE(radv_semaphore, sem, pImportSemaphoreFdInfo->semaphore);7559VkResult result;7560struct radv_semaphore_part *dst = NULL;7561bool timeline = sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ;75627563if (pImportSemaphoreFdInfo->flags & VK_SEMAPHORE_IMPORT_TEMPORARY_BIT) {7564assert(!timeline);7565dst = &sem->temporary;7566} else {7567dst = &sem->permanent;7568}75697570uint32_t syncobj =7571(dst->kind == RADV_SEMAPHORE_SYNCOBJ || dst->kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ)7572? dst->syncobj7573: 0;75747575switch (pImportSemaphoreFdInfo->handleType) {7576case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:7577result = radv_import_opaque_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);7578break;7579case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:7580assert(!timeline);7581result = radv_import_sync_fd(device, pImportSemaphoreFdInfo->fd, &syncobj);7582break;7583default:7584unreachable("Unhandled semaphore handle type");7585}75867587if (result == VK_SUCCESS) {7588dst->syncobj = syncobj;7589dst->kind = RADV_SEMAPHORE_SYNCOBJ;7590if (timeline) {7591dst->kind = RADV_SEMAPHORE_TIMELINE_SYNCOBJ;7592dst->timeline_syncobj.max_point = 0;7593}7594}75957596return result;7597}75987599VkResult7600radv_GetSemaphoreFdKHR(VkDevice _device, const VkSemaphoreGetFdInfoKHR *pGetFdInfo, int *pFd)7601{7602RADV_FROM_HANDLE(radv_device, device, _device);7603RADV_FROM_HANDLE(radv_semaphore, sem, pGetFdInfo->semaphore);7604int ret;7605uint32_t syncobj_handle;76067607if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {7608assert(sem->temporary.kind == RADV_SEMAPHORE_SYNCOBJ ||7609sem->temporary.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);7610syncobj_handle = sem->temporary.syncobj;7611} else {7612assert(sem->permanent.kind == RADV_SEMAPHORE_SYNCOBJ ||7613sem->permanent.kind == RADV_SEMAPHORE_TIMELINE_SYNCOBJ);7614syncobj_handle = sem->permanent.syncobj;7615}76167617switch (pGetFdInfo->handleType) {7618case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT:7619ret = device->ws->export_syncobj(device->ws, syncobj_handle, pFd);7620if (ret)7621return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);7622break;7623case VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT:7624ret = device->ws->export_syncobj_to_sync_file(device->ws, syncobj_handle, pFd);7625if (ret)7626return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);76277628if (sem->temporary.kind != RADV_SEMAPHORE_NONE) {7629radv_destroy_semaphore_part(device, &sem->temporary);7630} else {7631device->ws->reset_syncobj(device->ws, syncobj_handle);7632}7633break;7634default:7635unreachable("Unhandled semaphore handle type");7636}76377638return VK_SUCCESS;7639}76407641void7642radv_GetPhysicalDeviceExternalSemaphoreProperties(7643VkPhysicalDevice physicalDevice,7644const VkPhysicalDeviceExternalSemaphoreInfo *pExternalSemaphoreInfo,7645VkExternalSemaphoreProperties *pExternalSemaphoreProperties)7646{7647RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice);7648VkSemaphoreTypeKHR type = radv_get_semaphore_type(pExternalSemaphoreInfo->pNext, NULL);76497650if (type == VK_SEMAPHORE_TYPE_TIMELINE && pdevice->rad_info.has_timeline_syncobj &&7651pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {7652pExternalSemaphoreProperties->exportFromImportedHandleTypes =7653VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;7654pExternalSemaphoreProperties->compatibleHandleTypes =7655VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;7656pExternalSemaphoreProperties->externalSemaphoreFeatures =7657VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |7658VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;7659} else if (type == VK_SEMAPHORE_TYPE_TIMELINE) {7660pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;7661pExternalSemaphoreProperties->compatibleHandleTypes = 0;7662pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;7663} else if (pExternalSemaphoreInfo->handleType ==7664VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT ||7665pExternalSemaphoreInfo->handleType == VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT) {7666pExternalSemaphoreProperties->exportFromImportedHandleTypes =7667VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |7668VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;7669pExternalSemaphoreProperties->compatibleHandleTypes =7670VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT |7671VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_SYNC_FD_BIT;7672pExternalSemaphoreProperties->externalSemaphoreFeatures =7673VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |7674VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;7675} else if (pExternalSemaphoreInfo->handleType ==7676VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT) {7677pExternalSemaphoreProperties->exportFromImportedHandleTypes =7678VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;7679pExternalSemaphoreProperties->compatibleHandleTypes =7680VK_EXTERNAL_SEMAPHORE_HANDLE_TYPE_OPAQUE_FD_BIT;7681pExternalSemaphoreProperties->externalSemaphoreFeatures =7682VK_EXTERNAL_SEMAPHORE_FEATURE_EXPORTABLE_BIT |7683VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;7684} else {7685pExternalSemaphoreProperties->exportFromImportedHandleTypes = 0;7686pExternalSemaphoreProperties->compatibleHandleTypes = 0;7687pExternalSemaphoreProperties->externalSemaphoreFeatures = 0;7688}7689}76907691VkResult7692radv_ImportFenceFdKHR(VkDevice _device, const VkImportFenceFdInfoKHR *pImportFenceFdInfo)7693{7694RADV_FROM_HANDLE(radv_device, device, _device);7695RADV_FROM_HANDLE(radv_fence, fence, pImportFenceFdInfo->fence);7696struct radv_fence_part *dst = NULL;7697VkResult result;76987699if (pImportFenceFdInfo->flags & VK_FENCE_IMPORT_TEMPORARY_BIT) {7700dst = &fence->temporary;7701} else {7702dst = &fence->permanent;7703}77047705uint32_t syncobj = dst->kind == RADV_FENCE_SYNCOBJ ? dst->syncobj : 0;77067707switch (pImportFenceFdInfo->handleType) {7708case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:7709result = radv_import_opaque_fd(device, pImportFenceFdInfo->fd, &syncobj);7710break;7711case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:7712result = radv_import_sync_fd(device, pImportFenceFdInfo->fd, &syncobj);7713break;7714default:7715unreachable("Unhandled fence handle type");7716}77177718if (result == VK_SUCCESS) {7719dst->syncobj = syncobj;7720dst->kind = RADV_FENCE_SYNCOBJ;7721}77227723return result;7724}77257726VkResult7727radv_GetFenceFdKHR(VkDevice _device, const VkFenceGetFdInfoKHR *pGetFdInfo, int *pFd)7728{7729RADV_FROM_HANDLE(radv_device, device, _device);7730RADV_FROM_HANDLE(radv_fence, fence, pGetFdInfo->fence);7731int ret;77327733struct radv_fence_part *part =7734fence->temporary.kind != RADV_FENCE_NONE ? &fence->temporary : &fence->permanent;77357736switch (pGetFdInfo->handleType) {7737case VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT:7738ret = device->ws->export_syncobj(device->ws, part->syncobj, pFd);7739if (ret)7740return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);7741break;7742case VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT:7743ret = device->ws->export_syncobj_to_sync_file(device->ws, part->syncobj, pFd);7744if (ret)7745return vk_error(device->instance, VK_ERROR_TOO_MANY_OBJECTS);77467747if (part == &fence->temporary) {7748radv_destroy_fence_part(device, part);7749} else {7750device->ws->reset_syncobj(device->ws, part->syncobj);7751}7752break;7753default:7754unreachable("Unhandled fence handle type");7755}77567757return VK_SUCCESS;7758}77597760void7761radv_GetPhysicalDeviceExternalFenceProperties(7762VkPhysicalDevice physicalDevice, const VkPhysicalDeviceExternalFenceInfo *pExternalFenceInfo,7763VkExternalFenceProperties *pExternalFenceProperties)7764{7765if (pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT ||7766pExternalFenceInfo->handleType == VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT) {7767pExternalFenceProperties->exportFromImportedHandleTypes =7768VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;7769pExternalFenceProperties->compatibleHandleTypes =7770VK_EXTERNAL_FENCE_HANDLE_TYPE_OPAQUE_FD_BIT | VK_EXTERNAL_FENCE_HANDLE_TYPE_SYNC_FD_BIT;7771pExternalFenceProperties->externalFenceFeatures =7772VK_EXTERNAL_FENCE_FEATURE_EXPORTABLE_BIT | VK_EXTERNAL_SEMAPHORE_FEATURE_IMPORTABLE_BIT;7773} else {7774pExternalFenceProperties->exportFromImportedHandleTypes = 0;7775pExternalFenceProperties->compatibleHandleTypes = 0;7776pExternalFenceProperties->externalFenceFeatures = 0;7777}7778}77797780void7781radv_GetDeviceGroupPeerMemoryFeatures(VkDevice device, uint32_t heapIndex,7782uint32_t localDeviceIndex, uint32_t remoteDeviceIndex,7783VkPeerMemoryFeatureFlags *pPeerMemoryFeatures)7784{7785assert(localDeviceIndex == remoteDeviceIndex);77867787*pPeerMemoryFeatures =7788VK_PEER_MEMORY_FEATURE_COPY_SRC_BIT | VK_PEER_MEMORY_FEATURE_COPY_DST_BIT |7789VK_PEER_MEMORY_FEATURE_GENERIC_SRC_BIT | VK_PEER_MEMORY_FEATURE_GENERIC_DST_BIT;7790}77917792static const VkTimeDomainEXT radv_time_domains[] = {7793VK_TIME_DOMAIN_DEVICE_EXT,7794VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT,7795#ifdef CLOCK_MONOTONIC_RAW7796VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT,7797#endif7798};77997800VkResult7801radv_GetPhysicalDeviceCalibrateableTimeDomainsEXT(VkPhysicalDevice physicalDevice,7802uint32_t *pTimeDomainCount,7803VkTimeDomainEXT *pTimeDomains)7804{7805int d;7806VK_OUTARRAY_MAKE_TYPED(VkTimeDomainEXT, out, pTimeDomains, pTimeDomainCount);78077808for (d = 0; d < ARRAY_SIZE(radv_time_domains); d++) {7809vk_outarray_append_typed(VkTimeDomainEXT, &out, i)7810{7811*i = radv_time_domains[d];7812}7813}78147815return vk_outarray_status(&out);7816}78177818#ifndef _WIN327819static uint64_t7820radv_clock_gettime(clockid_t clock_id)7821{7822struct timespec current;7823int ret;78247825ret = clock_gettime(clock_id, ¤t);7826#ifdef CLOCK_MONOTONIC_RAW7827if (ret < 0 && clock_id == CLOCK_MONOTONIC_RAW)7828ret = clock_gettime(CLOCK_MONOTONIC, ¤t);7829#endif7830if (ret < 0)7831return 0;78327833return (uint64_t)current.tv_sec * 1000000000ULL + current.tv_nsec;7834}78357836VkResult7837radv_GetCalibratedTimestampsEXT(VkDevice _device, uint32_t timestampCount,7838const VkCalibratedTimestampInfoEXT *pTimestampInfos,7839uint64_t *pTimestamps, uint64_t *pMaxDeviation)7840{7841RADV_FROM_HANDLE(radv_device, device, _device);7842uint32_t clock_crystal_freq = device->physical_device->rad_info.clock_crystal_freq;7843int d;7844uint64_t begin, end;7845uint64_t max_clock_period = 0;78467847#ifdef CLOCK_MONOTONIC_RAW7848begin = radv_clock_gettime(CLOCK_MONOTONIC_RAW);7849#else7850begin = radv_clock_gettime(CLOCK_MONOTONIC);7851#endif78527853for (d = 0; d < timestampCount; d++) {7854switch (pTimestampInfos[d].timeDomain) {7855case VK_TIME_DOMAIN_DEVICE_EXT:7856pTimestamps[d] = device->ws->query_value(device->ws, RADEON_TIMESTAMP);7857uint64_t device_period = DIV_ROUND_UP(1000000, clock_crystal_freq);7858max_clock_period = MAX2(max_clock_period, device_period);7859break;7860case VK_TIME_DOMAIN_CLOCK_MONOTONIC_EXT:7861pTimestamps[d] = radv_clock_gettime(CLOCK_MONOTONIC);7862max_clock_period = MAX2(max_clock_period, 1);7863break;78647865#ifdef CLOCK_MONOTONIC_RAW7866case VK_TIME_DOMAIN_CLOCK_MONOTONIC_RAW_EXT:7867pTimestamps[d] = begin;7868break;7869#endif7870default:7871pTimestamps[d] = 0;7872break;7873}7874}78757876#ifdef CLOCK_MONOTONIC_RAW7877end = radv_clock_gettime(CLOCK_MONOTONIC_RAW);7878#else7879end = radv_clock_gettime(CLOCK_MONOTONIC);7880#endif78817882/*7883* The maximum deviation is the sum of the interval over which we7884* perform the sampling and the maximum period of any sampled7885* clock. That's because the maximum skew between any two sampled7886* clock edges is when the sampled clock with the largest period is7887* sampled at the end of that period but right at the beginning of the7888* sampling interval and some other clock is sampled right at the7889* begining of its sampling period and right at the end of the7890* sampling interval. Let's assume the GPU has the longest clock7891* period and that the application is sampling GPU and monotonic:7892*7893* s e7894* w x y z 0 1 2 3 4 5 6 7 8 9 a b c d e f7895* Raw -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-7896*7897* g7898* 0 1 2 37899* GPU -----_____-----_____-----_____-----_____7900*7901* m7902* x y z 0 1 2 3 4 5 6 7 8 9 a b c7903* Monotonic -_-_-_-_-_-_-_-_-_-_-_-_-_-_-_-7904*7905* Interval <----------------->7906* Deviation <-------------------------->7907*7908* s = read(raw) 27909* g = read(GPU) 17910* m = read(monotonic) 27911* e = read(raw) b7912*7913* We round the sample interval up by one tick to cover sampling error7914* in the interval clock7915*/79167917uint64_t sample_interval = end - begin + 1;79187919*pMaxDeviation = sample_interval + max_clock_period;79207921return VK_SUCCESS;7922}7923#endif79247925void7926radv_GetPhysicalDeviceMultisamplePropertiesEXT(VkPhysicalDevice physicalDevice,7927VkSampleCountFlagBits samples,7928VkMultisamplePropertiesEXT *pMultisampleProperties)7929{7930RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice);7931VkSampleCountFlagBits supported_samples = VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT;79327933if (physical_device->rad_info.chip_class < GFX10)7934supported_samples |= VK_SAMPLE_COUNT_8_BIT;79357936if (samples & supported_samples) {7937pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){2, 2};7938} else {7939pMultisampleProperties->maxSampleLocationGridSize = (VkExtent2D){0, 0};7940}7941}79427943VkResult7944radv_GetPhysicalDeviceFragmentShadingRatesKHR(7945VkPhysicalDevice physicalDevice, uint32_t *pFragmentShadingRateCount,7946VkPhysicalDeviceFragmentShadingRateKHR *pFragmentShadingRates)7947{7948VK_OUTARRAY_MAKE_TYPED(VkPhysicalDeviceFragmentShadingRateKHR, out, pFragmentShadingRates,7949pFragmentShadingRateCount);79507951#define append_rate(w, h, s) \7952{ \7953VkPhysicalDeviceFragmentShadingRateKHR rate = { \7954.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_FRAGMENT_SHADING_RATE_PROPERTIES_KHR, \7955.sampleCounts = s, \7956.fragmentSize = {.width = w, .height = h}, \7957}; \7958vk_outarray_append_typed(VkPhysicalDeviceFragmentShadingRateKHR, &out, r) *r = rate; \7959}79607961for (uint32_t x = 2; x >= 1; x--) {7962for (uint32_t y = 2; y >= 1; y--) {7963append_rate(x, y,7964VK_SAMPLE_COUNT_1_BIT | VK_SAMPLE_COUNT_2_BIT | VK_SAMPLE_COUNT_4_BIT |7965VK_SAMPLE_COUNT_8_BIT);7966}7967}7968#undef append_rate79697970return vk_outarray_status(&out);7971}797279737974