Path: blob/21.2-virgl/src/intel/vulkan/anv_perf.c
4547 views
/*1* Copyright © 2018 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>25#include <stdint.h>2627#include "anv_private.h"28#include "vk_util.h"2930#include "perf/intel_perf.h"31#include "perf/intel_perf_mdapi.h"3233#include "util/mesa-sha1.h"3435void36anv_physical_device_init_perf(struct anv_physical_device *device, int fd)37{38const struct intel_device_info *devinfo = &device->info;3940device->perf = NULL;4142/* We need self modifying batches. The i915 parser prevents it on43* Gfx7.5 :( maybe one day.44*/45if (devinfo->ver < 8)46return;4748struct intel_perf_config *perf = intel_perf_new(NULL);4950intel_perf_init_metrics(perf, &device->info, fd,51false /* pipeline statistics */,52true /* register snapshots */);5354if (!perf->n_queries) {55if (perf->platform_supported) {56static bool warned_once = false;5758if (!warned_once) {59mesa_logw("Performance support disabled, "60"consider sysctl dev.i915.perf_stream_paranoid=0\n");61warned_once = true;62}63}64goto err;65}6667/* We need DRM_I915_PERF_PROP_HOLD_PREEMPTION support, only available in68* perf revision 2.69*/70if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {71if (!intel_perf_has_hold_preemption(perf))72goto err;73}7475device->perf = perf;7677/* Compute the number of commands we need to implement a performance78* query.79*/80const struct intel_perf_query_field_layout *layout = &perf->query_layout;81device->n_perf_query_commands = 0;82for (uint32_t f = 0; f < layout->n_fields; f++) {83struct intel_perf_query_field *field = &layout->fields[f];8485switch (field->type) {86case INTEL_PERF_QUERY_FIELD_TYPE_MI_RPC:87device->n_perf_query_commands++;88break;89case INTEL_PERF_QUERY_FIELD_TYPE_SRM_PERFCNT:90case INTEL_PERF_QUERY_FIELD_TYPE_SRM_RPSTAT:91case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_B:92case INTEL_PERF_QUERY_FIELD_TYPE_SRM_OA_C:93device->n_perf_query_commands += field->size / 4;94break;95}96}97device->n_perf_query_commands *= 2; /* Begin & End */98device->n_perf_query_commands += 1; /* availability */99100return;101102err:103ralloc_free(perf);104}105106void107anv_device_perf_init(struct anv_device *device)108{109device->perf_fd = -1;110}111112static int113anv_device_perf_open(struct anv_device *device, uint64_t metric_id)114{115uint64_t properties[DRM_I915_PERF_PROP_MAX * 2];116struct drm_i915_perf_open_param param;117int p = 0, stream_fd;118119properties[p++] = DRM_I915_PERF_PROP_SAMPLE_OA;120properties[p++] = true;121122properties[p++] = DRM_I915_PERF_PROP_OA_METRICS_SET;123properties[p++] = metric_id;124125properties[p++] = DRM_I915_PERF_PROP_OA_FORMAT;126properties[p++] = device->info.ver >= 8 ?127I915_OA_FORMAT_A32u40_A4u32_B8_C8 :128I915_OA_FORMAT_A45_B8_C8;129130properties[p++] = DRM_I915_PERF_PROP_OA_EXPONENT;131properties[p++] = 31; /* slowest sampling period */132133properties[p++] = DRM_I915_PERF_PROP_CTX_HANDLE;134properties[p++] = device->context_id;135136properties[p++] = DRM_I915_PERF_PROP_HOLD_PREEMPTION;137properties[p++] = true;138139/* If global SSEU is available, pin it to the default. This will ensure on140* Gfx11 for instance we use the full EU array. Initially when perf was141* enabled we would use only half on Gfx11 because of functional142* requirements.143*/144if (intel_perf_has_global_sseu(device->physical->perf)) {145properties[p++] = DRM_I915_PERF_PROP_GLOBAL_SSEU;146properties[p++] = (uintptr_t) &device->physical->perf->sseu;147}148149memset(¶m, 0, sizeof(param));150param.flags = 0;151param.flags |= I915_PERF_FLAG_FD_CLOEXEC | I915_PERF_FLAG_FD_NONBLOCK;152param.properties_ptr = (uintptr_t)properties;153param.num_properties = p / 2;154155stream_fd = intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_OPEN, ¶m);156return stream_fd;157}158159/* VK_INTEL_performance_query */160VkResult anv_InitializePerformanceApiINTEL(161VkDevice _device,162const VkInitializePerformanceApiInfoINTEL* pInitializeInfo)163{164ANV_FROM_HANDLE(anv_device, device, _device);165166if (!device->physical->perf)167return VK_ERROR_EXTENSION_NOT_PRESENT;168169/* Not much to do here */170return VK_SUCCESS;171}172173VkResult anv_GetPerformanceParameterINTEL(174VkDevice _device,175VkPerformanceParameterTypeINTEL parameter,176VkPerformanceValueINTEL* pValue)177{178ANV_FROM_HANDLE(anv_device, device, _device);179180if (!device->physical->perf)181return VK_ERROR_EXTENSION_NOT_PRESENT;182183VkResult result = VK_SUCCESS;184switch (parameter) {185case VK_PERFORMANCE_PARAMETER_TYPE_HW_COUNTERS_SUPPORTED_INTEL:186pValue->type = VK_PERFORMANCE_VALUE_TYPE_BOOL_INTEL;187pValue->data.valueBool = VK_TRUE;188break;189190case VK_PERFORMANCE_PARAMETER_TYPE_STREAM_MARKER_VALID_BITS_INTEL:191pValue->type = VK_PERFORMANCE_VALUE_TYPE_UINT32_INTEL;192pValue->data.value32 = 25;193break;194195default:196result = VK_ERROR_FEATURE_NOT_PRESENT;197break;198}199200return result;201}202203VkResult anv_CmdSetPerformanceMarkerINTEL(204VkCommandBuffer commandBuffer,205const VkPerformanceMarkerInfoINTEL* pMarkerInfo)206{207ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer, commandBuffer);208209cmd_buffer->intel_perf_marker = pMarkerInfo->marker;210211return VK_SUCCESS;212}213214VkResult anv_AcquirePerformanceConfigurationINTEL(215VkDevice _device,216const VkPerformanceConfigurationAcquireInfoINTEL* pAcquireInfo,217VkPerformanceConfigurationINTEL* pConfiguration)218{219ANV_FROM_HANDLE(anv_device, device, _device);220struct anv_performance_configuration_intel *config;221222config = vk_object_alloc(&device->vk, NULL, sizeof(*config),223VK_OBJECT_TYPE_PERFORMANCE_CONFIGURATION_INTEL);224if (!config)225return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);226227if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {228config->register_config =229intel_perf_load_configuration(device->physical->perf, device->fd,230INTEL_PERF_QUERY_GUID_MDAPI);231if (!config->register_config) {232vk_object_free(&device->vk, NULL, config);233return VK_INCOMPLETE;234}235236int ret =237intel_perf_store_configuration(device->physical->perf, device->fd,238config->register_config, NULL /* guid */);239if (ret < 0) {240ralloc_free(config->register_config);241vk_object_free(&device->vk, NULL, config);242return VK_INCOMPLETE;243}244245config->config_id = ret;246}247248*pConfiguration = anv_performance_configuration_intel_to_handle(config);249250return VK_SUCCESS;251}252253VkResult anv_ReleasePerformanceConfigurationINTEL(254VkDevice _device,255VkPerformanceConfigurationINTEL _configuration)256{257ANV_FROM_HANDLE(anv_device, device, _device);258ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);259260if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG))261intel_ioctl(device->fd, DRM_IOCTL_I915_PERF_REMOVE_CONFIG, &config->config_id);262263ralloc_free(config->register_config);264265vk_object_free(&device->vk, NULL, config);266267return VK_SUCCESS;268}269270VkResult anv_QueueSetPerformanceConfigurationINTEL(271VkQueue _queue,272VkPerformanceConfigurationINTEL _configuration)273{274ANV_FROM_HANDLE(anv_queue, queue, _queue);275ANV_FROM_HANDLE(anv_performance_configuration_intel, config, _configuration);276struct anv_device *device = queue->device;277278if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {279if (device->perf_fd < 0) {280device->perf_fd = anv_device_perf_open(device, config->config_id);281if (device->perf_fd < 0)282return VK_ERROR_INITIALIZATION_FAILED;283} else {284int ret = intel_ioctl(device->perf_fd, I915_PERF_IOCTL_CONFIG,285(void *)(uintptr_t) config->config_id);286if (ret < 0)287return anv_device_set_lost(device, "i915-perf config failed: %m");288}289}290291return VK_SUCCESS;292}293294void anv_UninitializePerformanceApiINTEL(295VkDevice _device)296{297ANV_FROM_HANDLE(anv_device, device, _device);298299if (device->perf_fd >= 0) {300close(device->perf_fd);301device->perf_fd = -1;302}303}304305/* VK_KHR_performance_query */306static const VkPerformanceCounterUnitKHR307intel_perf_counter_unit_to_vk_unit[] = {308[INTEL_PERF_COUNTER_UNITS_BYTES] = VK_PERFORMANCE_COUNTER_UNIT_BYTES_KHR,309[INTEL_PERF_COUNTER_UNITS_HZ] = VK_PERFORMANCE_COUNTER_UNIT_HERTZ_KHR,310[INTEL_PERF_COUNTER_UNITS_NS] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR,311[INTEL_PERF_COUNTER_UNITS_US] = VK_PERFORMANCE_COUNTER_UNIT_NANOSECONDS_KHR, /* todo */312[INTEL_PERF_COUNTER_UNITS_PIXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,313[INTEL_PERF_COUNTER_UNITS_TEXELS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,314[INTEL_PERF_COUNTER_UNITS_THREADS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,315[INTEL_PERF_COUNTER_UNITS_PERCENT] = VK_PERFORMANCE_COUNTER_UNIT_PERCENTAGE_KHR,316[INTEL_PERF_COUNTER_UNITS_MESSAGES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,317[INTEL_PERF_COUNTER_UNITS_NUMBER] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,318[INTEL_PERF_COUNTER_UNITS_CYCLES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,319[INTEL_PERF_COUNTER_UNITS_EVENTS] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,320[INTEL_PERF_COUNTER_UNITS_UTILIZATION] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,321[INTEL_PERF_COUNTER_UNITS_EU_SENDS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,322[INTEL_PERF_COUNTER_UNITS_EU_ATOMIC_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,323[INTEL_PERF_COUNTER_UNITS_EU_REQUESTS_TO_L3_CACHE_LINES] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,324[INTEL_PERF_COUNTER_UNITS_EU_BYTES_PER_L3_CACHE_LINE] = VK_PERFORMANCE_COUNTER_UNIT_GENERIC_KHR,325};326327static const VkPerformanceCounterStorageKHR328intel_perf_counter_data_type_to_vk_storage[] = {329[INTEL_PERF_COUNTER_DATA_TYPE_BOOL32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,330[INTEL_PERF_COUNTER_DATA_TYPE_UINT32] = VK_PERFORMANCE_COUNTER_STORAGE_UINT32_KHR,331[INTEL_PERF_COUNTER_DATA_TYPE_UINT64] = VK_PERFORMANCE_COUNTER_STORAGE_UINT64_KHR,332[INTEL_PERF_COUNTER_DATA_TYPE_FLOAT] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT32_KHR,333[INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE] = VK_PERFORMANCE_COUNTER_STORAGE_FLOAT64_KHR,334};335336VkResult anv_EnumeratePhysicalDeviceQueueFamilyPerformanceQueryCountersKHR(337VkPhysicalDevice physicalDevice,338uint32_t queueFamilyIndex,339uint32_t* pCounterCount,340VkPerformanceCounterKHR* pCounters,341VkPerformanceCounterDescriptionKHR* pCounterDescriptions)342{343ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);344struct intel_perf_config *perf = pdevice->perf;345346uint32_t desc_count = *pCounterCount;347348VK_OUTARRAY_MAKE(out, pCounters, pCounterCount);349VK_OUTARRAY_MAKE(out_desc, pCounterDescriptions, &desc_count);350351for (int c = 0; c < (perf ? perf->n_counters : 0); c++) {352const struct intel_perf_query_counter *intel_counter = perf->counter_infos[c].counter;353354vk_outarray_append(&out, counter) {355counter->unit = intel_perf_counter_unit_to_vk_unit[intel_counter->units];356counter->scope = VK_QUERY_SCOPE_COMMAND_KHR;357counter->storage = intel_perf_counter_data_type_to_vk_storage[intel_counter->data_type];358359unsigned char sha1_result[20];360_mesa_sha1_compute(intel_counter->symbol_name,361strlen(intel_counter->symbol_name),362sha1_result);363memcpy(counter->uuid, sha1_result, sizeof(counter->uuid));364}365366vk_outarray_append(&out_desc, desc) {367desc->flags = 0; /* None so far. */368snprintf(desc->name, sizeof(desc->name), "%s", intel_counter->name);369snprintf(desc->category, sizeof(desc->category), "%s", intel_counter->category);370snprintf(desc->description, sizeof(desc->description), "%s", intel_counter->desc);371}372}373374return vk_outarray_status(&out);375}376377void anv_GetPhysicalDeviceQueueFamilyPerformanceQueryPassesKHR(378VkPhysicalDevice physicalDevice,379const VkQueryPoolPerformanceCreateInfoKHR* pPerformanceQueryCreateInfo,380uint32_t* pNumPasses)381{382ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);383struct intel_perf_config *perf = pdevice->perf;384385if (!perf) {386*pNumPasses = 0;387return;388}389390*pNumPasses = intel_perf_get_n_passes(perf,391pPerformanceQueryCreateInfo->pCounterIndices,392pPerformanceQueryCreateInfo->counterIndexCount,393NULL);394}395396VkResult anv_AcquireProfilingLockKHR(397VkDevice _device,398const VkAcquireProfilingLockInfoKHR* pInfo)399{400ANV_FROM_HANDLE(anv_device, device, _device);401struct intel_perf_config *perf = device->physical->perf;402struct intel_perf_query_info *first_metric_set = &perf->queries[0];403int fd = -1;404405assert(device->perf_fd == -1);406407if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {408fd = anv_device_perf_open(device, first_metric_set->oa_metrics_set_id);409if (fd < 0)410return VK_TIMEOUT;411}412413device->perf_fd = fd;414return VK_SUCCESS;415}416417void anv_ReleaseProfilingLockKHR(418VkDevice _device)419{420ANV_FROM_HANDLE(anv_device, device, _device);421422if (!(INTEL_DEBUG & DEBUG_NO_OACONFIG)) {423assert(device->perf_fd >= 0);424close(device->perf_fd);425}426device->perf_fd = -1;427}428429void430anv_perf_write_pass_results(struct intel_perf_config *perf,431struct anv_query_pool *pool, uint32_t pass,432const struct intel_perf_query_result *accumulated_results,433union VkPerformanceCounterResultKHR *results)434{435for (uint32_t c = 0; c < pool->n_counters; c++) {436const struct intel_perf_counter_pass *counter_pass = &pool->counter_pass[c];437438if (counter_pass->pass != pass)439continue;440441switch (pool->pass_query[pass]->kind) {442case INTEL_PERF_QUERY_TYPE_PIPELINE: {443assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);444uint32_t accu_offset = counter_pass->counter->offset / sizeof(uint64_t);445results[c].uint64 = accumulated_results->accumulator[accu_offset];446break;447}448449case INTEL_PERF_QUERY_TYPE_OA:450case INTEL_PERF_QUERY_TYPE_RAW:451switch (counter_pass->counter->data_type) {452case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:453results[c].uint64 =454counter_pass->counter->oa_counter_read_uint64(perf,455counter_pass->query,456accumulated_results);457break;458case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:459results[c].float32 =460counter_pass->counter->oa_counter_read_float(perf,461counter_pass->query,462accumulated_results);463break;464default:465/* So far we aren't using uint32, double or bool32... */466unreachable("unexpected counter data type");467}468break;469470default:471unreachable("invalid query type");472}473474/* The Vulkan extension only has nanoseconds as a unit */475if (counter_pass->counter->units == INTEL_PERF_COUNTER_UNITS_US) {476assert(counter_pass->counter->data_type == INTEL_PERF_COUNTER_DATA_TYPE_UINT64);477results[c].uint64 *= 1000;478}479}480}481482483