Path: blob/21.2-virgl/src/gallium/drivers/crocus/crocus_monitor.c
4570 views
/*1* Copyright © 2019 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included11* in all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS14* OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER17* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING18* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER19* DEALINGS IN THE SOFTWARE.20*/2122#include "crocus_monitor.h"2324#include <xf86drm.h>2526#include "crocus_screen.h"27#include "crocus_context.h"2829#include "perf/intel_perf.h"30#include "perf/intel_perf_query.h"31#include "perf/intel_perf_regs.h"3233struct crocus_monitor_object {34int num_active_counters;35int *active_counters;3637size_t result_size;38unsigned char *result_buffer;3940struct intel_perf_query_object *query;41};4243int44crocus_get_monitor_info(struct pipe_screen *pscreen, unsigned index,45struct pipe_driver_query_info *info)46{47const struct crocus_screen *screen = (struct crocus_screen *)pscreen;48assert(screen->monitor_cfg);49if (!screen->monitor_cfg)50return 0;5152const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;5354if (!info) {55/* return the number of metrics */56return monitor_cfg->num_counters;57}5859const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;60const int group = monitor_cfg->counters[index].group;61const int counter_index = monitor_cfg->counters[index].counter;62struct intel_perf_query_counter *counter =63&perf_cfg->queries[group].counters[counter_index];6465info->group_id = group;66info->name = counter->name;67info->query_type = PIPE_QUERY_DRIVER_SPECIFIC + index;6869if (counter->type == INTEL_PERF_COUNTER_TYPE_THROUGHPUT)70info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_AVERAGE;71else72info->result_type = PIPE_DRIVER_QUERY_RESULT_TYPE_CUMULATIVE;73switch (counter->data_type) {74case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:75case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:76info->type = PIPE_DRIVER_QUERY_TYPE_UINT;77info->max_value.u32 = 0;78break;79case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:80info->type = PIPE_DRIVER_QUERY_TYPE_UINT64;81info->max_value.u64 = 0;82break;83case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:84case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE:85info->type = PIPE_DRIVER_QUERY_TYPE_FLOAT;86info->max_value.u64 = -1;87break;88default:89assert(false);90break;91}9293/* indicates that this is an OA query, not a pipeline statistics query */94info->flags = PIPE_DRIVER_QUERY_FLAG_BATCH;95return 1;96}9798typedef void (*bo_unreference_t)(void *);99typedef void *(*bo_map_t)(void *, void *, unsigned flags);100typedef void (*bo_unmap_t)(void *);101typedef void (*emit_mi_report_t)(void *, void *, uint32_t, uint32_t);102typedef void (*emit_mi_flush_t)(void *);103typedef void (*capture_frequency_stat_register_t)(void *, void *,104uint32_t );105typedef void (*store_register_mem64_t)(void *ctx, void *bo,106uint32_t reg, uint32_t offset);107typedef bool (*batch_references_t)(void *batch, void *bo);108typedef void (*bo_wait_rendering_t)(void *bo);109typedef int (*bo_busy_t)(void *bo);110111static void *112crocus_oa_bo_alloc(void *bufmgr, const char *name, uint64_t size)113{114return crocus_bo_alloc(bufmgr, name, size);115}116117#if 0118static void119crocus_monitor_emit_mi_flush(struct crocus_context *ice)120{121const int flags = PIPE_CONTROL_RENDER_TARGET_FLUSH |122PIPE_CONTROL_INSTRUCTION_INVALIDATE |123PIPE_CONTROL_CONST_CACHE_INVALIDATE |124PIPE_CONTROL_DATA_CACHE_FLUSH |125PIPE_CONTROL_DEPTH_CACHE_FLUSH |126PIPE_CONTROL_VF_CACHE_INVALIDATE |127PIPE_CONTROL_TEXTURE_CACHE_INVALIDATE |128PIPE_CONTROL_CS_STALL;129crocus_emit_pipe_control_flush(&ice->batches[CROCUS_BATCH_RENDER],130"OA metrics", flags);131}132#endif133134static void135crocus_monitor_emit_mi_report_perf_count(void *c,136void *bo,137uint32_t offset_in_bytes,138uint32_t report_id)139{140struct crocus_context *ice = c;141struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];142struct crocus_screen *screen = batch->screen;143screen->vtbl.emit_mi_report_perf_count(batch, bo, offset_in_bytes, report_id);144}145146static void147crocus_monitor_batchbuffer_flush(void *c, const char *file, int line)148{149struct crocus_context *ice = c;150_crocus_batch_flush(&ice->batches[CROCUS_BATCH_RENDER], __FILE__, __LINE__);151}152153#if 0154static void155crocus_monitor_capture_frequency_stat_register(void *ctx,156void *bo,157uint32_t bo_offset)158{159struct crocus_context *ice = ctx;160struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];161ice->vtbl.store_register_mem32(batch, GEN9_RPSTAT0, bo, bo_offset, false);162}163164static void165crocus_monitor_store_register_mem64(void *ctx, void *bo,166uint32_t reg, uint32_t offset)167{168struct crocus_context *ice = ctx;169struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];170ice->vtbl.store_register_mem64(batch, reg, bo, offset, false);171}172#endif173174static bool175crocus_monitor_init_metrics(struct crocus_screen *screen)176{177struct crocus_monitor_config *monitor_cfg =178rzalloc(screen, struct crocus_monitor_config);179struct intel_perf_config *perf_cfg = NULL;180if (unlikely(!monitor_cfg))181goto allocation_error;182perf_cfg = intel_perf_new(monitor_cfg);183if (unlikely(!perf_cfg))184goto allocation_error;185186monitor_cfg->perf_cfg = perf_cfg;187188perf_cfg->vtbl.bo_alloc = crocus_oa_bo_alloc;189perf_cfg->vtbl.bo_unreference = (bo_unreference_t)crocus_bo_unreference;190perf_cfg->vtbl.bo_map = (bo_map_t)crocus_bo_map;191perf_cfg->vtbl.bo_unmap = (bo_unmap_t)crocus_bo_unmap;192193perf_cfg->vtbl.emit_mi_report_perf_count =194(emit_mi_report_t)crocus_monitor_emit_mi_report_perf_count;195perf_cfg->vtbl.batchbuffer_flush = crocus_monitor_batchbuffer_flush;196perf_cfg->vtbl.batch_references = (batch_references_t)crocus_batch_references;197perf_cfg->vtbl.bo_wait_rendering =198(bo_wait_rendering_t)crocus_bo_wait_rendering;199perf_cfg->vtbl.bo_busy = (bo_busy_t)crocus_bo_busy;200201intel_perf_init_metrics(perf_cfg, &screen->devinfo, screen->fd, false, false);202screen->monitor_cfg = monitor_cfg;203204/* a gallium "group" is equivalent to a gen "query"205* a gallium "query" is equivalent to a gen "query_counter"206*207* Each gen_query supports a specific number of query_counters. To208* allocate the array of crocus_monitor_counter, we need an upper bound209* (ignoring duplicate query_counters).210*/211int gen_query_counters_count = 0;212for (int gen_query_id = 0;213gen_query_id < perf_cfg->n_queries;214++gen_query_id) {215gen_query_counters_count += perf_cfg->queries[gen_query_id].n_counters;216}217218monitor_cfg->counters = rzalloc_size(monitor_cfg,219sizeof(struct crocus_monitor_counter) *220gen_query_counters_count);221if (unlikely(!monitor_cfg->counters))222goto allocation_error;223224int crocus_monitor_id = 0;225for (int group = 0; group < perf_cfg->n_queries; ++group) {226for (int counter = 0;227counter < perf_cfg->queries[group].n_counters;228++counter) {229/* Check previously identified metrics to filter out duplicates. The230* user is not helped by having the same metric available in several231* groups. (n^2 algorithm).232*/233bool duplicate = false;234for (int existing_group = 0;235existing_group < group && !duplicate;236++existing_group) {237for (int existing_counter = 0;238existing_counter < perf_cfg->queries[existing_group].n_counters && !duplicate;239++existing_counter) {240const char *current_name =241perf_cfg->queries[group].counters[counter].name;242const char *existing_name =243perf_cfg->queries[existing_group].counters[existing_counter].name;244if (strcmp(current_name, existing_name) == 0) {245duplicate = true;246}247}248}249if (duplicate)250continue;251monitor_cfg->counters[crocus_monitor_id].group = group;252monitor_cfg->counters[crocus_monitor_id].counter = counter;253++crocus_monitor_id;254}255}256monitor_cfg->num_counters = crocus_monitor_id;257return monitor_cfg->num_counters;258259allocation_error:260if (monitor_cfg)261free(monitor_cfg->counters);262free(perf_cfg);263free(monitor_cfg);264return false;265}266267int268crocus_get_monitor_group_info(struct pipe_screen *pscreen,269unsigned group_index,270struct pipe_driver_query_group_info *info)271{272struct crocus_screen *screen = (struct crocus_screen *)pscreen;273if (!screen->monitor_cfg) {274if (!crocus_monitor_init_metrics(screen))275return 0;276}277278const struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;279const struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;280281if (!info) {282/* return the count that can be queried */283return perf_cfg->n_queries;284}285286if (group_index >= perf_cfg->n_queries) {287/* out of range */288return 0;289}290291struct intel_perf_query_info *query = &perf_cfg->queries[group_index];292293info->name = query->name;294info->max_active_queries = query->n_counters;295info->num_queries = query->n_counters;296297return 1;298}299300static void301crocus_init_monitor_ctx(struct crocus_context *ice)302{303struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;304struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;305306ice->perf_ctx = intel_perf_new_context(ice);307if (unlikely(!ice->perf_ctx))308return;309310struct intel_perf_context *perf_ctx = ice->perf_ctx;311struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;312intel_perf_init_context(perf_ctx,313perf_cfg,314ice,315ice,316screen->bufmgr,317&screen->devinfo,318ice->batches[CROCUS_BATCH_RENDER].hw_ctx_id,319screen->fd);320}321322/* entry point for GenPerfMonitorsAMD */323struct crocus_monitor_object *324crocus_create_monitor_object(struct crocus_context *ice,325unsigned num_queries,326unsigned *query_types)327{328struct crocus_screen *screen = (struct crocus_screen *) ice->ctx.screen;329struct crocus_monitor_config *monitor_cfg = screen->monitor_cfg;330struct intel_perf_config *perf_cfg = monitor_cfg->perf_cfg;331struct intel_perf_query_object *query_obj = NULL;332333/* initialize perf context if this has not already been done. This334* function is the first entry point that carries the gl context.335*/336if (ice->perf_ctx == NULL) {337crocus_init_monitor_ctx(ice);338}339struct intel_perf_context *perf_ctx = ice->perf_ctx;340341assert(num_queries > 0);342int query_index = query_types[0] - PIPE_QUERY_DRIVER_SPECIFIC;343assert(query_index <= monitor_cfg->num_counters);344const int group = monitor_cfg->counters[query_index].group;345346struct crocus_monitor_object *monitor =347calloc(1, sizeof(struct crocus_monitor_object));348if (unlikely(!monitor))349goto allocation_failure;350351monitor->num_active_counters = num_queries;352monitor->active_counters = calloc(num_queries, sizeof(int));353if (unlikely(!monitor->active_counters))354goto allocation_failure;355356for (int i = 0; i < num_queries; ++i) {357unsigned current_query = query_types[i];358unsigned current_query_index = current_query - PIPE_QUERY_DRIVER_SPECIFIC;359360/* all queries must be in the same group */361assert(current_query_index <= monitor_cfg->num_counters);362assert(monitor_cfg->counters[current_query_index].group == group);363monitor->active_counters[i] =364monitor_cfg->counters[current_query_index].counter;365}366367/* create the intel_perf_query */368query_obj = intel_perf_new_query(perf_ctx, group);369if (unlikely(!query_obj))370goto allocation_failure;371372monitor->query = query_obj;373monitor->result_size = perf_cfg->queries[group].data_size;374monitor->result_buffer = calloc(1, monitor->result_size);375if (unlikely(!monitor->result_buffer))376goto allocation_failure;377378return monitor;379380allocation_failure:381if (monitor) {382free(monitor->active_counters);383free(monitor->result_buffer);384}385free(query_obj);386free(monitor);387return NULL;388}389390void391crocus_destroy_monitor_object(struct pipe_context *ctx,392struct crocus_monitor_object *monitor)393{394struct crocus_context *ice = (struct crocus_context *)ctx;395396intel_perf_delete_query(ice->perf_ctx, monitor->query);397free(monitor->result_buffer);398monitor->result_buffer = NULL;399free(monitor->active_counters);400monitor->active_counters = NULL;401free(monitor);402}403404bool405crocus_begin_monitor(struct pipe_context *ctx,406struct crocus_monitor_object *monitor)407{408struct crocus_context *ice = (void *) ctx;409struct intel_perf_context *perf_ctx = ice->perf_ctx;410411return intel_perf_begin_query(perf_ctx, monitor->query);412}413414bool415crocus_end_monitor(struct pipe_context *ctx,416struct crocus_monitor_object *monitor)417{418struct crocus_context *ice = (void *) ctx;419struct intel_perf_context *perf_ctx = ice->perf_ctx;420421intel_perf_end_query(perf_ctx, monitor->query);422return true;423}424425bool426crocus_get_monitor_result(struct pipe_context *ctx,427struct crocus_monitor_object *monitor,428bool wait,429union pipe_numeric_type_union *result)430{431struct crocus_context *ice = (void *) ctx;432struct intel_perf_context *perf_ctx = ice->perf_ctx;433struct crocus_batch *batch = &ice->batches[CROCUS_BATCH_RENDER];434435bool monitor_ready =436intel_perf_is_query_ready(perf_ctx, monitor->query, batch);437438if (!monitor_ready) {439if (!wait)440return false;441intel_perf_wait_query(perf_ctx, monitor->query, batch);442}443444assert(intel_perf_is_query_ready(perf_ctx, monitor->query, batch));445446unsigned bytes_written;447intel_perf_get_query_data(perf_ctx, monitor->query, batch,448monitor->result_size,449(unsigned*) monitor->result_buffer,450&bytes_written);451if (bytes_written != monitor->result_size)452return false;453454/* copy metrics into the batch result */455for (int i = 0; i < monitor->num_active_counters; ++i) {456int current_counter = monitor->active_counters[i];457const struct intel_perf_query_info *info =458intel_perf_query_info(monitor->query);459const struct intel_perf_query_counter *counter =460&info->counters[current_counter];461assert(intel_perf_query_counter_get_size(counter));462switch (counter->data_type) {463case INTEL_PERF_COUNTER_DATA_TYPE_UINT64:464result[i].u64 = *(uint64_t*)(monitor->result_buffer + counter->offset);465break;466case INTEL_PERF_COUNTER_DATA_TYPE_FLOAT:467result[i].f = *(float*)(monitor->result_buffer + counter->offset);468break;469case INTEL_PERF_COUNTER_DATA_TYPE_UINT32:470case INTEL_PERF_COUNTER_DATA_TYPE_BOOL32:471result[i].u64 = *(uint32_t*)(monitor->result_buffer + counter->offset);472break;473case INTEL_PERF_COUNTER_DATA_TYPE_DOUBLE: {474double v = *(double*)(monitor->result_buffer + counter->offset);475result[i].f = v;476break;477}478default:479unreachable("unexpected counter data type");480}481}482return true;483}484485486