Path: blob/21.2-virgl/src/intel/perf/intel_perf_mdapi.c
4547 views
/*1* Copyright © 2018 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "intel_perf.h"24#include "intel_perf_mdapi.h"25#include "intel_perf_private.h"26#include "intel_perf_regs.h"2728#include "dev/intel_device_info.h"2930#include <drm-uapi/i915_drm.h>313233int34intel_perf_query_result_write_mdapi(void *data, uint32_t data_size,35const struct intel_device_info *devinfo,36const struct intel_perf_query_info *query,37const struct intel_perf_query_result *result)38{39switch (devinfo->ver) {40case 7: {41struct gfx7_mdapi_metrics *mdapi_data = (struct gfx7_mdapi_metrics *) data;4243if (data_size < sizeof(*mdapi_data))44return 0;4546assert(devinfo->is_haswell);4748for (int i = 0; i < ARRAY_SIZE(mdapi_data->ACounters); i++)49mdapi_data->ACounters[i] = result->accumulator[1 + i];5051for (int i = 0; i < ARRAY_SIZE(mdapi_data->NOACounters); i++) {52mdapi_data->NOACounters[i] =53result->accumulator[1 + ARRAY_SIZE(mdapi_data->ACounters) + i];54}5556mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];57mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];5859mdapi_data->ReportsCount = result->reports_accumulated;60mdapi_data->TotalTime =61intel_device_info_timebase_scale(devinfo, result->accumulator[0]);62mdapi_data->CoreFrequency = result->gt_frequency[1];63mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];64mdapi_data->SplitOccured = result->query_disjoint;65return sizeof(*mdapi_data);66}67case 8: {68struct gfx8_mdapi_metrics *mdapi_data = (struct gfx8_mdapi_metrics *) data;6970if (data_size < sizeof(*mdapi_data))71return 0;7273for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)74mdapi_data->OaCntr[i] = result->accumulator[2 + i];75for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {76mdapi_data->NoaCntr[i] =77result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];78}7980mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];81mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];8283mdapi_data->ReportId = result->hw_id;84mdapi_data->ReportsCount = result->reports_accumulated;85mdapi_data->TotalTime =86intel_device_info_timebase_scale(devinfo, result->accumulator[0]);87mdapi_data->BeginTimestamp =88intel_device_info_timebase_scale(devinfo, result->begin_timestamp);89mdapi_data->GPUTicks = result->accumulator[1];90mdapi_data->CoreFrequency = result->gt_frequency[1];91mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];92mdapi_data->SliceFrequency =93(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;94mdapi_data->UnsliceFrequency =95(result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;96mdapi_data->SplitOccured = result->query_disjoint;97return sizeof(*mdapi_data);98}99case 9:100case 11:101case 12:{102struct gfx9_mdapi_metrics *mdapi_data = (struct gfx9_mdapi_metrics *) data;103104if (data_size < sizeof(*mdapi_data))105return 0;106107for (int i = 0; i < ARRAY_SIZE(mdapi_data->OaCntr); i++)108mdapi_data->OaCntr[i] = result->accumulator[2 + i];109for (int i = 0; i < ARRAY_SIZE(mdapi_data->NoaCntr); i++) {110mdapi_data->NoaCntr[i] =111result->accumulator[2 + ARRAY_SIZE(mdapi_data->OaCntr) + i];112}113114mdapi_data->PerfCounter1 = result->accumulator[query->perfcnt_offset + 0];115mdapi_data->PerfCounter2 = result->accumulator[query->perfcnt_offset + 1];116117mdapi_data->ReportId = result->hw_id;118mdapi_data->ReportsCount = result->reports_accumulated;119mdapi_data->TotalTime =120intel_device_info_timebase_scale(devinfo, result->accumulator[0]);121mdapi_data->BeginTimestamp =122intel_device_info_timebase_scale(devinfo, result->begin_timestamp);123mdapi_data->GPUTicks = result->accumulator[1];124mdapi_data->CoreFrequency = result->gt_frequency[1];125mdapi_data->CoreFrequencyChanged = result->gt_frequency[1] != result->gt_frequency[0];126mdapi_data->SliceFrequency =127(result->slice_frequency[0] + result->slice_frequency[1]) / 2ULL;128mdapi_data->UnsliceFrequency =129(result->unslice_frequency[0] + result->unslice_frequency[1]) / 2ULL;130mdapi_data->SplitOccured = result->query_disjoint;131return sizeof(*mdapi_data);132}133default:134unreachable("unexpected gen");135}136}137138void139intel_perf_register_mdapi_statistic_query(struct intel_perf_config *perf_cfg,140const struct intel_device_info *devinfo)141{142if (!(devinfo->ver >= 7 && devinfo->ver <= 12))143return;144145struct intel_perf_query_info *query =146intel_perf_append_query_info(perf_cfg, MAX_STAT_COUNTERS);147148query->kind = INTEL_PERF_QUERY_TYPE_PIPELINE;149query->name = "Intel_Raw_Pipeline_Statistics_Query";150151/* The order has to match mdapi_pipeline_metrics. */152intel_perf_query_add_basic_stat_reg(query, IA_VERTICES_COUNT,153"N vertices submitted");154intel_perf_query_add_basic_stat_reg(query, IA_PRIMITIVES_COUNT,155"N primitives submitted");156intel_perf_query_add_basic_stat_reg(query, VS_INVOCATION_COUNT,157"N vertex shader invocations");158intel_perf_query_add_basic_stat_reg(query, GS_INVOCATION_COUNT,159"N geometry shader invocations");160intel_perf_query_add_basic_stat_reg(query, GS_PRIMITIVES_COUNT,161"N geometry shader primitives emitted");162intel_perf_query_add_basic_stat_reg(query, CL_INVOCATION_COUNT,163"N primitives entering clipping");164intel_perf_query_add_basic_stat_reg(query, CL_PRIMITIVES_COUNT,165"N primitives leaving clipping");166if (devinfo->is_haswell || devinfo->ver == 8) {167intel_perf_query_add_stat_reg(query, PS_INVOCATION_COUNT, 1, 4,168"N fragment shader invocations",169"N fragment shader invocations");170} else {171intel_perf_query_add_basic_stat_reg(query, PS_INVOCATION_COUNT,172"N fragment shader invocations");173}174intel_perf_query_add_basic_stat_reg(query, HS_INVOCATION_COUNT,175"N TCS shader invocations");176intel_perf_query_add_basic_stat_reg(query, DS_INVOCATION_COUNT,177"N TES shader invocations");178if (devinfo->ver >= 7) {179intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,180"N compute shader invocations");181}182183if (devinfo->ver >= 10) {184/* Reuse existing CS invocation register until we can expose this new185* one.186*/187intel_perf_query_add_basic_stat_reg(query, CS_INVOCATION_COUNT,188"Reserved1");189}190191query->data_size = sizeof(uint64_t) * query->n_counters;192}193194static void195fill_mdapi_perf_query_counter(struct intel_perf_query_info *query,196const char *name,197uint32_t data_offset,198uint32_t data_size,199enum intel_perf_counter_data_type data_type)200{201struct intel_perf_query_counter *counter = &query->counters[query->n_counters];202203assert(query->n_counters <= query->max_counters);204205counter->name = name;206counter->desc = "Raw counter value";207counter->type = INTEL_PERF_COUNTER_TYPE_RAW;208counter->data_type = data_type;209counter->offset = data_offset;210211query->n_counters++;212213assert(counter->offset + intel_perf_query_counter_get_size(counter) <= query->data_size);214}215216#define MDAPI_QUERY_ADD_COUNTER(query, struct_name, field_name, type_name) \217fill_mdapi_perf_query_counter(query, #field_name, \218(uint8_t *) &struct_name.field_name - \219(uint8_t *) &struct_name, \220sizeof(struct_name.field_name), \221INTEL_PERF_COUNTER_DATA_TYPE_##type_name)222#define MDAPI_QUERY_ADD_ARRAY_COUNTER(ctx, query, struct_name, field_name, idx, type_name) \223fill_mdapi_perf_query_counter(query, \224ralloc_asprintf(ctx, "%s%i", #field_name, idx), \225(uint8_t *) &struct_name.field_name[idx] - \226(uint8_t *) &struct_name, \227sizeof(struct_name.field_name[0]), \228INTEL_PERF_COUNTER_DATA_TYPE_##type_name)229230void231intel_perf_register_mdapi_oa_query(struct intel_perf_config *perf,232const struct intel_device_info *devinfo)233{234struct intel_perf_query_info *query = NULL;235236/* MDAPI requires different structures for pretty much every generation237* (right now we have definitions for gen 7 to 12).238*/239if (!(devinfo->ver >= 7 && devinfo->ver <= 12))240return;241242switch (devinfo->ver) {243case 7: {244query = intel_perf_append_query_info(perf, 1 + 45 + 16 + 7);245query->oa_format = I915_OA_FORMAT_A45_B8_C8;246247struct gfx7_mdapi_metrics metric_data;248query->data_size = sizeof(metric_data);249250MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);251for (int i = 0; i < ARRAY_SIZE(metric_data.ACounters); i++) {252MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,253metric_data, ACounters, i, UINT64);254}255for (int i = 0; i < ARRAY_SIZE(metric_data.NOACounters); i++) {256MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,257metric_data, NOACounters, i, UINT64);258}259MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);260MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);261MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);262MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);263MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);264MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);265MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);266break;267}268case 8: {269query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16);270query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;271272struct gfx8_mdapi_metrics metric_data;273query->data_size = sizeof(metric_data);274275MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);276MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);277for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {278MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,279metric_data, OaCntr, i, UINT64);280}281for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {282MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,283metric_data, NoaCntr, i, UINT64);284}285MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);286MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);287MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);288MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);289MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);290MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);291MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);292MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);293MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);294MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);295MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);296MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);297MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);298MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);299MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);300MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);301break;302}303case 9:304case 11:305case 12: {306query = intel_perf_append_query_info(perf, 2 + 36 + 16 + 16 + 16 + 2);307query->oa_format = I915_OA_FORMAT_A32u40_A4u32_B8_C8;308309struct gfx9_mdapi_metrics metric_data;310query->data_size = sizeof(metric_data);311312MDAPI_QUERY_ADD_COUNTER(query, metric_data, TotalTime, UINT64);313MDAPI_QUERY_ADD_COUNTER(query, metric_data, GPUTicks, UINT64);314for (int i = 0; i < ARRAY_SIZE(metric_data.OaCntr); i++) {315MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,316metric_data, OaCntr, i, UINT64);317}318for (int i = 0; i < ARRAY_SIZE(metric_data.NoaCntr); i++) {319MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,320metric_data, NoaCntr, i, UINT64);321}322MDAPI_QUERY_ADD_COUNTER(query, metric_data, BeginTimestamp, UINT64);323MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved1, UINT64);324MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved2, UINT64);325MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved3, UINT32);326MDAPI_QUERY_ADD_COUNTER(query, metric_data, OverrunOccured, BOOL32);327MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerUser, UINT64);328MDAPI_QUERY_ADD_COUNTER(query, metric_data, MarkerDriver, UINT64);329MDAPI_QUERY_ADD_COUNTER(query, metric_data, SliceFrequency, UINT64);330MDAPI_QUERY_ADD_COUNTER(query, metric_data, UnsliceFrequency, UINT64);331MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter1, UINT64);332MDAPI_QUERY_ADD_COUNTER(query, metric_data, PerfCounter2, UINT64);333MDAPI_QUERY_ADD_COUNTER(query, metric_data, SplitOccured, BOOL32);334MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequencyChanged, BOOL32);335MDAPI_QUERY_ADD_COUNTER(query, metric_data, CoreFrequency, UINT64);336MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportId, UINT32);337MDAPI_QUERY_ADD_COUNTER(query, metric_data, ReportsCount, UINT32);338for (int i = 0; i < ARRAY_SIZE(metric_data.UserCntr); i++) {339MDAPI_QUERY_ADD_ARRAY_COUNTER(perf->queries, query,340metric_data, UserCntr, i, UINT64);341}342MDAPI_QUERY_ADD_COUNTER(query, metric_data, UserCntrCfgId, UINT32);343MDAPI_QUERY_ADD_COUNTER(query, metric_data, Reserved4, UINT32);344break;345}346default:347unreachable("Unsupported gen");348break;349}350351query->kind = INTEL_PERF_QUERY_TYPE_RAW;352query->name = "Intel_Raw_Hardware_Counters_Set_0_Query";353query->guid = INTEL_PERF_QUERY_GUID_MDAPI;354355{356/* Accumulation buffer offsets copied from an actual query... */357const struct intel_perf_query_info *copy_query =358&perf->queries[0];359360query->gpu_time_offset = copy_query->gpu_time_offset;361query->gpu_clock_offset = copy_query->gpu_clock_offset;362query->a_offset = copy_query->a_offset;363query->b_offset = copy_query->b_offset;364query->c_offset = copy_query->c_offset;365query->perfcnt_offset = copy_query->perfcnt_offset;366}367}368369370