/* Path: blob/21.2-virgl/src/amd/common/ac_perfcounter.c */
/*1* Copyright 2015 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#include "ac_gpu_info.h"25#include "ac_perfcounter.h"2627#include "util/u_memory.h"28#include "macros.h"2930/* cik_CB */31static unsigned cik_CB_select0[] = {32R_037004_CB_PERFCOUNTER0_SELECT,33R_03700C_CB_PERFCOUNTER1_SELECT,34R_037010_CB_PERFCOUNTER2_SELECT,35R_037014_CB_PERFCOUNTER3_SELECT,36};37static unsigned cik_CB_select1[] = {38R_037008_CB_PERFCOUNTER0_SELECT1,39};40static struct ac_pc_block_base cik_CB = {41.gpu_block = CB,42.name = "CB",43.num_counters = 4,44.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,4546.select0 = cik_CB_select0,47.select1 = cik_CB_select1,48.counter0_lo = R_035018_CB_PERFCOUNTER0_LO,4950.num_spm_counters = 1,51.num_spm_wires = 2,52.spm_block_select = 0x0,53};5455/* cik_CPC */56static unsigned cik_CPC_select0[] = 
{57R_036024_CPC_PERFCOUNTER0_SELECT,58R_03600C_CPC_PERFCOUNTER1_SELECT,59};60static unsigned cik_CPC_select1[] = {61R_036010_CPC_PERFCOUNTER0_SELECT1,62};63static unsigned cik_CPC_counters[] = {64R_034018_CPC_PERFCOUNTER0_LO,65R_034010_CPC_PERFCOUNTER1_LO,66};67static struct ac_pc_block_base cik_CPC = {68.gpu_block = CPC,69.name = "CPC",70.num_counters = 2,7172.select0 = cik_CPC_select0,73.select1 = cik_CPC_select1,74.counters = cik_CPC_counters,7576.num_spm_counters = 1,77.num_spm_wires = 2,78.spm_block_select = 0x1,79};8081/* cik_CPF */82static unsigned cik_CPF_select0[] = {83R_03601C_CPF_PERFCOUNTER0_SELECT,84R_036014_CPF_PERFCOUNTER1_SELECT,85};86static unsigned cik_CPF_select1[] = {87R_036018_CPF_PERFCOUNTER0_SELECT1,88};89static unsigned cik_CPF_counters[] = {90R_034028_CPF_PERFCOUNTER0_LO,91R_034020_CPF_PERFCOUNTER1_LO,92};93static struct ac_pc_block_base cik_CPF = {94.gpu_block = CPF,95.name = "CPF",96.num_counters = 2,9798.select0 = cik_CPF_select0,99.select1 = cik_CPF_select1,100.counters = cik_CPF_counters,101102.num_spm_counters = 1,103.num_spm_wires = 2,104.spm_block_select = 0x2,105};106107/* cik_CPG */108static unsigned cik_CPG_select0[] = {109R_036008_CPG_PERFCOUNTER0_SELECT,110R_036000_CPG_PERFCOUNTER1_SELECT,111};112static unsigned cik_CPG_select1[] = {113R_036004_CPG_PERFCOUNTER0_SELECT1114};115static unsigned cik_CPG_counters[] = {116R_034008_CPG_PERFCOUNTER0_LO,117R_034000_CPG_PERFCOUNTER1_LO,118};119static struct ac_pc_block_base cik_CPG = {120.gpu_block = CPG,121.name = "CPG",122.num_counters = 2,123124.select0 = cik_CPG_select0,125.select1 = cik_CPG_select1,126.counters = cik_CPG_counters,127128.num_spm_counters = 1,129.num_spm_wires = 2,130.spm_block_select = 0x0,131};132133/* cik_DB */134static unsigned cik_DB_select0[] = {135R_037100_DB_PERFCOUNTER0_SELECT,136R_037108_DB_PERFCOUNTER1_SELECT,137R_037110_DB_PERFCOUNTER2_SELECT,138R_037118_DB_PERFCOUNTER3_SELECT,139};140static unsigned cik_DB_select1[] = 
{141R_037104_DB_PERFCOUNTER0_SELECT1,142R_03710C_DB_PERFCOUNTER1_SELECT1,143};144static struct ac_pc_block_base cik_DB = {145.gpu_block = DB,146.name = "DB",147.num_counters = 4,148.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,149150.select0 = cik_DB_select0,151.select1 = cik_DB_select1,152.counter0_lo = R_035100_DB_PERFCOUNTER0_LO,153154.num_spm_counters = 2,155.num_spm_wires = 3,156.spm_block_select = 0x1,157};158159/* cik_GDS */160static unsigned cik_GDS_select0[] = {161R_036A00_GDS_PERFCOUNTER0_SELECT,162R_036A04_GDS_PERFCOUNTER1_SELECT,163R_036A08_GDS_PERFCOUNTER2_SELECT,164R_036A0C_GDS_PERFCOUNTER3_SELECT,165};166static unsigned cik_GDS_select1[] = {167R_036A10_GDS_PERFCOUNTER0_SELECT1,168};169static struct ac_pc_block_base cik_GDS = {170.gpu_block = GDS,171.name = "GDS",172.num_counters = 4,173174.select0 = cik_GDS_select0,175.select1 = cik_GDS_select1,176.counter0_lo = R_034A00_GDS_PERFCOUNTER0_LO,177178.num_spm_counters = 1,179.num_spm_wires = 2,180.spm_block_select = 0x3,181};182183/* cik_GRBM */184static unsigned cik_GRBM_select0[] = {185R_036100_GRBM_PERFCOUNTER0_SELECT,186R_036104_GRBM_PERFCOUNTER1_SELECT,187};188static unsigned cik_GRBM_counters[] = {189R_034100_GRBM_PERFCOUNTER0_LO,190R_03410C_GRBM_PERFCOUNTER1_LO,191};192static struct ac_pc_block_base cik_GRBM = {193.gpu_block = GRBM,194.name = "GRBM",195.num_counters = 2,196197.select0 = cik_GRBM_select0,198.counters = cik_GRBM_counters,199};200201/* cik_GRBMSE */202static unsigned cik_GRBMSE_select0[] = {203R_036108_GRBM_SE0_PERFCOUNTER_SELECT,204R_03610C_GRBM_SE1_PERFCOUNTER_SELECT,205R_036110_GRBM_SE2_PERFCOUNTER_SELECT,206R_036114_GRBM_SE3_PERFCOUNTER_SELECT,207};208static struct ac_pc_block_base cik_GRBMSE = {209.gpu_block = GRBMSE,210.name = "GRBMSE",211.num_counters = 4,212213.select0 = cik_GRBMSE_select0,214.counter0_lo = R_034114_GRBM_SE0_PERFCOUNTER_LO,215};216217/* cik_IA */218static unsigned cik_IA_select0[] = 
{219R_036210_IA_PERFCOUNTER0_SELECT,220R_036214_IA_PERFCOUNTER1_SELECT,221R_036218_IA_PERFCOUNTER2_SELECT,222R_03621C_IA_PERFCOUNTER3_SELECT,223};224static unsigned cik_IA_select1[] = {225R_036220_IA_PERFCOUNTER0_SELECT1,226};227static struct ac_pc_block_base cik_IA = {228.gpu_block = IA,229.name = "IA",230.num_counters = 4,231232.select0 = cik_IA_select0,233.select1 = cik_IA_select1,234.counter0_lo = R_034220_IA_PERFCOUNTER0_LO,235236.num_spm_counters = 1,237.num_spm_wires = 2,238.spm_block_select = 0x6,239};240241/* cik_PA_SC */242static unsigned cik_PA_SC_select0[] = {243R_036500_PA_SC_PERFCOUNTER0_SELECT,244R_036508_PA_SC_PERFCOUNTER1_SELECT,245R_03650C_PA_SC_PERFCOUNTER2_SELECT,246R_036510_PA_SC_PERFCOUNTER3_SELECT,247R_036514_PA_SC_PERFCOUNTER4_SELECT,248R_036518_PA_SC_PERFCOUNTER5_SELECT,249R_03651C_PA_SC_PERFCOUNTER6_SELECT,250R_036520_PA_SC_PERFCOUNTER7_SELECT,251};252static unsigned cik_PA_SC_select1[] = {253R_036504_PA_SC_PERFCOUNTER0_SELECT1,254};255static struct ac_pc_block_base cik_PA_SC = {256.gpu_block = PA_SC,257.name = "PA_SC",258.num_counters = 8,259.flags = AC_PC_BLOCK_SE,260261.select0 = cik_PA_SC_select0,262.select1 = cik_PA_SC_select1,263.counter0_lo = R_034500_PA_SC_PERFCOUNTER0_LO,264265.num_spm_counters = 1,266.num_spm_wires = 2,267.spm_block_select = 0x4,268};269270/* cik_PA_SU */271static unsigned cik_PA_SU_select0[] = {272R_036400_PA_SU_PERFCOUNTER0_SELECT,273R_036408_PA_SU_PERFCOUNTER1_SELECT,274R_036410_PA_SU_PERFCOUNTER2_SELECT,275R_036414_PA_SU_PERFCOUNTER3_SELECT,276};277static unsigned cik_PA_SU_select1[] = {278R_036404_PA_SU_PERFCOUNTER0_SELECT1,279R_03640C_PA_SU_PERFCOUNTER1_SELECT1,280};281/* According to docs, PA_SU counters are only 48 bits wide. 
*/282static struct ac_pc_block_base cik_PA_SU = {283.gpu_block = PA_SU,284.name = "PA_SU",285.num_counters = 4,286.flags = AC_PC_BLOCK_SE,287288.select0 = cik_PA_SU_select0,289.select1 = cik_PA_SU_select1,290.counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,291292.num_spm_counters = 2,293.num_spm_wires = 3,294.spm_block_select = 0x2,295};296297/* cik_SPI */298static unsigned cik_SPI_select0[] = {299R_036600_SPI_PERFCOUNTER0_SELECT,300R_036604_SPI_PERFCOUNTER1_SELECT,301R_036608_SPI_PERFCOUNTER2_SELECT,302R_03660C_SPI_PERFCOUNTER3_SELECT,303R_036620_SPI_PERFCOUNTER4_SELECT,304R_036624_SPI_PERFCOUNTER5_SELECT,305};306static unsigned cik_SPI_select1[] = {307R_036610_SPI_PERFCOUNTER0_SELECT1,308R_036614_SPI_PERFCOUNTER1_SELECT1,309R_036618_SPI_PERFCOUNTER2_SELECT1,310R_03661C_SPI_PERFCOUNTER3_SELECT1311};312static struct ac_pc_block_base cik_SPI = {313.gpu_block = SPI,314.name = "SPI",315.num_counters = 6,316.flags = AC_PC_BLOCK_SE,317318.select0 = cik_SPI_select0,319.select1 = cik_SPI_select1,320.counter0_lo = R_034604_SPI_PERFCOUNTER0_LO,321322.num_spm_counters = 4,323.num_spm_wires = 8,324.spm_block_select = 0x8,325};326327/* cik_SQ */328static unsigned cik_SQ_select0[] = {329R_036700_SQ_PERFCOUNTER0_SELECT,330R_036704_SQ_PERFCOUNTER1_SELECT,331R_036708_SQ_PERFCOUNTER2_SELECT,332R_03670C_SQ_PERFCOUNTER3_SELECT,333R_036710_SQ_PERFCOUNTER4_SELECT,334R_036714_SQ_PERFCOUNTER5_SELECT,335R_036718_SQ_PERFCOUNTER6_SELECT,336R_03671C_SQ_PERFCOUNTER7_SELECT,337R_036720_SQ_PERFCOUNTER8_SELECT,338R_036724_SQ_PERFCOUNTER9_SELECT,339R_036728_SQ_PERFCOUNTER10_SELECT,340R_03672C_SQ_PERFCOUNTER11_SELECT,341R_036730_SQ_PERFCOUNTER12_SELECT,342R_036734_SQ_PERFCOUNTER13_SELECT,343R_036738_SQ_PERFCOUNTER14_SELECT,344R_03673C_SQ_PERFCOUNTER15_SELECT,345};346static struct ac_pc_block_base cik_SQ = {347.gpu_block = SQ,348.name = "SQ",349.num_counters = 16,350.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,351352.select0 = cik_SQ_select0,353.select_or = S_036700_SQC_BANK_MASK(15) | 
S_036700_SQC_CLIENT_MASK(15) | S_036700_SIMD_MASK(15),354.counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,355356.num_spm_wires = 8,357.spm_block_select = 0x9,358};359360/* cik_SX */361static unsigned cik_SX_select0[] = {362R_036900_SX_PERFCOUNTER0_SELECT,363R_036904_SX_PERFCOUNTER1_SELECT,364R_036908_SX_PERFCOUNTER2_SELECT,365R_03690C_SX_PERFCOUNTER3_SELECT,366};367static unsigned cik_SX_select1[] = {368R_036910_SX_PERFCOUNTER0_SELECT1,369R_036914_SX_PERFCOUNTER1_SELECT1,370};371static struct ac_pc_block_base cik_SX = {372.gpu_block = SX,373.name = "SX",374.num_counters = 4,375.flags = AC_PC_BLOCK_SE,376377.select0 = cik_SX_select0,378.select1 = cik_SX_select1,379.counter0_lo = R_034900_SX_PERFCOUNTER0_LO,380381.num_spm_counters = 2,382.num_spm_wires = 4,383.spm_block_select = 0x3,384};385386/* cik_TA */387static unsigned cik_TA_select0[] = {388R_036B00_TA_PERFCOUNTER0_SELECT,389R_036B08_TA_PERFCOUNTER1_SELECT,390};391static unsigned cik_TA_select1[] = {392R_036B04_TA_PERFCOUNTER0_SELECT1,393};394static struct ac_pc_block_base cik_TA = {395.gpu_block = TA,396.name = "TA",397.num_counters = 2,398.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,399400.select0 = cik_TA_select0,401.select1 = cik_TA_select1,402.counter0_lo = R_034B00_TA_PERFCOUNTER0_LO,403404.num_spm_counters = 1,405.num_spm_wires = 2,406.spm_block_select = 0x5,407};408409/* cik_TD */410static unsigned cik_TD_select0[] = {411R_036C00_TD_PERFCOUNTER0_SELECT,412R_036C08_TD_PERFCOUNTER1_SELECT,413};414static unsigned cik_TD_select1[] = {415R_036C04_TD_PERFCOUNTER0_SELECT1,416};417static struct ac_pc_block_base cik_TD = {418.gpu_block = TD,419.name = "TD",420.num_counters = 2,421.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,422423.select0 = cik_TD_select0,424.select1 = cik_TD_select1,425.counter0_lo = R_034C00_TD_PERFCOUNTER0_LO,426427.num_spm_counters = 1,428.num_spm_wires = 2,429.spm_block_select = 0x6,430};431432/* cik_TCA */433static 
unsigned cik_TCA_select0[] = {434R_036E40_TCA_PERFCOUNTER0_SELECT,435R_036E48_TCA_PERFCOUNTER1_SELECT,436R_036E50_TCA_PERFCOUNTER2_SELECT,437R_036E54_TCA_PERFCOUNTER3_SELECT,438};439static unsigned cik_TCA_select1[] = {440R_036E44_TCA_PERFCOUNTER0_SELECT1,441R_036E4C_TCA_PERFCOUNTER1_SELECT1,442};443static struct ac_pc_block_base cik_TCA = {444.gpu_block = TCA,445.name = "TCA",446.num_counters = 4,447.flags = AC_PC_BLOCK_INSTANCE_GROUPS,448449.select0 = cik_TCA_select0,450.select1 = cik_TCA_select1,451.counter0_lo = R_034E40_TCA_PERFCOUNTER0_LO,452453.num_spm_counters = 2,454.num_spm_wires = 4,455.spm_block_select = 0x5,456};457458/* cik_TCC */459static unsigned cik_TCC_select0[] = {460R_036E00_TCC_PERFCOUNTER0_SELECT,461R_036E08_TCC_PERFCOUNTER1_SELECT,462R_036E10_TCC_PERFCOUNTER2_SELECT,463R_036E14_TCC_PERFCOUNTER3_SELECT,464};465static unsigned cik_TCC_select1[] = {466R_036E04_TCC_PERFCOUNTER0_SELECT1,467R_036E0C_TCC_PERFCOUNTER1_SELECT1,468};469static struct ac_pc_block_base cik_TCC = {470.gpu_block = TCC,471.name = "TCC",472.num_counters = 4,473.flags = AC_PC_BLOCK_INSTANCE_GROUPS,474475.select0 = cik_TCC_select0,476.select1 = cik_TCC_select1,477.counter0_lo = R_034E00_TCC_PERFCOUNTER0_LO,478479.num_spm_counters = 2,480.num_spm_wires = 4,481.spm_block_select = 0x4,482};483484/* cik_TCP */485static unsigned cik_TCP_select0[] = {486R_036D00_TCP_PERFCOUNTER0_SELECT,487R_036D08_TCP_PERFCOUNTER1_SELECT,488R_036D10_TCP_PERFCOUNTER2_SELECT,489R_036D14_TCP_PERFCOUNTER3_SELECT,490};491static unsigned cik_TCP_select1[] = {492R_036D04_TCP_PERFCOUNTER0_SELECT1,493R_036D0C_TCP_PERFCOUNTER1_SELECT1,494};495static struct ac_pc_block_base cik_TCP = {496.gpu_block = TCP,497.name = "TCP",498.num_counters = 4,499.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,500501.select0 = cik_TCP_select0,502.select1 = cik_TCP_select1,503.counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,504505.num_spm_counters = 2,506.num_spm_wires = 3,507.spm_block_select = 
0x7,508};509510/* cik_VGT */511static unsigned cik_VGT_select0[] = {512R_036230_VGT_PERFCOUNTER0_SELECT,513R_036234_VGT_PERFCOUNTER1_SELECT,514R_036238_VGT_PERFCOUNTER2_SELECT,515R_03623C_VGT_PERFCOUNTER3_SELECT,516};517static unsigned cik_VGT_select1[] = {518R_036240_VGT_PERFCOUNTER0_SELECT1,519R_036244_VGT_PERFCOUNTER1_SELECT1,520};521static struct ac_pc_block_base cik_VGT = {522.gpu_block = VGT,523.name = "VGT",524.num_counters = 4,525.flags = AC_PC_BLOCK_SE,526527.select0 = cik_VGT_select0,528.select1 = cik_VGT_select1,529.counter0_lo = R_034240_VGT_PERFCOUNTER0_LO,530531.num_spm_counters = 2,532.num_spm_wires = 3,533.spm_block_select = 0xa,534};535536/* cik_WD */537static unsigned cik_WD_select0[] = {538R_036200_WD_PERFCOUNTER0_SELECT,539R_036204_WD_PERFCOUNTER1_SELECT,540R_036208_WD_PERFCOUNTER2_SELECT,541R_03620C_WD_PERFCOUNTER3_SELECT,542};543static struct ac_pc_block_base cik_WD = {544.gpu_block = WD,545.name = "WD",546.num_counters = 4,547548.select0 = cik_WD_select0,549.counter0_lo = R_034200_WD_PERFCOUNTER0_LO,550};551552/* cik_MC */553static struct ac_pc_block_base cik_MC = {554.gpu_block = MC,555.name = "MC",556.num_counters = 4,557};558559/* cik_SRBM */560static struct ac_pc_block_base cik_SRBM = {561.gpu_block = SRBM,562.name = "SRBM",563.num_counters = 2,564};565566/* gfx10_CHA */567static unsigned gfx10_CHA_select0[] = {568R_037780_CHA_PERFCOUNTER0_SELECT,569R_037788_CHA_PERFCOUNTER1_SELECT,570R_03778C_CHA_PERFCOUNTER2_SELECT,571R_037790_CHA_PERFCOUNTER3_SELECT,572};573static unsigned gfx10_CHA_select1[] = {574R_037784_CHA_PERFCOUNTER0_SELECT1,575};576static struct ac_pc_block_base gfx10_CHA = {577.gpu_block = CHA,578.name = "CHA",579.num_counters = 4,580581.select0 = gfx10_CHA_select0,582.select1 = gfx10_CHA_select1,583.counter0_lo = R_035800_CHA_PERFCOUNTER0_LO,584585.num_spm_counters = 1,586.num_spm_wires = 2,587.spm_block_select = 0xc,588};589590/* gfx10_CHCG */591static unsigned gfx10_CHCG_select0[] = 
{592R_036F18_CHCG_PERFCOUNTER0_SELECT,593R_036F20_CHCG_PERFCOUNTER1_SELECT,594R_036F24_CHCG_PERFCOUNTER2_SELECT,595R_036F28_CHCG_PERFCOUNTER3_SELECT,596};597static unsigned gfx10_CHCG_select1[] = {598R_036F1C_CHCG_PERFCOUNTER0_SELECT1,599};600static struct ac_pc_block_base gfx10_CHCG = {601.gpu_block = CHCG,602.name = "CHCG",603.num_counters = 4,604605.select0 = gfx10_CHCG_select0,606.select1 = gfx10_CHCG_select1,607.counter0_lo = R_034F20_CHCG_PERFCOUNTER0_LO,608609.num_spm_counters = 1,610.num_spm_wires = 2,611.spm_block_select = 0xe,612};613614/* gfx10_CHC */615static unsigned gfx10_CHC_select0[] = {616R_036F00_CHC_PERFCOUNTER0_SELECT,617R_036F08_CHC_PERFCOUNTER1_SELECT,618R_036F0C_CHC_PERFCOUNTER2_SELECT,619R_036F10_CHC_PERFCOUNTER3_SELECT,620};621static unsigned gfx10_CHC_select1[] = {622R_036F04_CHC_PERFCOUNTER0_SELECT1,623};624static struct ac_pc_block_base gfx10_CHC = {625.gpu_block = CHC,626.name = "CHC",627.num_counters = 4,628629.select0 = gfx10_CHC_select0,630.select1 = gfx10_CHC_select1,631.counter0_lo = R_034F00_CHC_PERFCOUNTER0_LO,632633.num_spm_counters = 1,634.num_spm_wires = 2,635.spm_block_select = 0xd,636};637638/* gfx10_DB */639static struct ac_pc_block_base gfx10_DB = {640.gpu_block = DB,641.name = "DB",642.num_counters = 4,643.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,644645.select0 = cik_DB_select0,646.select1 = cik_DB_select1,647.counter0_lo = R_035100_DB_PERFCOUNTER0_LO,648649.num_spm_counters = 2,650.num_spm_wires = 4,651.spm_block_select = 0x1,652};653654/* gfx10_GCR */655static unsigned gfx10_GCR_select0[] = {656R_037580_GCR_PERFCOUNTER0_SELECT,657R_037588_GCR_PERFCOUNTER1_SELECT,658};659static unsigned gfx10_GCR_select1[] = {660R_037584_GCR_PERFCOUNTER0_SELECT1,661};662static struct ac_pc_block_base gfx10_GCR = {663.gpu_block = GCR,664.name = "GCR",665.num_counters = 2,666667.select0 = gfx10_GCR_select0,668.select1 = gfx10_GCR_select1,669.counter0_lo = R_035480_GCR_PERFCOUNTER0_LO,670671.num_spm_counters = 
1,672.num_spm_wires = 2,673.spm_block_select = 0x4,674};675676/* gfx10_GE */677static unsigned gfx10_GE_select0[] = {678R_036200_GE_PERFCOUNTER0_SELECT,679R_036208_GE_PERFCOUNTER1_SELECT,680R_036210_GE_PERFCOUNTER2_SELECT,681R_036218_GE_PERFCOUNTER3_SELECT,682R_036220_GE_PERFCOUNTER4_SELECT,683R_036228_GE_PERFCOUNTER5_SELECT,684R_036230_GE_PERFCOUNTER6_SELECT,685R_036238_GE_PERFCOUNTER7_SELECT,686R_036240_GE_PERFCOUNTER8_SELECT,687R_036248_GE_PERFCOUNTER9_SELECT,688R_036250_GE_PERFCOUNTER10_SELECT,689R_036258_GE_PERFCOUNTER11_SELECT,690};691static unsigned gfx10_GE_select1[] = {692R_036204_GE_PERFCOUNTER0_SELECT1,693R_03620C_GE_PERFCOUNTER1_SELECT1,694R_036214_GE_PERFCOUNTER2_SELECT1,695R_03621C_GE_PERFCOUNTER3_SELECT1,696};697static struct ac_pc_block_base gfx10_GE = {698.gpu_block = GE,699.name = "GE",700.num_counters = 12,701702.select0 = gfx10_GE_select0,703.select1 = gfx10_GE_select1,704.counter0_lo = R_034200_GE_PERFCOUNTER0_LO,705706.num_spm_counters = 4,707.num_spm_wires = 8,708.spm_block_select = 0x6,709};710711/* gfx10_GL1A */712static unsigned gfx10_GL1A_select0[] = {713R_037700_GL1A_PERFCOUNTER0_SELECT,714R_037708_GL1A_PERFCOUNTER1_SELECT,715R_03770C_GL1A_PERFCOUNTER2_SELECT,716R_037710_GL1A_PERFCOUNTER3_SELECT,717};718static unsigned gfx10_GL1A_select1[] = {719R_037704_GL1A_PERFCOUNTER0_SELECT1,720};721static struct ac_pc_block_base gfx10_GL1A = {722.gpu_block = GL1A,723.name = "GL1A",724.num_counters = 4,725.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,726727.select0 = gfx10_GL1A_select0,728.select1 = gfx10_GL1A_select1,729.counter0_lo = R_035700_GL1A_PERFCOUNTER0_LO,730731.num_spm_counters = 1,732.num_spm_wires = 2,733.spm_block_select = 0xa,734};735736/* gfx10_GL1C */737static unsigned gfx10_GL1C_select0[] = {738R_036E80_GL1C_PERFCOUNTER0_SELECT,739R_036E88_GL1C_PERFCOUNTER1_SELECT,740R_036E8C_GL1C_PERFCOUNTER2_SELECT,741R_036E90_GL1C_PERFCOUNTER3_SELECT,742};743static unsigned gfx10_GL1C_select1[] = 
{744R_036E84_GL1C_PERFCOUNTER0_SELECT1,745};746static struct ac_pc_block_base gfx10_GL1C = {747.gpu_block = GL1C,748.name = "GL1C",749.num_counters = 4,750.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,751752.select0 = gfx10_GL1C_select0,753.select1 = gfx10_GL1C_select1,754.counter0_lo = R_034E80_GL1C_PERFCOUNTER0_LO,755756.num_spm_counters = 1,757.num_spm_wires = 2,758.spm_block_select = 0xc759};760761/* gfx10_GL2A */762static unsigned gfx10_GL2A_select0[] = {763R_036E40_GL2A_PERFCOUNTER0_SELECT,764R_036E48_GL2A_PERFCOUNTER1_SELECT,765R_036E50_GL2A_PERFCOUNTER2_SELECT,766R_036E54_GL2A_PERFCOUNTER3_SELECT,767};768static unsigned gfx10_GL2A_select1[] = {769R_036E44_GL2A_PERFCOUNTER0_SELECT1,770R_036E4C_GL2A_PERFCOUNTER1_SELECT1,771};772static struct ac_pc_block_base gfx10_GL2A = {773.gpu_block = GL2A,774.name = "GL2A",775.num_counters = 4,776777.select0 = gfx10_GL2A_select0,778.select1 = gfx10_GL2A_select1,779.counter0_lo = R_034E40_GL2A_PERFCOUNTER0_LO,780781.num_spm_counters = 2,782.num_spm_wires = 4,783.spm_block_select = 0x7,784};785786/* gfx10_GL2C */787static unsigned gfx10_GL2C_select0[] = {788R_036E00_GL2C_PERFCOUNTER0_SELECT,789R_036E08_GL2C_PERFCOUNTER1_SELECT,790R_036E10_GL2C_PERFCOUNTER2_SELECT,791R_036E14_GL2C_PERFCOUNTER3_SELECT,792};793static unsigned gfx10_GL2C_select1[] = {794R_036E04_GL2C_PERFCOUNTER0_SELECT1,795R_036E0C_GL2C_PERFCOUNTER1_SELECT1,796};797static struct ac_pc_block_base gfx10_GL2C = {798.gpu_block = GL2C,799.name = "GL2C",800.num_counters = 4,801802.select0 = gfx10_GL2C_select0,803.select1 = gfx10_GL2C_select1,804.counter0_lo = R_034E00_GL2C_PERFCOUNTER0_LO,805806.num_spm_counters = 2,807.num_spm_wires = 4,808.spm_block_select = 0x8,809};810811/* gfx10_PA_PH */812static unsigned gfx10_PA_PH_select0[] = 
{813R_037600_PA_PH_PERFCOUNTER0_SELECT,814R_037608_PA_PH_PERFCOUNTER1_SELECT,815R_03760C_PA_PH_PERFCOUNTER2_SELECT,816R_037610_PA_PH_PERFCOUNTER3_SELECT,817R_037614_PA_PH_PERFCOUNTER4_SELECT,818R_037618_PA_PH_PERFCOUNTER5_SELECT,819R_03761C_PA_PH_PERFCOUNTER6_SELECT,820R_037620_PA_PH_PERFCOUNTER7_SELECT,821};822static unsigned gfx10_PA_PH_select1[] = {823R_037604_PA_PH_PERFCOUNTER0_SELECT1,824R_037640_PA_PH_PERFCOUNTER1_SELECT1,825R_037644_PA_PH_PERFCOUNTER2_SELECT1,826R_037648_PA_PH_PERFCOUNTER3_SELECT1,827};828static struct ac_pc_block_base gfx10_PA_PH = {829.gpu_block = PA_PH,830.name = "PA_PH",831.num_counters = 8,832.flags = AC_PC_BLOCK_SE,833834.select0 = gfx10_PA_PH_select0,835.select1 = gfx10_PA_PH_select1,836.counter0_lo = R_035600_PA_PH_PERFCOUNTER0_LO,837838.num_spm_counters = 4,839.num_spm_wires = 8,840.spm_block_select = 0x5,841};842843/* gfx10_PA_SU */844static unsigned gfx10_PA_SU_select0[] = {845R_036400_PA_SU_PERFCOUNTER0_SELECT,846R_036408_PA_SU_PERFCOUNTER1_SELECT,847R_036410_PA_SU_PERFCOUNTER2_SELECT,848R_036418_PA_SU_PERFCOUNTER3_SELECT,849};850static unsigned gfx10_PA_SU_select1[] = {851R_036404_PA_SU_PERFCOUNTER0_SELECT1,852R_03640C_PA_SU_PERFCOUNTER1_SELECT1,853R_036414_PA_SU_PERFCOUNTER2_SELECT1,854R_03641C_PA_SU_PERFCOUNTER3_SELECT1,855};856static struct ac_pc_block_base gfx10_PA_SU = {857.gpu_block = PA_SU,858.name = "PA_SU",859.num_counters = 4,860.flags = AC_PC_BLOCK_SE,861862.select0 = gfx10_PA_SU_select0,863.select1 = gfx10_PA_SU_select1,864.counter0_lo = R_034400_PA_SU_PERFCOUNTER0_LO,865866.num_spm_counters = 4,867.num_spm_wires = 8,868.spm_block_select = 0x2,869};870871/* gfx10_RLC */872static unsigned gfx10_RLC_select0[] = {873R_037304_RLC_PERFCOUNTER0_SELECT,874R_037308_RLC_PERFCOUNTER1_SELECT,875};876static struct ac_pc_block_base gfx10_RLC = {877.gpu_block = RLC,878.name = "RLC",879.num_counters = 2,880881.select0 = gfx10_RLC_select0,882.counter0_lo = R_035200_RLC_PERFCOUNTER0_LO,883.num_spm_counters = 0,884};885886/* gfx10_RMI 
*/887static unsigned gfx10_RMI_select0[] = {888R_037400_RMI_PERFCOUNTER0_SELECT,889R_037408_RMI_PERFCOUNTER1_SELECT,890R_03740C_RMI_PERFCOUNTER2_SELECT,891R_037414_RMI_PERFCOUNTER3_SELECT,892};893static unsigned gfx10_RMI_select1[] = {894R_037404_RMI_PERFCOUNTER0_SELECT1,895R_037410_RMI_PERFCOUNTER2_SELECT1,896};897static struct ac_pc_block_base gfx10_RMI = {898.gpu_block = RMI,899.name = "RMI",900.num_counters = 4,901.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS,902903.select0 = gfx10_RMI_select0,904.select1 = gfx10_RMI_select1,905.counter0_lo = R_035300_RMI_PERFCOUNTER0_LO,906907.num_spm_counters = 2,908.num_spm_wires = 2,909.spm_block_select = 0xb,910};911912/* gfx10_SQ */913static struct ac_pc_block_base gfx10_SQ = {914.gpu_block = SQ,915.name = "SQ",916.num_counters = 16,917.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER,918919.select0 = cik_SQ_select0,920.select_or = S_036700_SQC_BANK_MASK(15),921.counter0_lo = R_034700_SQ_PERFCOUNTER0_LO,922923.num_spm_wires = 16,924.spm_block_select = 0x9,925};926927/* gfx10_TCP */928static struct ac_pc_block_base gfx10_TCP = {929.gpu_block = TCP,930.name = "TCP",931.num_counters = 4,932.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_INSTANCE_GROUPS | AC_PC_BLOCK_SHADER_WINDOWED,933934.select0 = cik_TCP_select0,935.select1 = cik_TCP_select1,936.counter0_lo = R_034D00_TCP_PERFCOUNTER0_LO,937938.num_spm_counters = 2,939.num_spm_wires = 4,940.spm_block_select = 0x7,941};942943/* gfx10_UTCL1 */944static unsigned gfx10_UTCL1_select0[] = {945R_03758C_UTCL1_PERFCOUNTER0_SELECT,946R_037590_UTCL1_PERFCOUNTER1_SELECT,947};948static struct ac_pc_block_base gfx10_UTCL1 = {949.gpu_block = UTCL1,950.name = "UTCL1",951.num_counters = 2,952.flags = AC_PC_BLOCK_SE | AC_PC_BLOCK_SHADER_WINDOWED,953954.select0 = gfx10_UTCL1_select0,955.counter0_lo = R_035470_UTCL1_PERFCOUNTER0_LO,956.num_spm_counters = 0,957};958959/* Both the number of instances and selectors varies between chips of the same960* class. 
We only differentiate by class here and simply expose the maximum961* number over all chips in a class.962*963* Unfortunately, GPUPerfStudio uses the order of performance counter groups964* blindly once it believes it has identified the hardware, so the order of965* blocks here matters.966*/967static struct ac_pc_block_gfxdescr groups_CIK[] = {968{&cik_CB, 226}, {&cik_CPF, 17}, {&cik_DB, 257}, {&cik_GRBM, 34}, {&cik_GRBMSE, 15},969{&cik_PA_SU, 153}, {&cik_PA_SC, 395}, {&cik_SPI, 186}, {&cik_SQ, 252}, {&cik_SX, 32},970{&cik_TA, 111}, {&cik_TCA, 39, 2}, {&cik_TCC, 160}, {&cik_TD, 55}, {&cik_TCP, 154},971{&cik_GDS, 121}, {&cik_VGT, 140}, {&cik_IA, 22}, {&cik_MC, 22}, {&cik_SRBM, 19},972{&cik_WD, 22}, {&cik_CPG, 46}, {&cik_CPC, 22},973974};975976static struct ac_pc_block_gfxdescr groups_VI[] = {977{&cik_CB, 405}, {&cik_CPF, 19}, {&cik_DB, 257}, {&cik_GRBM, 34}, {&cik_GRBMSE, 15},978{&cik_PA_SU, 154}, {&cik_PA_SC, 397}, {&cik_SPI, 197}, {&cik_SQ, 273}, {&cik_SX, 34},979{&cik_TA, 119}, {&cik_TCA, 35, 2}, {&cik_TCC, 192}, {&cik_TD, 55}, {&cik_TCP, 180},980{&cik_GDS, 121}, {&cik_VGT, 147}, {&cik_IA, 24}, {&cik_MC, 22}, {&cik_SRBM, 27},981{&cik_WD, 37}, {&cik_CPG, 48}, {&cik_CPC, 24},982983};984985static struct ac_pc_block_gfxdescr groups_gfx9[] = {986{&cik_CB, 438}, {&cik_CPF, 32}, {&cik_DB, 328}, {&cik_GRBM, 38}, {&cik_GRBMSE, 16},987{&cik_PA_SU, 292}, {&cik_PA_SC, 491}, {&cik_SPI, 196}, {&cik_SQ, 374}, {&cik_SX, 208},988{&cik_TA, 119}, {&cik_TCA, 35, 2}, {&cik_TCC, 256}, {&cik_TD, 57}, {&cik_TCP, 85},989{&cik_GDS, 121}, {&cik_VGT, 148}, {&cik_IA, 32}, {&cik_WD, 58}, {&cik_CPG, 59},990{&cik_CPC, 35},991};992993static struct ac_pc_block_gfxdescr groups_gfx10[] = {994{&cik_CB, 461},995{&gfx10_CHA, 45},996{&gfx10_CHCG, 35},997{&gfx10_CHC, 35},998{&cik_CPC, 47},999{&cik_CPF, 40},1000{&cik_CPG, 82},1001{&gfx10_DB, 370},1002{&gfx10_GCR, 94},1003{&cik_GDS, 123},1004{&gfx10_GE, 315},1005{&gfx10_GL1A, 36},1006{&gfx10_GL1C, 64},1007{&gfx10_GL2A, 91},1008{&gfx10_GL2C, 
235},1009{&cik_GRBM, 47},1010{&cik_GRBMSE, 19},1011{&gfx10_PA_PH, 960},1012{&cik_PA_SC, 552},1013{&gfx10_PA_SU, 266},1014{&gfx10_RLC, 7},1015{&gfx10_RMI, 258},1016{&cik_SPI, 329},1017{&gfx10_SQ, 509},1018{&cik_SX, 225},1019{&cik_TA, 226},1020{&gfx10_TCP, 77},1021{&cik_TD, 61},1022{&gfx10_UTCL1, 15},1023};10241025struct ac_pc_block *ac_lookup_counter(const struct ac_perfcounters *pc,1026unsigned index, unsigned *base_gid,1027unsigned *sub_index)1028{1029struct ac_pc_block *block = pc->blocks;1030unsigned bid;10311032*base_gid = 0;1033for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {1034unsigned total = block->num_groups * block->b->selectors;10351036if (index < total) {1037*sub_index = index;1038return block;1039}10401041index -= total;1042*base_gid += block->num_groups;1043}10441045return NULL;1046}10471048struct ac_pc_block *ac_lookup_group(const struct ac_perfcounters *pc,1049unsigned *index)1050{1051unsigned bid;1052struct ac_pc_block *block = pc->blocks;10531054for (bid = 0; bid < pc->num_blocks; ++bid, ++block) {1055if (*index < block->num_groups)1056return block;1057*index -= block->num_groups;1058}10591060return NULL;1061}10621063bool ac_init_block_names(const struct radeon_info *info,1064const struct ac_perfcounters *pc,1065struct ac_pc_block *block)1066{1067bool per_instance_groups = ac_pc_block_has_per_instance_groups(pc, block);1068bool per_se_groups = ac_pc_block_has_per_se_groups(pc, block);1069unsigned i, j, k;1070unsigned groups_shader = 1, groups_se = 1, groups_instance = 1;1071unsigned namelen;1072char *groupname;1073char *p;10741075if (per_instance_groups)1076groups_instance = block->num_instances;1077if (per_se_groups)1078groups_se = info->max_se;1079if (block->b->b->flags & AC_PC_BLOCK_SHADER)1080groups_shader = ARRAY_SIZE(ac_pc_shader_type_bits);10811082namelen = strlen(block->b->b->name);1083block->group_name_stride = namelen + 1;1084if (block->b->b->flags & AC_PC_BLOCK_SHADER)1085block->group_name_stride += 3;1086if (per_se_groups) 
{1087assert(groups_se <= 10);1088block->group_name_stride += 1;10891090if (per_instance_groups)1091block->group_name_stride += 1;1092}1093if (per_instance_groups) {1094assert(groups_instance <= 100);1095block->group_name_stride += 2;1096}10971098block->group_names = MALLOC(block->num_groups * block->group_name_stride);1099if (!block->group_names)1100return false;11011102groupname = block->group_names;1103for (i = 0; i < groups_shader; ++i) {1104const char *shader_suffix = ac_pc_shader_type_suffixes[i];1105unsigned shaderlen = strlen(shader_suffix);1106for (j = 0; j < groups_se; ++j) {1107for (k = 0; k < groups_instance; ++k) {1108strcpy(groupname, block->b->b->name);1109p = groupname + namelen;11101111if (block->b->b->flags & AC_PC_BLOCK_SHADER) {1112strcpy(p, shader_suffix);1113p += shaderlen;1114}11151116if (per_se_groups) {1117p += sprintf(p, "%d", j);1118if (per_instance_groups)1119*p++ = '_';1120}11211122if (per_instance_groups)1123p += sprintf(p, "%d", k);11241125groupname += block->group_name_stride;1126}1127}1128}11291130assert(block->b->selectors <= 1000);1131block->selector_name_stride = block->group_name_stride + 4;1132block->selector_names =1133MALLOC(block->num_groups * block->b->selectors * block->selector_name_stride);1134if (!block->selector_names)1135return false;11361137groupname = block->group_names;1138p = block->selector_names;1139for (i = 0; i < block->num_groups; ++i) {1140for (j = 0; j < block->b->selectors; ++j) {1141sprintf(p, "%s_%03d", groupname, j);1142p += block->selector_name_stride;1143}1144groupname += block->group_name_stride;1145}11461147return true;1148}11491150bool ac_init_perfcounters(const struct radeon_info *info,1151bool separate_se,1152bool separate_instance,1153struct ac_perfcounters *pc)1154{1155const struct ac_pc_block_gfxdescr *blocks;1156unsigned num_blocks;11571158switch (info->chip_class) {1159case GFX7:1160blocks = groups_CIK;1161num_blocks = ARRAY_SIZE(groups_CIK);1162break;1163case GFX8:1164blocks = 
groups_VI;1165num_blocks = ARRAY_SIZE(groups_VI);1166break;1167case GFX9:1168blocks = groups_gfx9;1169num_blocks = ARRAY_SIZE(groups_gfx9);1170break;1171case GFX10:1172case GFX10_3:1173blocks = groups_gfx10;1174num_blocks = ARRAY_SIZE(groups_gfx10);1175break;1176case GFX6:1177default:1178return false; /* not implemented */1179}11801181pc->separate_se = separate_se;1182pc->separate_instance = separate_instance;11831184pc->blocks = CALLOC(num_blocks, sizeof(struct ac_pc_block));1185if (!pc->blocks)1186return false;1187pc->num_blocks = num_blocks;11881189for (unsigned i = 0; i < num_blocks; i++) {1190struct ac_pc_block *block = &pc->blocks[i];11911192block->b = &blocks[i];1193block->num_instances = MAX2(1, block->b->instances);11941195if (!strcmp(block->b->b->name, "CB") ||1196!strcmp(block->b->b->name, "DB") ||1197!strcmp(block->b->b->name, "RMI"))1198block->num_instances = info->max_se;1199else if (!strcmp(block->b->b->name, "TCC"))1200block->num_instances = info->max_tcc_blocks;1201else if (!strcmp(block->b->b->name, "IA"))1202block->num_instances = MAX2(1, info->max_se / 2);1203else if (!strcmp(block->b->b->name, "TA") ||1204!strcmp(block->b->b->name, "TCP") ||1205!strcmp(block->b->b->name, "TD")) {1206block->num_instances = MAX2(1, info->max_good_cu_per_sa);1207}12081209if (ac_pc_block_has_per_instance_groups(pc, block)) {1210block->num_groups = block->num_instances;1211} else {1212block->num_groups = 1;1213}12141215if (ac_pc_block_has_per_se_groups(pc, block))1216block->num_groups *= info->max_se;1217if (block->b->b->flags & AC_PC_BLOCK_SHADER)1218block->num_groups *= ARRAY_SIZE(ac_pc_shader_type_bits);12191220pc->num_groups += block->num_groups;1221}12221223return true;1224}12251226void ac_destroy_perfcounters(struct ac_perfcounters *pc)1227{1228if (!pc)1229return;12301231for (unsigned i = 0; i < pc->num_blocks; ++i) {1232FREE(pc->blocks[i].group_names);1233FREE(pc->blocks[i].selector_names);1234}1235FREE(pc->blocks);1236}123712381239