Path: blob/21.2-virgl/src/gallium/drivers/nouveau/nvc0/nvc0_query_hw_metric.c
4574 views
/*1* Copyright 2015 Samuel Pitoiset2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice shall be included in11* all copies or substantial portions of the Software.12*13* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR14* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,15* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL16* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR17* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,18* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR19* OTHER DEALINGS IN THE SOFTWARE.20*/2122#include "nvc0/nvc0_context.h"23#include "nvc0/nvc0_query_hw_metric.h"24#include "nvc0/nvc0_query_hw_sm.h"2526#define _Q(i,n,t,d) { NVC0_HW_METRIC_QUERY_##i, n, PIPE_DRIVER_QUERY_TYPE_##t, d }27static const struct nvc0_hw_metric_cfg {28unsigned id;29const char *name;30enum pipe_driver_query_type type;31const char *desc;32} nvc0_hw_metric_queries[] = {33_Q(ACHIEVED_OCCUPANCY,34"metric-achieved_occupancy",35PERCENTAGE,36"Ratio of the average active warps per active cycle to the maximum "37"number of warps supported on a multiprocessor"),3839_Q(BRANCH_EFFICIENCY,40"metric-branch_efficiency",41PERCENTAGE,42"Ratio of non-divergent branches to total branches"),4344_Q(INST_ISSUED,45"metric-inst_issued",46UINT64,47"The number of instructions issued"),4849_Q(INST_PER_WRAP,50"metric-inst_per_wrap",51UINT64,52"Average number of instructions executed by each warp"),5354_Q(INST_REPLAY_OVERHEAD,55"metric-inst_replay_overhead",56UINT64,57"Average number of replays for each instruction executed"),5859_Q(ISSUED_IPC,60"metric-issued_ipc",61UINT64,62"Instructions issued per cycle"),6364_Q(ISSUE_SLOTS,65"metric-issue_slots",66UINT64,67"The number of issue slots used"),6869_Q(ISSUE_SLOT_UTILIZATION,70"metric-issue_slot_utilization",71PERCENTAGE,72"Percentage of issue slots that issued at least one instruction, "73"averaged across all cycles"),7475_Q(IPC,76"metric-ipc",77UINT64,78"Instructions executed per cycle"),7980_Q(SHARED_REPLAY_OVERHEAD,81"metric-shared_replay_overhead",82UINT64,83"Average number of replays due to shared memory conflicts for each "84"instruction executed"),8586_Q(WARP_EXECUTION_EFFICIENCY,87"metric-warp_execution_efficiency",88PERCENTAGE,89"Ratio of the average active threads per warp to the maximum number of "90"threads per warp supported on a multiprocessor"),9192_Q(WARP_NONPRED_EXECUTION_EFFICIENCY,93"metric-warp_nonpred_execution_efficiency",94PERCENTAGE,95"Ratio of the average active threads per warp executing non-predicated "96"instructions to the maximum number of threads per warp supported on a "97"multiprocessor"),98};99100#undef _Q101102static inline const struct nvc0_hw_metric_cfg *103nvc0_hw_metric_get_cfg(unsigned metric_id)104{105unsigned i;106107for (i = 0; i < ARRAY_SIZE(nvc0_hw_metric_queries); i++) {108if (nvc0_hw_metric_queries[i].id == metric_id)109return &nvc0_hw_metric_queries[i];110}111assert(0);112return NULL;113}114115struct nvc0_hw_metric_query_cfg {116unsigned type;117uint32_t queries[8];118uint32_t num_queries;119};120121#define _SM(n) NVC0_HW_SM_QUERY(NVC0_HW_SM_QUERY_ ##n)122123/* ==== Compute capability 2.0 (GF100/GF110) ==== */124static const struct nvc0_hw_metric_query_cfg125sm20_achieved_occupancy =126{127.type = NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY,128.queries[0] = _SM(ACTIVE_WARPS),129.queries[1] = _SM(ACTIVE_CYCLES),130.num_queries = 2,131};132133static const struct nvc0_hw_metric_query_cfg134sm20_branch_efficiency =135{136.type = NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY,137.queries[0] = _SM(BRANCH),138.queries[1] = _SM(DIVERGENT_BRANCH),139.num_queries = 2,140};141142static const struct nvc0_hw_metric_query_cfg143sm20_inst_per_wrap =144{145.type = NVC0_HW_METRIC_QUERY_INST_PER_WRAP,146.queries[0] = _SM(INST_EXECUTED),147.queries[1] = _SM(WARPS_LAUNCHED),148.num_queries = 2,149};150151static const struct nvc0_hw_metric_query_cfg152sm20_inst_replay_overhead =153{154.type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,155.queries[0] = _SM(INST_ISSUED),156.queries[1] = _SM(INST_EXECUTED),157.num_queries = 2,158};159160static const struct nvc0_hw_metric_query_cfg161sm20_issued_ipc =162{163.type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,164.queries[0] = _SM(INST_ISSUED),165.queries[1] = _SM(ACTIVE_CYCLES),166.num_queries = 2,167};168169static const struct nvc0_hw_metric_query_cfg170sm20_issue_slot_utilization =171{172.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,173.queries[0] = _SM(INST_ISSUED),174.queries[1] = _SM(ACTIVE_CYCLES),175.num_queries = 2,176};177178static const struct nvc0_hw_metric_query_cfg179sm20_ipc =180{181.type = NVC0_HW_METRIC_QUERY_IPC,182.queries[0] = _SM(INST_EXECUTED),183.queries[1] = _SM(ACTIVE_CYCLES),184.num_queries = 2,185};186187static const struct nvc0_hw_metric_query_cfg *sm20_hw_metric_queries[] =188{189&sm20_achieved_occupancy,190&sm20_branch_efficiency,191&sm20_inst_per_wrap,192&sm20_inst_replay_overhead,193&sm20_ipc,194&sm20_issued_ipc,195&sm20_issue_slot_utilization,196};197198/* ==== Compute capability 2.1 (GF108+ except GF110) ==== */199static const struct nvc0_hw_metric_query_cfg200sm21_inst_issued =201{202.type = NVC0_HW_METRIC_QUERY_INST_ISSUED,203.queries[0] = _SM(INST_ISSUED1_0),204.queries[1] = _SM(INST_ISSUED1_1),205.queries[2] = _SM(INST_ISSUED2_0),206.queries[3] = _SM(INST_ISSUED2_1),207.num_queries = 4,208};209210static const struct nvc0_hw_metric_query_cfg211sm21_inst_replay_overhead =212{213.type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,214.queries[0] = _SM(INST_ISSUED1_0),215.queries[1] = _SM(INST_ISSUED1_1),216.queries[2] = _SM(INST_ISSUED2_0),217.queries[3] = _SM(INST_ISSUED2_1),218.queries[4] = _SM(INST_EXECUTED),219.num_queries = 5,220};221222static const struct nvc0_hw_metric_query_cfg223sm21_issued_ipc =224{225.type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,226.queries[0] = _SM(INST_ISSUED1_0),227.queries[1] = _SM(INST_ISSUED1_1),228.queries[2] = _SM(INST_ISSUED2_0),229.queries[3] = _SM(INST_ISSUED2_1),230.queries[4] = _SM(ACTIVE_CYCLES),231.num_queries = 5,232};233234static const struct nvc0_hw_metric_query_cfg235sm21_issue_slots =236{237.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,238.queries[0] = _SM(INST_ISSUED1_0),239.queries[1] = _SM(INST_ISSUED1_1),240.queries[2] = _SM(INST_ISSUED2_0),241.queries[3] = _SM(INST_ISSUED2_1),242.num_queries = 4,243};244245static const struct nvc0_hw_metric_query_cfg246sm21_issue_slot_utilization =247{248.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,249.queries[0] = _SM(INST_ISSUED1_0),250.queries[1] = _SM(INST_ISSUED1_1),251.queries[2] = _SM(INST_ISSUED2_0),252.queries[3] = _SM(INST_ISSUED2_1),253.queries[4] = _SM(ACTIVE_CYCLES),254.num_queries = 5,255};256257static const struct nvc0_hw_metric_query_cfg *sm21_hw_metric_queries[] =258{259&sm20_achieved_occupancy,260&sm20_branch_efficiency,261&sm21_inst_issued,262&sm20_inst_per_wrap,263&sm21_inst_replay_overhead,264&sm20_ipc,265&sm21_issued_ipc,266&sm21_issue_slots,267&sm21_issue_slot_utilization,268};269270/* ==== Compute capability 3.0 (GK104/GK106/GK107) ==== */271static const struct nvc0_hw_metric_query_cfg272sm30_inst_issued =273{274.type = NVC0_HW_METRIC_QUERY_INST_ISSUED,275.queries[0] = _SM(INST_ISSUED1),276.queries[1] = _SM(INST_ISSUED2),277.num_queries = 2,278};279280static const struct nvc0_hw_metric_query_cfg281sm30_inst_replay_overhead =282{283.type = NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD,284.queries[0] = _SM(INST_ISSUED1),285.queries[1] = _SM(INST_ISSUED2),286.queries[2] = _SM(INST_EXECUTED),287.num_queries = 3,288};289290static const struct nvc0_hw_metric_query_cfg291sm30_issued_ipc =292{293.type = NVC0_HW_METRIC_QUERY_ISSUED_IPC,294.queries[0] = _SM(INST_ISSUED1),295.queries[1] = _SM(INST_ISSUED2),296.queries[2] = _SM(ACTIVE_CYCLES),297.num_queries = 3,298};299300static const struct nvc0_hw_metric_query_cfg301sm30_issue_slots =302{303.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOTS,304.queries[0] = _SM(INST_ISSUED1),305.queries[1] = _SM(INST_ISSUED2),306.num_queries = 2,307};308309static const struct nvc0_hw_metric_query_cfg310sm30_issue_slot_utilization =311{312.type = NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION,313.queries[0] = _SM(INST_ISSUED1),314.queries[1] = _SM(INST_ISSUED2),315.queries[2] = _SM(ACTIVE_CYCLES),316.num_queries = 3,317};318319static const struct nvc0_hw_metric_query_cfg320sm30_shared_replay_overhead =321{322.type = NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD,323.queries[0] = _SM(SHARED_LD_REPLAY),324.queries[1] = _SM(SHARED_ST_REPLAY),325.queries[2] = _SM(INST_EXECUTED),326.num_queries = 3,327};328329static const struct nvc0_hw_metric_query_cfg330sm30_warp_execution_efficiency =331{332.type = NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY,333.queries[0] = _SM(INST_EXECUTED),334.queries[1] = _SM(TH_INST_EXECUTED),335.num_queries = 2,336};337338static const struct nvc0_hw_metric_query_cfg *sm30_hw_metric_queries[] =339{340&sm20_achieved_occupancy,341&sm20_branch_efficiency,342&sm30_inst_issued,343&sm20_inst_per_wrap,344&sm30_inst_replay_overhead,345&sm20_ipc,346&sm30_issued_ipc,347&sm30_issue_slots,348&sm30_issue_slot_utilization,349&sm30_shared_replay_overhead,350&sm30_warp_execution_efficiency,351};352353/* ==== Compute capability 3.5 (GK110/GK208) ==== */354static const struct nvc0_hw_metric_query_cfg355sm35_warp_nonpred_execution_efficiency =356{357.type = NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY,358.queries[0] = _SM(INST_EXECUTED),359.queries[1] = _SM(NOT_PRED_OFF_INST_EXECUTED),360.num_queries = 2,361};362363static const struct nvc0_hw_metric_query_cfg *sm35_hw_metric_queries[] =364{365&sm20_achieved_occupancy,366&sm30_inst_issued,367&sm20_inst_per_wrap,368&sm30_inst_replay_overhead,369&sm20_ipc,370&sm30_issued_ipc,371&sm30_issue_slots,372&sm30_issue_slot_utilization,373&sm30_shared_replay_overhead,374&sm30_warp_execution_efficiency,375&sm35_warp_nonpred_execution_efficiency,376};377378/* ==== Compute capability 5.0 (GM107/GM108) ==== */379static const struct nvc0_hw_metric_query_cfg *sm50_hw_metric_queries[] =380{381&sm20_achieved_occupancy,382&sm20_branch_efficiency,383&sm30_inst_issued,384&sm20_inst_per_wrap,385&sm30_inst_replay_overhead,386&sm20_ipc,387&sm30_issued_ipc,388&sm30_issue_slots,389&sm30_issue_slot_utilization,390&sm30_warp_execution_efficiency,391&sm35_warp_nonpred_execution_efficiency,392};393394#undef _SM395396static inline const struct nvc0_hw_metric_query_cfg **397nvc0_hw_metric_get_queries(struct nvc0_screen *screen)398{399struct nouveau_device *dev = screen->base.device;400401switch (screen->base.class_3d) {402case GM200_3D_CLASS:403case GM107_3D_CLASS:404return sm50_hw_metric_queries;405case NVF0_3D_CLASS:406return sm35_hw_metric_queries;407case NVE4_3D_CLASS:408return sm30_hw_metric_queries;409case NVC0_3D_CLASS:410case NVC1_3D_CLASS:411case NVC8_3D_CLASS:412if (dev->chipset == 0xc0 || dev->chipset == 0xc8)413return sm20_hw_metric_queries;414return sm21_hw_metric_queries;415}416assert(0);417return NULL;418}419420unsigned421nvc0_hw_metric_get_num_queries(struct nvc0_screen *screen)422{423struct nouveau_device *dev = screen->base.device;424425switch (screen->base.class_3d) {426case GM200_3D_CLASS:427case GM107_3D_CLASS:428return ARRAY_SIZE(sm50_hw_metric_queries);429case NVF0_3D_CLASS:430return ARRAY_SIZE(sm35_hw_metric_queries);431case NVE4_3D_CLASS:432return ARRAY_SIZE(sm30_hw_metric_queries);433case NVC0_3D_CLASS:434case NVC1_3D_CLASS:435case NVC8_3D_CLASS:436if (dev->chipset == 0xc0 || dev->chipset == 0xc8)437return ARRAY_SIZE(sm20_hw_metric_queries);438return ARRAY_SIZE(sm21_hw_metric_queries);439}440return 0;441}442443static const struct nvc0_hw_metric_query_cfg *444nvc0_hw_metric_query_get_cfg(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)445{446const struct nvc0_hw_metric_query_cfg **queries;447struct nvc0_screen *screen = nvc0->screen;448struct nvc0_query *q = &hq->base;449unsigned num_queries;450unsigned i;451452num_queries = nvc0_hw_metric_get_num_queries(screen);453queries = nvc0_hw_metric_get_queries(screen);454455for (i = 0; i < num_queries; i++) {456if (NVC0_HW_METRIC_QUERY(queries[i]->type) == q->type)457return queries[i];458}459assert(0);460return NULL;461}462463static void464nvc0_hw_metric_destroy_query(struct nvc0_context *nvc0,465struct nvc0_hw_query *hq)466{467struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);468unsigned i;469470for (i = 0; i < hmq->num_queries; i++)471if (hmq->queries[i]->funcs->destroy_query)472hmq->queries[i]->funcs->destroy_query(nvc0, hmq->queries[i]);473FREE(hmq);474}475476static bool477nvc0_hw_metric_begin_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)478{479struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);480bool ret = false;481unsigned i;482483for (i = 0; i < hmq->num_queries; i++) {484ret = hmq->queries[i]->funcs->begin_query(nvc0, hmq->queries[i]);485if (!ret)486return ret;487}488return ret;489}490491static void492nvc0_hw_metric_end_query(struct nvc0_context *nvc0, struct nvc0_hw_query *hq)493{494struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);495unsigned i;496497for (i = 0; i < hmq->num_queries; i++)498hmq->queries[i]->funcs->end_query(nvc0, hmq->queries[i]);499}500501static uint64_t502sm20_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])503{504switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {505case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:506/* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */507if (res64[1])508return ((res64[0] / (double)res64[1]) / 48) * 100;509break;510case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:511/* (branch / (branch + divergent_branch)) * 100 */512if (res64[0] + res64[1])513return (res64[0] / (double)(res64[0] + res64[1])) * 100;514break;515case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:516/* inst_executed / warps_launched */517if (res64[1])518return res64[0] / (double)res64[1];519break;520case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:521/* (inst_issued - inst_executed) / inst_executed */522if (res64[1])523return (res64[0] - res64[1]) / (double)res64[1];524break;525case NVC0_HW_METRIC_QUERY_ISSUED_IPC:526/* inst_issued / active_cycles */527if (res64[1])528return res64[0] / (double)res64[1];529break;530case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:531/* ((inst_issued / 2) / active_cycles) * 100 */532if (res64[1])533return ((res64[0] / 2) / (double)res64[1]) * 100;534break;535case NVC0_HW_METRIC_QUERY_IPC:536/* inst_executed / active_cycles */537if (res64[1])538return res64[0] / (double)res64[1];539break;540default:541debug_printf("invalid metric type: %d\n",542hq->base.type - NVC0_HW_METRIC_QUERY(0));543break;544}545return 0;546}547548static uint64_t549sm21_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])550{551switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {552case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:553return sm20_hw_metric_calc_result(hq, res64);554case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:555return sm20_hw_metric_calc_result(hq, res64);556case NVC0_HW_METRIC_QUERY_INST_ISSUED:557/* issued1_0 + issued1_1 + (issued2_0 + issued2_1) * 2 */558return res64[0] + res64[1] + (res64[2] + res64[3]) * 2;559break;560case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:561return sm20_hw_metric_calc_result(hq, res64);562case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:563/* (metric-inst_issued - inst_executed) / inst_executed */564if (res64[4])565return (((res64[0] + res64[1] + (res64[2] + res64[3]) * 2) -566res64[4]) / (double)res64[4]);567break;568case NVC0_HW_METRIC_QUERY_ISSUED_IPC:569/* metric-inst_issued / active_cycles */570if (res64[4])571return (res64[0] + res64[1] + (res64[2] + res64[3]) * 2) /572(double)res64[4];573break;574case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:575/* issued1_0 + issued1_1 + issued2_0 + issued2_1 */576return res64[0] + res64[1] + res64[2] + res64[3];577break;578case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:579/* ((metric-issue_slots / 2) / active_cycles) * 100 */580if (res64[4])581return (((res64[0] + res64[1] + res64[2] + res64[3]) / 2) /582(double)res64[4]) * 100;583break;584case NVC0_HW_METRIC_QUERY_IPC:585return sm20_hw_metric_calc_result(hq, res64);586default:587debug_printf("invalid metric type: %d\n",588hq->base.type - NVC0_HW_METRIC_QUERY(0));589break;590}591return 0;592}593594static uint64_t595sm30_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])596{597switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {598case NVC0_HW_METRIC_QUERY_ACHIEVED_OCCUPANCY:599/* ((active_warps / active_cycles) / max. number of warps on a MP) * 100 */600if (res64[1])601return ((res64[0] / (double)res64[1]) / 64) * 100;602break;603case NVC0_HW_METRIC_QUERY_BRANCH_EFFICIENCY:604return sm20_hw_metric_calc_result(hq, res64);605case NVC0_HW_METRIC_QUERY_INST_ISSUED:606/* inst_issued1 + inst_issued2 * 2 */607return res64[0] + res64[1] * 2;608case NVC0_HW_METRIC_QUERY_INST_PER_WRAP:609return sm20_hw_metric_calc_result(hq, res64);610case NVC0_HW_METRIC_QUERY_INST_REPLAY_OVERHEAD:611/* (metric-inst_issued - inst_executed) / inst_executed */612if (res64[2])613return (((res64[0] + res64[1] * 2) - res64[2]) / (double)res64[2]);614break;615case NVC0_HW_METRIC_QUERY_ISSUED_IPC:616/* metric-inst_issued / active_cycles */617if (res64[2])618return (res64[0] + res64[1] * 2) / (double)res64[2];619break;620case NVC0_HW_METRIC_QUERY_ISSUE_SLOTS:621/* inst_issued1 + inst_issued2 */622return res64[0] + res64[1];623case NVC0_HW_METRIC_QUERY_ISSUE_SLOT_UTILIZATION:624/* ((metric-issue_slots / 2) / active_cycles) * 100 */625if (res64[2])626return (((res64[0] + res64[1]) / 2) / (double)res64[2]) * 100;627break;628case NVC0_HW_METRIC_QUERY_IPC:629return sm20_hw_metric_calc_result(hq, res64);630case NVC0_HW_METRIC_QUERY_SHARED_REPLAY_OVERHEAD:631/* (shared_load_replay + shared_store_replay) / inst_executed */632if (res64[2])633return (res64[0] + res64[1]) / (double)res64[2];634break;635case NVC0_HW_METRIC_QUERY_WARP_EXECUTION_EFFICIENCY:636/* thread_inst_executed / (inst_executed * max. number of threads per637* wrap) * 100 */638if (res64[0])639return (res64[1] / ((double)res64[0] * 32)) * 100;640break;641default:642debug_printf("invalid metric type: %d\n",643hq->base.type - NVC0_HW_METRIC_QUERY(0));644break;645}646return 0;647}648649static uint64_t650sm35_hw_metric_calc_result(struct nvc0_hw_query *hq, uint64_t res64[8])651{652switch (hq->base.type - NVC0_HW_METRIC_QUERY(0)) {653case NVC0_HW_METRIC_QUERY_WARP_NONPRED_EXECUTION_EFFICIENCY:654/* not_predicated_off_thread_inst_executed / (inst_executed * max. number655* of threads per wrap) * 100 */656if (res64[0])657return (res64[1] / ((double)res64[0] * 32)) * 100;658break;659default:660return sm30_hw_metric_calc_result(hq, res64);661}662return 0;663}664665static bool666nvc0_hw_metric_get_query_result(struct nvc0_context *nvc0,667struct nvc0_hw_query *hq, bool wait,668union pipe_query_result *result)669{670struct nvc0_hw_metric_query *hmq = nvc0_hw_metric_query(hq);671struct nvc0_screen *screen = nvc0->screen;672struct nouveau_device *dev = screen->base.device;673union pipe_query_result results[8] = {};674uint64_t res64[8] = {};675uint64_t value = 0;676bool ret = false;677unsigned i;678679for (i = 0; i < hmq->num_queries; i++) {680ret = hmq->queries[i]->funcs->get_query_result(nvc0, hmq->queries[i],681wait, &results[i]);682if (!ret)683return ret;684res64[i] = *(uint64_t *)&results[i];685}686687switch (screen->base.class_3d) {688case GM200_3D_CLASS:689case GM107_3D_CLASS:690case NVF0_3D_CLASS:691value = sm35_hw_metric_calc_result(hq, res64);692break;693case NVE4_3D_CLASS:694value = sm30_hw_metric_calc_result(hq, res64);695break;696default:697if (dev->chipset == 0xc0 || dev->chipset == 0xc8)698value = sm20_hw_metric_calc_result(hq, res64);699else700value = sm21_hw_metric_calc_result(hq, res64);701break;702}703704*(uint64_t *)result = value;705return ret;706}707708static const struct nvc0_hw_query_funcs hw_metric_query_funcs = {709.destroy_query = nvc0_hw_metric_destroy_query,710.begin_query = nvc0_hw_metric_begin_query,711.end_query = nvc0_hw_metric_end_query,712.get_query_result = nvc0_hw_metric_get_query_result,713};714715struct nvc0_hw_query *716nvc0_hw_metric_create_query(struct nvc0_context *nvc0, unsigned type)717{718const struct nvc0_hw_metric_query_cfg *cfg;719struct nvc0_hw_metric_query *hmq;720struct nvc0_hw_query *hq;721unsigned i;722723if (type < NVC0_HW_METRIC_QUERY(0) || type > NVC0_HW_METRIC_QUERY_LAST)724return NULL;725726hmq = CALLOC_STRUCT(nvc0_hw_metric_query);727if (!hmq)728return NULL;729730hq = &hmq->base;731hq->funcs = &hw_metric_query_funcs;732hq->base.type = type;733734cfg = nvc0_hw_metric_query_get_cfg(nvc0, hq);735736for (i = 0; i < cfg->num_queries; i++) {737hmq->queries[i] = nvc0_hw_sm_create_query(nvc0, cfg->queries[i]);738if (!hmq->queries[i]) {739nvc0_hw_metric_destroy_query(nvc0, hq);740return NULL;741}742hmq->num_queries++;743}744745return hq;746}747748int749nvc0_hw_metric_get_driver_query_info(struct nvc0_screen *screen, unsigned id,750struct pipe_driver_query_info *info)751{752int count = 0;753754if (screen->base.drm->version >= 0x01000101) {755if (screen->compute)756count = nvc0_hw_metric_get_num_queries(screen);757}758759if (!info)760return count;761762if (id < count) {763if (screen->compute) {764if (screen->base.class_3d <= GM200_3D_CLASS) {765const struct nvc0_hw_metric_query_cfg **queries =766nvc0_hw_metric_get_queries(screen);767const struct nvc0_hw_metric_cfg *cfg =768nvc0_hw_metric_get_cfg(queries[id]->type);769770info->name = cfg->name;771info->query_type = NVC0_HW_METRIC_QUERY(queries[id]->type);772info->type = cfg->type;773info->group_id = NVC0_HW_METRIC_QUERY_GROUP;774return 1;775}776}777}778return 0;779}780781782