Path: blob/21.2-virgl/src/freedreno/ds/fd_pps_driver.cc
4565 views
/*1* Copyright © 2021 Google, Inc.2*3* SPDX-License-Identifier: MIT4*/56#include "fd_pps_driver.h"78#include <cstring>9#include <iostream>10#include <perfetto.h>1112#include "pps/pps.h"13#include "pps/pps_algorithm.h"1415namespace pps16{1718uint64_t19FreedrenoDriver::get_min_sampling_period_ns()20{21return 100000;22}2324/*25TODO this sees like it would be largely the same for a5xx as well26(ie. same countable names)..27*/28void29FreedrenoDriver::setup_a6xx_counters()30{31/* TODO is there a reason to want more than one group? */32CounterGroup group = {};33group.name = "counters";34groups.clear();35counters.clear();36countables.clear();37enabled_counters.clear();38groups.emplace_back(std::move(group));3940/*41* Create the countables that we'll be using.42*/4344auto PERF_CP_ALWAYS_COUNT = countable("PERF_CP_ALWAYS_COUNT");45auto PERF_CP_BUSY_CYCLES = countable("PERF_CP_BUSY_CYCLES");46auto PERF_RB_3D_PIXELS = countable("PERF_RB_3D_PIXELS");47auto PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS");48auto PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS = countable("PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS");49auto PERF_TP_L1_CACHELINE_MISSES = countable("PERF_TP_L1_CACHELINE_MISSES");50auto PERF_SP_BUSY_CYCLES = countable("PERF_SP_BUSY_CYCLES");5152/*53* And then setup the derived counters that we are exporting to54* pps based on the captured countable values55*/5657counter("GPU Frequency", Counter::Units::Hertz, [=]() {58return PERF_CP_ALWAYS_COUNT / time;59}60);6162counter("GPU % Utilization", Counter::Units::Percent, [=]() {63return 100.0 * (PERF_CP_BUSY_CYCLES / time) / max_freq;64}65);6667// This one is a bit of a guess, but seems plausible..68counter("ALU / Fragment", Counter::Units::None, [=]() {69return (PERF_SP_FS_STAGE_FULL_ALU_INSTRUCTIONS +70PERF_SP_FS_STAGE_HALF_ALU_INSTRUCTIONS / 2) / PERF_RB_3D_PIXELS;71}72);7374counter("TP L1 Cache Misses", Counter::Units::None, [=]() {75return PERF_TP_L1_CACHELINE_MISSES / time;76}77);7879counter("Shader Core Utilization", Counter::Units::Percent, [=]() {80return 100.0 * (PERF_SP_BUSY_CYCLES / time) / (max_freq * info->num_sp_cores);81}82);8384// TODO add more.. see https://gpuinspector.dev/docs/gpu-counters/qualcomm85// for what blob exposes86}8788/**89* Generate an submit the cmdstream to configure the counter/countable90* muxing91*/92void93FreedrenoDriver::configure_counters(bool reset, bool wait)94{95struct fd_submit *submit = fd_submit_new(pipe);96enum fd_ringbuffer_flags flags =97(enum fd_ringbuffer_flags)(FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);98struct fd_ringbuffer *ring = fd_submit_new_ringbuffer(submit, 0x1000, flags);99100for (auto countable : countables)101countable.configure(ring, reset);102103struct fd_submit_fence fence = {};104util_queue_fence_init(&fence.ready);105106fd_submit_flush(submit, -1, &fence);107108util_queue_fence_wait(&fence.ready);109110fd_ringbuffer_del(ring);111fd_submit_del(submit);112113if (wait)114fd_pipe_wait(pipe, &fence.fence);115}116117/**118* Read the current counter values and record the time.119*/120void121FreedrenoDriver::collect_countables()122{123last_dump_ts = perfetto::base::GetBootTimeNs().count();124125for (auto countable : countables)126countable.collect();127}128129bool130FreedrenoDriver::init_perfcnt()131{132uint64_t val;133134dev = fd_device_new(drm_device.fd);135pipe = fd_pipe_new(dev, FD_PIPE_3D);136137if (fd_pipe_get_param(pipe, FD_GPU_ID, &val)) {138PERFETTO_FATAL("Could not get GPU_ID");139return false;140}141gpu_id = val;142143if (fd_pipe_get_param(pipe, FD_MAX_FREQ, &val)) {144PERFETTO_FATAL("Could not get MAX_FREQ");145return false;146}147max_freq = val;148149if (fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val)) {150PERFETTO_ILOG("Could not get SUSPEND_COUNT");151} else {152suspend_count = val;153has_suspend_count = true;154}155156perfcntrs = fd_perfcntrs(gpu_id, &num_perfcntrs);157if (num_perfcntrs == 0) {158PERFETTO_FATAL("No hw counters available");159return false;160}161162assigned_counters.resize(num_perfcntrs);163assigned_counters.assign(assigned_counters.size(), 0);164165switch (gpu_id) {166case 600 ... 699:167setup_a6xx_counters();168break;169default:170PERFETTO_FATAL("Unsupported GPU: a%03u", gpu_id);171return false;172}173174state.resize(next_countable_id);175176for (auto countable : countables)177countable.resolve();178179info = fd_dev_info(gpu_id);180181io = fd_dt_find_io();182if (!io) {183PERFETTO_FATAL("Could not map GPU I/O space");184return false;185}186187configure_counters(true, true);188collect_countables();189190return true;191}192193void194FreedrenoDriver::enable_counter(const uint32_t counter_id)195{196enabled_counters.push_back(counters[counter_id]);197}198199void200FreedrenoDriver::enable_all_counters()201{202enabled_counters.reserve(counters.size());203for (auto &counter : counters) {204enabled_counters.push_back(counter);205}206}207208void209FreedrenoDriver::enable_perfcnt(const uint64_t /* sampling_period_ns */)210{211}212213bool214FreedrenoDriver::dump_perfcnt()215{216if (has_suspend_count) {217uint64_t val;218219fd_pipe_get_param(pipe, FD_SUSPEND_COUNT, &val);220221if (suspend_count != val) {222PERFETTO_ILOG("Device had suspended!");223224suspend_count = val;225226configure_counters(true, true);227collect_countables();228229/* We aren't going to have anything sensible by comparing230* current values to values from prior to the suspend, so231* just skip this sampling period.232*/233return false;234}235}236237auto last_ts = last_dump_ts;238239/* Capture the timestamp from the *start* of the sampling period: */240last_capture_ts = last_dump_ts;241242collect_countables();243244auto elapsed_time_ns = last_dump_ts - last_ts;245246time = (float)elapsed_time_ns / 1000000000.0;247248/* On older kernels that dont' support querying the suspend-249* count, just send configuration cmdstream regularly to keep250* the GPU alive and correctly configured for the countables251* we want252*/253if (!has_suspend_count) {254configure_counters(false, false);255}256257return true;258}259260uint64_t FreedrenoDriver::next()261{262auto ret = last_capture_ts;263last_capture_ts = 0;264return ret;265}266267void FreedrenoDriver::disable_perfcnt()268{269/* There isn't really any disable, only reconfiguring which countables270* get muxed to which counters271*/272}273274/*275* Countable276*/277278FreedrenoDriver::Countable279FreedrenoDriver::countable(std::string name)280{281auto countable = Countable(this, name);282countables.emplace_back(countable);283return countable;284}285286FreedrenoDriver::Countable::Countable(FreedrenoDriver *d, std::string name)287: id {d->next_countable_id++}, d {d}, name {name}288{289}290291/* Emit register writes on ring to configure counter/countable muxing: */292void293FreedrenoDriver::Countable::configure(struct fd_ringbuffer *ring, bool reset)294{295const struct fd_perfcntr_countable *countable = d->state[id].countable;296const struct fd_perfcntr_counter *counter = d->state[id].counter;297298OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);299300if (counter->enable && reset) {301OUT_PKT4(ring, counter->enable, 1);302OUT_RING(ring, 0);303}304305if (counter->clear && reset) {306OUT_PKT4(ring, counter->clear, 1);307OUT_RING(ring, 1);308309OUT_PKT4(ring, counter->clear, 1);310OUT_RING(ring, 0);311}312313OUT_PKT4(ring, counter->select_reg, 1);314OUT_RING(ring, countable->selector);315316if (counter->enable && reset) {317OUT_PKT4(ring, counter->enable, 1);318OUT_RING(ring, 1);319}320}321322/* Collect current counter value and calculate delta since last sample: */323void324FreedrenoDriver::Countable::collect()325{326const struct fd_perfcntr_counter *counter = d->state[id].counter;327328d->state[id].last_value = d->state[id].value;329330uint32_t *reg_lo = (uint32_t *)d->io + counter->counter_reg_lo;331uint32_t *reg_hi = (uint32_t *)d->io + counter->counter_reg_hi;332333uint32_t lo = *reg_lo;334uint32_t hi = *reg_hi;335336d->state[id].value = lo | ((uint64_t)hi << 32);337}338339/* Resolve the countable and assign next counter from it's group: */340void341FreedrenoDriver::Countable::resolve()342{343for (unsigned i = 0; i < d->num_perfcntrs; i++) {344const struct fd_perfcntr_group *g = &d->perfcntrs[i];345for (unsigned j = 0; j < g->num_countables; j++) {346const struct fd_perfcntr_countable *c = &g->countables[j];347if (name == c->name) {348d->state[id].countable = c;349350/* Assign a counter from the same group: */351assert(d->assigned_counters[i] < g->num_counters);352d->state[id].counter = &g->counters[d->assigned_counters[i]++];353354std::cout << "Countable: " << name << ", group=" << g->name <<355", counter=" << d->assigned_counters[i] - 1 << "\n";356357return;358}359}360}361unreachable("no such countable!");362}363364uint64_t365FreedrenoDriver::Countable::get_value() const366{367return d->state[id].value - d->state[id].last_value;368}369370/*371* DerivedCounter372*/373374FreedrenoDriver::DerivedCounter::DerivedCounter(FreedrenoDriver *d, std::string name,375Counter::Units units,376std::function<int64_t()> derive)377: Counter(d->next_counter_id++, name, 0)378{379std::cout << "DerivedCounter: " << name << ", id=" << id << "\n";380this->units = units;381set_getter([=](const Counter &c, const Driver &d) {382return derive();383}384);385}386387FreedrenoDriver::DerivedCounter388FreedrenoDriver::counter(std::string name, Counter::Units units,389std::function<int64_t()> derive)390{391auto counter = DerivedCounter(this, name, units, derive);392counters.emplace_back(counter);393return counter;394}395396} // namespace pps397398399