Path: blob/21.2-virgl/src/freedreno/perfcntrs/fdperf.c
4565 views
/*1* Copyright (C) 2016 Rob Clark <[email protected]>2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* the rights to use, copy, modify, merge, publish, distribute, sublicense,8* and/or sell copies of the Software, and to permit persons to whom the9* Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL18* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR19* OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,20* ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR21* OTHER DEALINGS IN THE SOFTWARE.22*/2324#include <assert.h>25#include <curses.h>26#include <err.h>27#include <inttypes.h>28#include <libconfig.h>29#include <locale.h>30#include <stdint.h>31#include <stdio.h>32#include <stdlib.h>33#include <string.h>34#include <time.h>35#include <xf86drm.h>3637#include "drm/freedreno_drmif.h"38#include "drm/freedreno_ringbuffer.h"3940#include "util/os_file.h"4142#include "freedreno_dt.h"43#include "freedreno_perfcntr.h"4445#define MAX_CNTR_PER_GROUP 244647/* NOTE first counter group should always be CP, since we unconditionally48* use CP counter to measure the gpu freq.49*/5051struct counter_group {52const struct fd_perfcntr_group *group;5354struct {55const struct fd_perfcntr_counter *counter;56uint16_t select_val;57volatile uint32_t *val_hi;58volatile uint32_t *val_lo;59} counter[MAX_CNTR_PER_GROUP];6061/* last sample time: */62uint32_t stime[MAX_CNTR_PER_GROUP];63/* for now just care about the low 32b value.. at least then we don't64* have to really care that we can't sample both hi and lo regs at the65* same time:66*/67uint32_t last[MAX_CNTR_PER_GROUP];68/* current value, ie. by how many did the counter increase in last69* sampling period divided by the sampling period:70*/71float current[MAX_CNTR_PER_GROUP];72/* name of currently selected counters (for UI): */73const char *label[MAX_CNTR_PER_GROUP];74};7576static struct {77void *io;78uint32_t chipid;79uint32_t min_freq;80uint32_t max_freq;81/* per-generation table of counters: */82unsigned ngroups;83struct counter_group *groups;84/* drm device (for writing select regs via ring): */85struct fd_device *dev;86struct fd_pipe *pipe;87struct fd_submit *submit;88struct fd_ringbuffer *ring;89} dev;9091static void config_save(void);92static void config_restore(void);93static void restore_counter_groups(void);9495/*96* helpers97*/9899static uint32_t100gettime_us(void)101{102struct timespec ts;103clock_gettime(CLOCK_MONOTONIC, &ts);104return (ts.tv_sec * 1000000) + (ts.tv_nsec / 1000);105}106107static uint32_t108delta(uint32_t a, uint32_t b)109{110/* deal with rollover: */111if (a > b)112return 0xffffffff - a + b;113else114return b - a;115}116117static void118find_device(void)119{120int ret, fd;121122fd = drmOpenWithType("msm", NULL, DRM_NODE_RENDER);123if (fd < 0)124err(1, "could not open drm device");125126dev.dev = fd_device_new(fd);127dev.pipe = fd_pipe_new(dev.dev, FD_PIPE_3D);128129uint64_t val;130ret = fd_pipe_get_param(dev.pipe, FD_CHIP_ID, &val);131if (ret) {132err(1, "could not get gpu-id");133}134dev.chipid = val;135136#define CHIP_FMT "d%d%d.%d"137#define CHIP_ARGS(chipid) \138((chipid) >> 24) & 0xff, ((chipid) >> 16) & 0xff, ((chipid) >> 8) & 0xff, \139((chipid) >> 0) & 0xff140printf("device: a%" CHIP_FMT "\n", CHIP_ARGS(dev.chipid));141142/* try MAX_FREQ first as that will work regardless of old dt143* dt bindings vs upstream bindings:144*/145ret = fd_pipe_get_param(dev.pipe, FD_MAX_FREQ, &val);146if (ret) {147printf("falling back to parsing DT bindings for freq\n");148if (!fd_dt_find_freqs(&dev.min_freq, &dev.max_freq))149err(1, "could not find GPU freqs");150} else {151dev.min_freq = 0;152dev.max_freq = val;153}154155printf("min_freq=%u, max_freq=%u\n", dev.min_freq, dev.max_freq);156157dev.io = fd_dt_find_io();158if (!dev.io) {159err(1, "could not map device");160}161}162163/*164* perf-monitor165*/166167static void168flush_ring(void)169{170int ret;171172if (!dev.submit)173return;174175struct fd_submit_fence fence = {};176util_queue_fence_init(&fence.ready);177178ret = fd_submit_flush(dev.submit, -1, &fence);179180if (ret)181errx(1, "submit failed: %d", ret);182util_queue_fence_wait(&fence.ready);183fd_ringbuffer_del(dev.ring);184fd_submit_del(dev.submit);185186dev.ring = NULL;187dev.submit = NULL;188}189190static void191select_counter(struct counter_group *group, int ctr, int n)192{193assert(n < group->group->num_countables);194assert(ctr < group->group->num_counters);195196group->label[ctr] = group->group->countables[n].name;197group->counter[ctr].select_val = n;198199if (!dev.submit) {200dev.submit = fd_submit_new(dev.pipe);201dev.ring = fd_submit_new_ringbuffer(202dev.submit, 0x1000, FD_RINGBUFFER_PRIMARY | FD_RINGBUFFER_GROWABLE);203}204205/* bashing select register directly while gpu is active will end206* in tears.. so we need to write it via the ring:207*208* TODO it would help startup time, if gpu is loaded, to batch209* all the initial writes and do a single flush.. although that210* makes things more complicated for capturing inital sample value211*/212struct fd_ringbuffer *ring = dev.ring;213switch (dev.chipid >> 24) {214case 2:215case 3:216case 4:217OUT_PKT3(ring, CP_WAIT_FOR_IDLE, 1);218OUT_RING(ring, 0x00000000);219220if (group->group->counters[ctr].enable) {221OUT_PKT0(ring, group->group->counters[ctr].enable, 1);222OUT_RING(ring, 0);223}224225if (group->group->counters[ctr].clear) {226OUT_PKT0(ring, group->group->counters[ctr].clear, 1);227OUT_RING(ring, 1);228229OUT_PKT0(ring, group->group->counters[ctr].clear, 1);230OUT_RING(ring, 0);231}232233OUT_PKT0(ring, group->group->counters[ctr].select_reg, 1);234OUT_RING(ring, n);235236if (group->group->counters[ctr].enable) {237OUT_PKT0(ring, group->group->counters[ctr].enable, 1);238OUT_RING(ring, 1);239}240241break;242case 5:243case 6:244OUT_PKT7(ring, CP_WAIT_FOR_IDLE, 0);245246if (group->group->counters[ctr].enable) {247OUT_PKT4(ring, group->group->counters[ctr].enable, 1);248OUT_RING(ring, 0);249}250251if (group->group->counters[ctr].clear) {252OUT_PKT4(ring, group->group->counters[ctr].clear, 1);253OUT_RING(ring, 1);254255OUT_PKT4(ring, group->group->counters[ctr].clear, 1);256OUT_RING(ring, 0);257}258259OUT_PKT4(ring, group->group->counters[ctr].select_reg, 1);260OUT_RING(ring, n);261262if (group->group->counters[ctr].enable) {263OUT_PKT4(ring, group->group->counters[ctr].enable, 1);264OUT_RING(ring, 1);265}266267break;268}269270group->last[ctr] = *group->counter[ctr].val_lo;271group->stime[ctr] = gettime_us();272}273274static void275resample_counter(struct counter_group *group, int ctr)276{277uint32_t val = *group->counter[ctr].val_lo;278uint32_t t = gettime_us();279uint32_t dt = delta(group->stime[ctr], t);280uint32_t dval = delta(group->last[ctr], val);281group->current[ctr] = (float)dval * 1000000.0 / (float)dt;282group->last[ctr] = val;283group->stime[ctr] = t;284}285286#define REFRESH_MS 500287288/* sample all the counters: */289static void290resample(void)291{292static uint64_t last_time;293uint64_t current_time = gettime_us();294295if ((current_time - last_time) < (REFRESH_MS * 1000 / 2))296return;297298last_time = current_time;299300for (unsigned i = 0; i < dev.ngroups; i++) {301struct counter_group *group = &dev.groups[i];302for (unsigned j = 0; j < group->group->num_counters; j++) {303resample_counter(group, j);304}305}306}307308/*309* The UI310*/311312#define COLOR_GROUP_HEADER 1313#define COLOR_FOOTER 2314#define COLOR_INVERSE 3315316static int w, h;317static int ctr_width;318static int max_rows, current_cntr = 1;319320static void321redraw_footer(WINDOW *win)322{323char *footer;324int n;325326n = asprintf(&footer, " fdperf: a%" CHIP_FMT " (%.2fMHz..%.2fMHz)",327CHIP_ARGS(dev.chipid), ((float)dev.min_freq) / 1000000.0,328((float)dev.max_freq) / 1000000.0);329330wmove(win, h - 1, 0);331wattron(win, COLOR_PAIR(COLOR_FOOTER));332waddstr(win, footer);333whline(win, ' ', w - n);334wattroff(win, COLOR_PAIR(COLOR_FOOTER));335336free(footer);337}338339static void340redraw_group_header(WINDOW *win, int row, const char *name)341{342wmove(win, row, 0);343wattron(win, A_BOLD);344wattron(win, COLOR_PAIR(COLOR_GROUP_HEADER));345waddstr(win, name);346whline(win, ' ', w - strlen(name));347wattroff(win, COLOR_PAIR(COLOR_GROUP_HEADER));348wattroff(win, A_BOLD);349}350351static void352redraw_counter_label(WINDOW *win, int row, const char *name, bool selected)353{354int n = strlen(name);355assert(n <= ctr_width);356wmove(win, row, 0);357whline(win, ' ', ctr_width - n);358wmove(win, row, ctr_width - n);359if (selected)360wattron(win, COLOR_PAIR(COLOR_INVERSE));361waddstr(win, name);362if (selected)363wattroff(win, COLOR_PAIR(COLOR_INVERSE));364waddstr(win, ": ");365}366367static void368redraw_counter_value_cycles(WINDOW *win, float val)369{370char *str;371int x = getcurx(win);372int valwidth = w - x;373int barwidth, n;374375/* convert to fraction of max freq: */376val = val / (float)dev.max_freq;377378/* figure out percentage-bar width: */379barwidth = (int)(val * valwidth);380381/* sometimes things go over 100%.. idk why, could be382* things running faster than base clock, or counter383* summing up cycles in multiple cores?384*/385barwidth = MIN2(barwidth, valwidth - 1);386387n = asprintf(&str, "%.2f%%", 100.0 * val);388wattron(win, COLOR_PAIR(COLOR_INVERSE));389waddnstr(win, str, barwidth);390if (barwidth > n) {391whline(win, ' ', barwidth - n);392wmove(win, getcury(win), x + barwidth);393}394wattroff(win, COLOR_PAIR(COLOR_INVERSE));395if (barwidth < n)396waddstr(win, str + barwidth);397whline(win, ' ', w - getcurx(win));398399free(str);400}401402static void403redraw_counter_value_raw(WINDOW *win, float val)404{405char *str;406(void)asprintf(&str, "%'.2f", val);407waddstr(win, str);408whline(win, ' ', w - getcurx(win));409free(str);410}411412static void413redraw_counter(WINDOW *win, int row, struct counter_group *group, int ctr,414bool selected)415{416redraw_counter_label(win, row, group->label[ctr], selected);417418/* quick hack, if the label has "CYCLE" in the name, it is419* probably a cycle counter ;-)420* Perhaps add more info in rnndb schema to know how to421* treat individual counters (ie. which are cycles, and422* for those we want to present as a percentage do we423* need to scale the result.. ie. is it running at some424* multiple or divisor of core clk, etc)425*426* TODO it would be much more clever to get this from xml427* Also.. in some cases I think we want to know how many428* units the counter is counting for, ie. if a320 has 2x429* shader as a306 we might need to scale the result..430*/431if (strstr(group->label[ctr], "CYCLE") ||432strstr(group->label[ctr], "BUSY") || strstr(group->label[ctr], "IDLE"))433redraw_counter_value_cycles(win, group->current[ctr]);434else435redraw_counter_value_raw(win, group->current[ctr]);436}437438static void439redraw(WINDOW *win)440{441static int scroll = 0;442int max, row = 0;443444w = getmaxx(win);445h = getmaxy(win);446447max = h - 3;448449if ((current_cntr - scroll) > (max - 1)) {450scroll = current_cntr - (max - 1);451} else if ((current_cntr - 1) < scroll) {452scroll = current_cntr - 1;453}454455for (unsigned i = 0; i < dev.ngroups; i++) {456struct counter_group *group = &dev.groups[i];457unsigned j = 0;458459/* NOTE skip CP the first CP counter */460if (i == 0)461j++;462463if (j < group->group->num_counters) {464if ((scroll <= row) && ((row - scroll) < max))465redraw_group_header(win, row - scroll, group->group->name);466row++;467}468469for (; j < group->group->num_counters; j++) {470if ((scroll <= row) && ((row - scroll) < max))471redraw_counter(win, row - scroll, group, j, row == current_cntr);472row++;473}474}475476/* convert back to physical (unscrolled) offset: */477row = max;478479redraw_group_header(win, row, "Status");480row++;481482/* Draw GPU freq row: */483redraw_counter_label(win, row, "Freq (MHz)", false);484redraw_counter_value_raw(win, dev.groups[0].current[0] / 1000000.0);485row++;486487redraw_footer(win);488489refresh();490}491492static struct counter_group *493current_counter(int *ctr)494{495int n = 0;496497for (unsigned i = 0; i < dev.ngroups; i++) {498struct counter_group *group = &dev.groups[i];499unsigned j = 0;500501/* NOTE skip the first CP counter (CP_ALWAYS_COUNT) */502if (i == 0)503j++;504505/* account for group header: */506if (j < group->group->num_counters) {507/* cannot select group header.. return null to indicate this508* main_ui():509*/510if (n == current_cntr)511return NULL;512n++;513}514515for (; j < group->group->num_counters; j++) {516if (n == current_cntr) {517if (ctr)518*ctr = j;519return group;520}521n++;522}523}524525assert(0);526return NULL;527}528529static void530counter_dialog(void)531{532WINDOW *dialog;533struct counter_group *group;534int cnt = 0, current = 0, scroll;535536/* figure out dialog size: */537int dh = h / 2;538int dw = ctr_width + 2;539540group = current_counter(&cnt);541542/* find currently selected idx (note there can be discontinuities543* so the selected value does not map 1:1 to current idx)544*/545uint32_t selected = group->counter[cnt].select_val;546for (int i = 0; i < group->group->num_countables; i++) {547if (group->group->countables[i].selector == selected) {548current = i;549break;550}551}552553/* scrolling offset, if dialog is too small for all the choices: */554scroll = 0;555556dialog = newwin(dh, dw, (h - dh) / 2, (w - dw) / 2);557box(dialog, 0, 0);558wrefresh(dialog);559keypad(dialog, TRUE);560561while (true) {562int max = MIN2(dh - 2, group->group->num_countables);563int selector = -1;564565if ((current - scroll) >= (dh - 3)) {566scroll = current - (dh - 3);567} else if (current < scroll) {568scroll = current;569}570571for (int i = 0; i < max; i++) {572int n = scroll + i;573wmove(dialog, i + 1, 1);574if (n == current) {575assert(n < group->group->num_countables);576selector = group->group->countables[n].selector;577wattron(dialog, COLOR_PAIR(COLOR_INVERSE));578}579if (n < group->group->num_countables)580waddstr(dialog, group->group->countables[n].name);581whline(dialog, ' ', dw - getcurx(dialog) - 1);582if (n == current)583wattroff(dialog, COLOR_PAIR(COLOR_INVERSE));584}585586assert(selector >= 0);587588switch (wgetch(dialog)) {589case KEY_UP:590current = MAX2(0, current - 1);591break;592case KEY_DOWN:593current = MIN2(group->group->num_countables - 1, current + 1);594break;595case KEY_LEFT:596case KEY_ENTER:597/* select new sampler */598select_counter(group, cnt, selector);599flush_ring();600config_save();601goto out;602case 'q':603goto out;604default:605/* ignore */606break;607}608609resample();610}611612out:613wborder(dialog, ' ', ' ', ' ', ' ', ' ', ' ', ' ', ' ');614delwin(dialog);615}616617static void618scroll_cntr(int amount)619{620if (amount < 0) {621current_cntr = MAX2(1, current_cntr + amount);622if (current_counter(NULL) == NULL) {623current_cntr = MAX2(1, current_cntr - 1);624}625} else {626current_cntr = MIN2(max_rows - 1, current_cntr + amount);627if (current_counter(NULL) == NULL)628current_cntr = MIN2(max_rows - 1, current_cntr + 1);629}630}631632static void633main_ui(void)634{635WINDOW *mainwin;636uint32_t last_time = gettime_us();637638/* curses setup: */639mainwin = initscr();640if (!mainwin)641goto out;642643cbreak();644wtimeout(mainwin, REFRESH_MS);645noecho();646keypad(mainwin, TRUE);647curs_set(0);648start_color();649init_pair(COLOR_GROUP_HEADER, COLOR_WHITE, COLOR_GREEN);650init_pair(COLOR_FOOTER, COLOR_WHITE, COLOR_BLUE);651init_pair(COLOR_INVERSE, COLOR_BLACK, COLOR_WHITE);652653while (true) {654switch (wgetch(mainwin)) {655case KEY_UP:656scroll_cntr(-1);657break;658case KEY_DOWN:659scroll_cntr(+1);660break;661case KEY_NPAGE: /* page-down */662/* TODO figure out # of rows visible? */663scroll_cntr(+15);664break;665case KEY_PPAGE: /* page-up */666/* TODO figure out # of rows visible? */667scroll_cntr(-15);668break;669case KEY_RIGHT:670counter_dialog();671break;672case 'q':673goto out;674break;675default:676/* ignore */677break;678}679resample();680redraw(mainwin);681682/* restore the counters every 0.5s in case the GPU has suspended,683* in which case the current selected countables will have reset:684*/685uint32_t t = gettime_us();686if (delta(last_time, t) > 500000) {687restore_counter_groups();688flush_ring();689last_time = t;690}691}692693/* restore settings.. maybe we need an atexit()??*/694out:695delwin(mainwin);696endwin();697refresh();698}699700static void701restore_counter_groups(void)702{703for (unsigned i = 0; i < dev.ngroups; i++) {704struct counter_group *group = &dev.groups[i];705unsigned j = 0;706707/* NOTE skip CP the first CP counter */708if (i == 0)709j++;710711for (; j < group->group->num_counters; j++) {712select_counter(group, j, group->counter[j].select_val);713}714}715}716717static void718setup_counter_groups(const struct fd_perfcntr_group *groups)719{720for (unsigned i = 0; i < dev.ngroups; i++) {721struct counter_group *group = &dev.groups[i];722723group->group = &groups[i];724725max_rows += group->group->num_counters + 1;726727/* the first CP counter is hidden: */728if (i == 0) {729max_rows--;730if (group->group->num_counters <= 1)731max_rows--;732}733734for (unsigned j = 0; j < group->group->num_counters; j++) {735group->counter[j].counter = &group->group->counters[j];736737group->counter[j].val_hi =738dev.io + (group->counter[j].counter->counter_reg_hi * 4);739group->counter[j].val_lo =740dev.io + (group->counter[j].counter->counter_reg_lo * 4);741742group->counter[j].select_val = j;743}744745for (unsigned j = 0; j < group->group->num_countables; j++) {746ctr_width =747MAX2(ctr_width, strlen(group->group->countables[j].name) + 1);748}749}750}751752/*753* configuration / persistence754*/755756static config_t cfg;757static config_setting_t *setting;758759static void760config_save(void)761{762for (unsigned i = 0; i < dev.ngroups; i++) {763struct counter_group *group = &dev.groups[i];764unsigned j = 0;765766/* NOTE skip CP the first CP counter */767if (i == 0)768j++;769770config_setting_t *sect =771config_setting_get_member(setting, group->group->name);772773for (; j < group->group->num_counters; j++) {774char name[] = "counter0000";775sprintf(name, "counter%d", j);776config_setting_t *s = config_setting_lookup(sect, name);777config_setting_set_int(s, group->counter[j].select_val);778}779}780781config_write_file(&cfg, "fdperf.cfg");782}783784static void785config_restore(void)786{787char *str;788789config_init(&cfg);790791/* Read the file. If there is an error, report it and exit. */792if (!config_read_file(&cfg, "fdperf.cfg")) {793warn("could not restore settings");794}795796config_setting_t *root = config_root_setting(&cfg);797798/* per device settings: */799(void)asprintf(&str, "a%dxx", dev.chipid >> 24);800setting = config_setting_get_member(root, str);801if (!setting)802setting = config_setting_add(root, str, CONFIG_TYPE_GROUP);803free(str);804805for (unsigned i = 0; i < dev.ngroups; i++) {806struct counter_group *group = &dev.groups[i];807unsigned j = 0;808809/* NOTE skip CP the first CP counter */810if (i == 0)811j++;812813config_setting_t *sect =814config_setting_get_member(setting, group->group->name);815816if (!sect) {817sect =818config_setting_add(setting, group->group->name, CONFIG_TYPE_GROUP);819}820821for (; j < group->group->num_counters; j++) {822char name[] = "counter0000";823sprintf(name, "counter%d", j);824config_setting_t *s = config_setting_lookup(sect, name);825if (!s) {826config_setting_add(sect, name, CONFIG_TYPE_INT);827continue;828}829select_counter(group, j, config_setting_get_int(s));830}831}832}833834/*835* main836*/837838int839main(int argc, char **argv)840{841find_device();842843const struct fd_perfcntr_group *groups;844groups = fd_perfcntrs((dev.chipid >> 24) * 100, &dev.ngroups);845if (!groups) {846errx(1, "no perfcntr support");847}848849dev.groups = calloc(dev.ngroups, sizeof(struct counter_group));850851setlocale(LC_NUMERIC, "en_US.UTF-8");852853setup_counter_groups(groups);854restore_counter_groups();855config_restore();856flush_ring();857858main_ui();859860return 0;861}862863864