Path: blob/21.2-virgl/src/gallium/drivers/r600/r600_gpu_load.c
4570 views
/*1* Copyright 2015 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*22* Authors: Marek Olšák <[email protected]>23*24*/2526/* The GPU load is measured as follows.27*28* There is a thread which samples the GRBM_STATUS register at a certain29* frequency and the "busy" or "idle" counter is incremented based on30* whether the GUI_ACTIVE bit is set or not.31*32* Then, the user can sample the counters twice and calculate the average33* GPU load between the two samples.34*/3536#include "r600_pipe_common.h"37#include "r600_query.h"38#include "util/os_time.h"3940/* For good accuracy at 1000 fps or lower. This will be inaccurate for higher41* fps (there are too few samples per frame). */42#define SAMPLES_PER_SEC 100004344#define GRBM_STATUS 0x801045#define TA_BUSY(x) (((x) >> 14) & 0x1)46#define GDS_BUSY(x) (((x) >> 15) & 0x1)47#define VGT_BUSY(x) (((x) >> 17) & 0x1)48#define IA_BUSY(x) (((x) >> 19) & 0x1)49#define SX_BUSY(x) (((x) >> 20) & 0x1)50#define WD_BUSY(x) (((x) >> 21) & 0x1)51#define SPI_BUSY(x) (((x) >> 22) & 0x1)52#define BCI_BUSY(x) (((x) >> 23) & 0x1)53#define SC_BUSY(x) (((x) >> 24) & 0x1)54#define PA_BUSY(x) (((x) >> 25) & 0x1)55#define DB_BUSY(x) (((x) >> 26) & 0x1)56#define CP_BUSY(x) (((x) >> 29) & 0x1)57#define CB_BUSY(x) (((x) >> 30) & 0x1)58#define GUI_ACTIVE(x) (((x) >> 31) & 0x1)5960#define SRBM_STATUS2 0x0e4c61#define SDMA_BUSY(x) (((x) >> 5) & 0x1)6263#define CP_STAT 0x868064#define PFP_BUSY(x) (((x) >> 15) & 0x1)65#define MEQ_BUSY(x) (((x) >> 16) & 0x1)66#define ME_BUSY(x) (((x) >> 17) & 0x1)67#define SURFACE_SYNC_BUSY(x) (((x) >> 21) & 0x1)68#define DMA_BUSY(x) (((x) >> 22) & 0x1)69#define SCRATCH_RAM_BUSY(x) (((x) >> 24) & 0x1)7071#define IDENTITY(x) x7273#define UPDATE_COUNTER(field, mask) \74do { \75if (mask(value)) \76p_atomic_inc(&counters->named.field.busy); \77else \78p_atomic_inc(&counters->named.field.idle); \79} while (0)8081static void r600_update_mmio_counters(struct r600_common_screen *rscreen,82union r600_mmio_counters *counters)83{84uint32_t value = 0;85bool gui_busy, sdma_busy = false;8687/* GRBM_STATUS */88rscreen->ws->read_registers(rscreen->ws, GRBM_STATUS, 1, &value);8990UPDATE_COUNTER(ta, TA_BUSY);91UPDATE_COUNTER(gds, GDS_BUSY);92UPDATE_COUNTER(vgt, VGT_BUSY);93UPDATE_COUNTER(ia, IA_BUSY);94UPDATE_COUNTER(sx, SX_BUSY);95UPDATE_COUNTER(wd, WD_BUSY);96UPDATE_COUNTER(spi, SPI_BUSY);97UPDATE_COUNTER(bci, BCI_BUSY);98UPDATE_COUNTER(sc, SC_BUSY);99UPDATE_COUNTER(pa, PA_BUSY);100UPDATE_COUNTER(db, DB_BUSY);101UPDATE_COUNTER(cp, CP_BUSY);102UPDATE_COUNTER(cb, CB_BUSY);103UPDATE_COUNTER(gui, GUI_ACTIVE);104gui_busy = GUI_ACTIVE(value);105106value = gui_busy || sdma_busy;107UPDATE_COUNTER(gpu, IDENTITY);108}109110#undef UPDATE_COUNTER111112static int113r600_gpu_load_thread(void *param)114{115struct r600_common_screen *rscreen = (struct r600_common_screen*)param;116const int period_us = 1000000 / SAMPLES_PER_SEC;117int sleep_us = period_us;118int64_t cur_time, last_time = os_time_get();119120while (!p_atomic_read(&rscreen->gpu_load_stop_thread)) {121if (sleep_us)122os_time_sleep(sleep_us);123124/* Make sure we sleep the ideal amount of time to match125* the expected frequency. */126cur_time = os_time_get();127128if (os_time_timeout(last_time, last_time + period_us,129cur_time))130sleep_us = MAX2(sleep_us - 1, 1);131else132sleep_us += 1;133134/*printf("Hz: %.1f\n", 1000000.0 / (cur_time - last_time));*/135last_time = cur_time;136137/* Update the counters. */138r600_update_mmio_counters(rscreen, &rscreen->mmio_counters);139}140p_atomic_dec(&rscreen->gpu_load_stop_thread);141return 0;142}143144void r600_gpu_load_kill_thread(struct r600_common_screen *rscreen)145{146if (!rscreen->gpu_load_thread)147return;148149p_atomic_inc(&rscreen->gpu_load_stop_thread);150thrd_join(rscreen->gpu_load_thread, NULL);151rscreen->gpu_load_thread = 0;152}153154static uint64_t r600_read_mmio_counter(struct r600_common_screen *rscreen,155unsigned busy_index)156{157/* Start the thread if needed. */158if (!rscreen->gpu_load_thread) {159mtx_lock(&rscreen->gpu_load_mutex);160/* Check again inside the mutex. */161if (!rscreen->gpu_load_thread)162rscreen->gpu_load_thread =163u_thread_create(r600_gpu_load_thread, rscreen);164mtx_unlock(&rscreen->gpu_load_mutex);165}166167unsigned busy = p_atomic_read(&rscreen->mmio_counters.array[busy_index]);168unsigned idle = p_atomic_read(&rscreen->mmio_counters.array[busy_index + 1]);169170return busy | ((uint64_t)idle << 32);171}172173static unsigned r600_end_mmio_counter(struct r600_common_screen *rscreen,174uint64_t begin, unsigned busy_index)175{176uint64_t end = r600_read_mmio_counter(rscreen, busy_index);177unsigned busy = (end & 0xffffffff) - (begin & 0xffffffff);178unsigned idle = (end >> 32) - (begin >> 32);179180/* Calculate the % of time the busy counter was being incremented.181*182* If no counters were incremented, return the current counter status.183* It's for the case when the load is queried faster than184* the counters are updated.185*/186if (idle || busy) {187return busy*100 / (busy + idle);188} else {189union r600_mmio_counters counters;190191memset(&counters, 0, sizeof(counters));192r600_update_mmio_counters(rscreen, &counters);193return counters.array[busy_index] ? 100 : 0;194}195}196197#define BUSY_INDEX(rscreen, field) (&rscreen->mmio_counters.named.field.busy - \198rscreen->mmio_counters.array)199200static unsigned busy_index_from_type(struct r600_common_screen *rscreen,201unsigned type)202{203switch (type) {204case R600_QUERY_GPU_LOAD:205return BUSY_INDEX(rscreen, gpu);206case R600_QUERY_GPU_SHADERS_BUSY:207return BUSY_INDEX(rscreen, spi);208case R600_QUERY_GPU_TA_BUSY:209return BUSY_INDEX(rscreen, ta);210case R600_QUERY_GPU_GDS_BUSY:211return BUSY_INDEX(rscreen, gds);212case R600_QUERY_GPU_VGT_BUSY:213return BUSY_INDEX(rscreen, vgt);214case R600_QUERY_GPU_IA_BUSY:215return BUSY_INDEX(rscreen, ia);216case R600_QUERY_GPU_SX_BUSY:217return BUSY_INDEX(rscreen, sx);218case R600_QUERY_GPU_WD_BUSY:219return BUSY_INDEX(rscreen, wd);220case R600_QUERY_GPU_BCI_BUSY:221return BUSY_INDEX(rscreen, bci);222case R600_QUERY_GPU_SC_BUSY:223return BUSY_INDEX(rscreen, sc);224case R600_QUERY_GPU_PA_BUSY:225return BUSY_INDEX(rscreen, pa);226case R600_QUERY_GPU_DB_BUSY:227return BUSY_INDEX(rscreen, db);228case R600_QUERY_GPU_CP_BUSY:229return BUSY_INDEX(rscreen, cp);230case R600_QUERY_GPU_CB_BUSY:231return BUSY_INDEX(rscreen, cb);232case R600_QUERY_GPU_SDMA_BUSY:233return BUSY_INDEX(rscreen, sdma);234case R600_QUERY_GPU_PFP_BUSY:235return BUSY_INDEX(rscreen, pfp);236case R600_QUERY_GPU_MEQ_BUSY:237return BUSY_INDEX(rscreen, meq);238case R600_QUERY_GPU_ME_BUSY:239return BUSY_INDEX(rscreen, me);240case R600_QUERY_GPU_SURF_SYNC_BUSY:241return BUSY_INDEX(rscreen, surf_sync);242case R600_QUERY_GPU_CP_DMA_BUSY:243return BUSY_INDEX(rscreen, cp_dma);244case R600_QUERY_GPU_SCRATCH_RAM_BUSY:245return BUSY_INDEX(rscreen, scratch_ram);246default:247unreachable("invalid query type");248}249}250251uint64_t r600_begin_counter(struct r600_common_screen *rscreen, unsigned type)252{253unsigned busy_index = busy_index_from_type(rscreen, type);254return r600_read_mmio_counter(rscreen, busy_index);255}256257unsigned r600_end_counter(struct r600_common_screen *rscreen, unsigned type,258uint64_t begin)259{260unsigned busy_index = busy_index_from_type(rscreen, type);261return r600_end_mmio_counter(rscreen, begin, busy_index);262}263264265