Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_debug.c
4570 views
/*1* Copyright 2015 Advanced Micro Devices, Inc.2* All Rights Reserved.3*4* Permission is hereby granted, free of charge, to any person obtaining a5* copy of this software and associated documentation files (the "Software"),6* to deal in the Software without restriction, including without limitation7* on the rights to use, copy, modify, merge, publish, distribute, sub8* license, and/or sell copies of the Software, and to permit persons to whom9* the Software is furnished to do so, subject to the following conditions:10*11* The above copyright notice and this permission notice (including the next12* paragraph) shall be included in all copies or substantial portions of the13* Software.14*15* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR16* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,17* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL18* THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,19* DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR20* OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE21* USE OR OTHER DEALINGS IN THE SOFTWARE.22*/2324#include "ac_debug.h"25#include "ac_rtld.h"26#include "driver_ddebug/dd_util.h"27#include "si_compute.h"28#include "si_pipe.h"29#include "sid.h"30#include "sid_tables.h"31#include "tgsi/tgsi_from_mesa.h"32#include "util/u_dump.h"33#include "util/u_log.h"34#include "util/u_memory.h"35#include "util/u_string.h"3637static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_cs *saved, FILE *f);3839DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)4041/**42* Store a linearized copy of all chunks of \p cs together with the buffer43* list in \p saved.44*/45void si_save_cs(struct radeon_winsys *ws, struct radeon_cmdbuf *cs, struct radeon_saved_cs *saved,46bool get_buffer_list)47{48uint32_t *buf;49unsigned i;5051/* Save the IB chunks. */52saved->num_dw = cs->prev_dw + cs->current.cdw;53saved->ib = MALLOC(4 * saved->num_dw);54if (!saved->ib)55goto oom;5657buf = saved->ib;58for (i = 0; i < cs->num_prev; ++i) {59memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4);60buf += cs->prev[i].cdw;61}62memcpy(buf, cs->current.buf, cs->current.cdw * 4);6364if (!get_buffer_list)65return;6667/* Save the buffer list. */68saved->bo_count = ws->cs_get_buffer_list(cs, NULL);69saved->bo_list = CALLOC(saved->bo_count, sizeof(saved->bo_list[0]));70if (!saved->bo_list) {71FREE(saved->ib);72goto oom;73}74ws->cs_get_buffer_list(cs, saved->bo_list);7576return;7778oom:79fprintf(stderr, "%s: out of memory\n", __func__);80memset(saved, 0, sizeof(*saved));81}8283void si_clear_saved_cs(struct radeon_saved_cs *saved)84{85FREE(saved->ib);86FREE(saved->bo_list);8788memset(saved, 0, sizeof(*saved));89}9091void si_destroy_saved_cs(struct si_saved_cs *scs)92{93si_clear_saved_cs(&scs->gfx);94si_resource_reference(&scs->trace_buf, NULL);95free(scs);96}9798static void si_dump_shader(struct si_screen *sscreen, struct si_shader *shader, FILE *f)99{100if (shader->shader_log)101fwrite(shader->shader_log, shader->shader_log_size, 1, f);102else103si_shader_dump(sscreen, shader, NULL, f, false);104105if (shader->bo && sscreen->options.dump_shader_binary) {106unsigned size = shader->bo->b.b.width0;107fprintf(f, "BO: VA=%" PRIx64 " Size=%u\n", shader->bo->gpu_address, size);108109const char *mapped = sscreen->ws->buffer_map(sscreen->ws,110shader->bo->buf, NULL,111PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ | RADEON_MAP_TEMPORARY);112113for (unsigned i = 0; i < size; i += 4) {114fprintf(f, " %4x: %08x\n", i, *(uint32_t *)(mapped + i));115}116117sscreen->ws->buffer_unmap(sscreen->ws, shader->bo->buf);118119fprintf(f, "\n");120}121}122123struct si_log_chunk_shader {124/* The shader destroy code assumes a current context for unlinking of125* PM4 packets etc.126*127* While we should be able to destroy shaders without a context, doing128* so would happen only very rarely and be therefore likely to fail129* just when you're trying to debug something. Let's just remember the130* current context in the chunk.131*/132struct si_context *ctx;133struct si_shader *shader;134135/* For keep-alive reference counts */136struct si_shader_selector *sel;137struct si_compute *program;138};139140static void si_log_chunk_shader_destroy(void *data)141{142struct si_log_chunk_shader *chunk = data;143si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);144si_compute_reference(&chunk->program, NULL);145FREE(chunk);146}147148static void si_log_chunk_shader_print(void *data, FILE *f)149{150struct si_log_chunk_shader *chunk = data;151struct si_screen *sscreen = chunk->ctx->screen;152si_dump_shader(sscreen, chunk->shader, f);153}154155static struct u_log_chunk_type si_log_chunk_type_shader = {156.destroy = si_log_chunk_shader_destroy,157.print = si_log_chunk_shader_print,158};159160static void si_dump_gfx_shader(struct si_context *ctx, const struct si_shader_ctx_state *state,161struct u_log_context *log)162{163struct si_shader *current = state->current;164165if (!state->cso || !current)166return;167168struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);169chunk->ctx = ctx;170chunk->shader = current;171si_shader_selector_reference(ctx, &chunk->sel, current->selector);172u_log_chunk(log, &si_log_chunk_type_shader, chunk);173}174175static void si_dump_compute_shader(struct si_context *ctx, struct u_log_context *log)176{177const struct si_cs_shader_state *state = &ctx->cs_shader_state;178179if (!state->program)180return;181182struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);183chunk->ctx = ctx;184chunk->shader = &state->program->shader;185si_compute_reference(&chunk->program, state->program);186u_log_chunk(log, &si_log_chunk_type_shader, chunk);187}188189/**190* Shader compiles can be overridden with arbitrary ELF objects by setting191* the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]192*193* TODO: key this off some hash194*/195bool si_replace_shader(unsigned num, struct si_shader_binary *binary)196{197const char *p = debug_get_option_replace_shaders();198const char *semicolon;199char *copy = NULL;200FILE *f;201long filesize, nread;202bool replaced = false;203204if (!p)205return false;206207while (*p) {208unsigned long i;209char *endp;210i = strtoul(p, &endp, 0);211212p = endp;213if (*p != ':') {214fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");215exit(1);216}217++p;218219if (i == num)220break;221222p = strchr(p, ';');223if (!p)224return false;225++p;226}227if (!*p)228return false;229230semicolon = strchr(p, ';');231if (semicolon) {232p = copy = strndup(p, semicolon - p);233if (!copy) {234fprintf(stderr, "out of memory\n");235return false;236}237}238239fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);240241f = fopen(p, "r");242if (!f) {243perror("radeonsi: failed to open file");244goto out_free;245}246247if (fseek(f, 0, SEEK_END) != 0)248goto file_error;249250filesize = ftell(f);251if (filesize < 0)252goto file_error;253254if (fseek(f, 0, SEEK_SET) != 0)255goto file_error;256257binary->elf_buffer = MALLOC(filesize);258if (!binary->elf_buffer) {259fprintf(stderr, "out of memory\n");260goto out_close;261}262263nread = fread((void *)binary->elf_buffer, 1, filesize, f);264if (nread != filesize) {265FREE((void *)binary->elf_buffer);266binary->elf_buffer = NULL;267goto file_error;268}269270binary->elf_size = nread;271replaced = true;272273out_close:274fclose(f);275out_free:276free(copy);277return replaced;278279file_error:280perror("radeonsi: reading shader");281goto out_close;282}283284/* Parsed IBs are difficult to read without colors. Use "less -R file" to285* read them, or use "aha -b -f file" to convert them to html.286*/287#define COLOR_RESET "\033[0m"288#define COLOR_RED "\033[31m"289#define COLOR_GREEN "\033[1;32m"290#define COLOR_YELLOW "\033[1;33m"291#define COLOR_CYAN "\033[1;36m"292293static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f, unsigned offset)294{295struct radeon_winsys *ws = sctx->ws;296uint32_t value;297298if (ws->read_registers(ws, offset, 1, &value))299ac_dump_reg(f, sctx->chip_class, offset, value, ~0);300}301302static void si_dump_debug_registers(struct si_context *sctx, FILE *f)303{304if (!sctx->screen->info.has_read_registers_query)305return;306307fprintf(f, "Memory-mapped registers:\n");308si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);309310/* No other registers can be read on DRM < 3.1.0. */311if (!sctx->screen->info.is_amdgpu || sctx->screen->info.drm_minor < 1) {312fprintf(f, "\n");313return;314}315316si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);317si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);318si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);319si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);320si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);321si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);322si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);323if (sctx->chip_class <= GFX8) {324si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);325si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);326si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);327}328si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);329si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);330si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);331si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);332si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);333si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);334si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);335si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);336si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);337si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);338fprintf(f, "\n");339}340341struct si_log_chunk_cs {342struct si_context *ctx;343struct si_saved_cs *cs;344bool dump_bo_list;345unsigned gfx_begin, gfx_end;346unsigned compute_begin, compute_end;347};348349static void si_log_chunk_type_cs_destroy(void *data)350{351struct si_log_chunk_cs *chunk = data;352si_saved_cs_reference(&chunk->cs, NULL);353free(chunk);354}355356static void si_parse_current_ib(FILE *f, struct radeon_cmdbuf *cs, unsigned begin, unsigned end,357int *last_trace_id, unsigned trace_id_count, const char *name,358enum chip_class chip_class)359{360unsigned orig_end = end;361362assert(begin <= end);363364fprintf(f, "------------------ %s begin (dw = %u) ------------------\n", name, begin);365366for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {367struct radeon_cmdbuf_chunk *chunk = &cs->prev[prev_idx];368369if (begin < chunk->cdw) {370ac_parse_ib_chunk(f, chunk->buf + begin, MIN2(end, chunk->cdw) - begin, last_trace_id,371trace_id_count, chip_class, NULL, NULL);372}373374if (end <= chunk->cdw)375return;376377if (begin < chunk->cdw)378fprintf(f, "\n---------- Next %s Chunk ----------\n\n", name);379380begin -= MIN2(begin, chunk->cdw);381end -= chunk->cdw;382}383384assert(end <= cs->current.cdw);385386ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id, trace_id_count,387chip_class, NULL, NULL);388389fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n", name, orig_end);390}391392static void si_log_chunk_type_cs_print(void *data, FILE *f)393{394struct si_log_chunk_cs *chunk = data;395struct si_context *ctx = chunk->ctx;396struct si_saved_cs *scs = chunk->cs;397int last_trace_id = -1;398int last_compute_trace_id = -1;399400/* We are expecting that the ddebug pipe has already401* waited for the context, so this buffer should be idle.402* If the GPU is hung, there is no point in waiting for it.403*/404uint32_t *map = ctx->ws->buffer_map(ctx->ws, scs->trace_buf->buf, NULL,405PIPE_MAP_UNSYNCHRONIZED | PIPE_MAP_READ);406if (map) {407last_trace_id = map[0];408last_compute_trace_id = map[1];409}410411if (chunk->gfx_end != chunk->gfx_begin) {412if (chunk->gfx_begin == 0) {413if (ctx->cs_preamble_state)414ac_parse_ib(f, ctx->cs_preamble_state->pm4, ctx->cs_preamble_state->ndw, NULL, 0,415"IB2: Init config", ctx->chip_class, NULL, NULL);416417if (ctx->cs_preamble_gs_rings)418ac_parse_ib(f, ctx->cs_preamble_gs_rings->pm4, ctx->cs_preamble_gs_rings->ndw, NULL, 0,419"IB2: Init GS rings", ctx->chip_class, NULL, NULL);420}421422if (scs->flushed) {423ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin, chunk->gfx_end - chunk->gfx_begin,424&last_trace_id, map ? 1 : 0, "IB", ctx->chip_class, NULL, NULL);425} else {426si_parse_current_ib(f, &ctx->gfx_cs, chunk->gfx_begin, chunk->gfx_end, &last_trace_id,427map ? 1 : 0, "IB", ctx->chip_class);428}429}430431if (chunk->compute_end != chunk->compute_begin) {432assert(ctx->prim_discard_compute_cs.priv);433434if (scs->flushed) {435ac_parse_ib(f, scs->compute.ib + chunk->compute_begin,436chunk->compute_end - chunk->compute_begin, &last_compute_trace_id, map ? 1 : 0,437"Compute IB", ctx->chip_class, NULL, NULL);438} else {439si_parse_current_ib(f, &ctx->prim_discard_compute_cs, chunk->compute_begin,440chunk->compute_end, &last_compute_trace_id, map ? 1 : 0, "Compute IB",441ctx->chip_class);442}443}444445if (chunk->dump_bo_list) {446fprintf(f, "Flushing. Time: ");447util_dump_ns(f, scs->time_flush);448fprintf(f, "\n\n");449si_dump_bo_list(ctx, &scs->gfx, f);450}451}452453static const struct u_log_chunk_type si_log_chunk_type_cs = {454.destroy = si_log_chunk_type_cs_destroy,455.print = si_log_chunk_type_cs_print,456};457458static void si_log_cs(struct si_context *ctx, struct u_log_context *log, bool dump_bo_list)459{460assert(ctx->current_saved_cs);461462struct si_saved_cs *scs = ctx->current_saved_cs;463unsigned gfx_cur = ctx->gfx_cs.prev_dw + ctx->gfx_cs.current.cdw;464unsigned compute_cur = 0;465466if (ctx->prim_discard_compute_cs.priv)467compute_cur =468ctx->prim_discard_compute_cs.prev_dw + ctx->prim_discard_compute_cs.current.cdw;469470if (!dump_bo_list && gfx_cur == scs->gfx_last_dw && compute_cur == scs->compute_last_dw)471return;472473struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));474475chunk->ctx = ctx;476si_saved_cs_reference(&chunk->cs, scs);477chunk->dump_bo_list = dump_bo_list;478479chunk->gfx_begin = scs->gfx_last_dw;480chunk->gfx_end = gfx_cur;481scs->gfx_last_dw = gfx_cur;482483chunk->compute_begin = scs->compute_last_dw;484chunk->compute_end = compute_cur;485scs->compute_last_dw = compute_cur;486487u_log_chunk(log, &si_log_chunk_type_cs, chunk);488}489490void si_auto_log_cs(void *data, struct u_log_context *log)491{492struct si_context *ctx = (struct si_context *)data;493si_log_cs(ctx, log, false);494}495496void si_log_hw_flush(struct si_context *sctx)497{498if (!sctx->log)499return;500501si_log_cs(sctx, sctx->log, true);502503if (&sctx->b == sctx->screen->aux_context) {504/* The aux context isn't captured by the ddebug wrapper,505* so we dump it on a flush-by-flush basis here.506*/507FILE *f = dd_get_debug_file(false);508if (!f) {509fprintf(stderr, "radeonsi: error opening aux context dump file.\n");510} else {511dd_write_header(f, &sctx->screen->b, 0);512513fprintf(f, "Aux context dump:\n\n");514u_log_new_page_print(sctx->log, f);515516fclose(f);517}518}519}520521static const char *priority_to_string(enum radeon_bo_priority priority)522{523#define ITEM(x) [RADEON_PRIO_##x] = #x524static const char *table[64] = {525ITEM(FENCE),526ITEM(TRACE),527ITEM(SO_FILLED_SIZE),528ITEM(QUERY),529ITEM(IB1),530ITEM(IB2),531ITEM(DRAW_INDIRECT),532ITEM(INDEX_BUFFER),533ITEM(CP_DMA),534ITEM(CONST_BUFFER),535ITEM(DESCRIPTORS),536ITEM(BORDER_COLORS),537ITEM(SAMPLER_BUFFER),538ITEM(VERTEX_BUFFER),539ITEM(SHADER_RW_BUFFER),540ITEM(COMPUTE_GLOBAL),541ITEM(SAMPLER_TEXTURE),542ITEM(SHADER_RW_IMAGE),543ITEM(SAMPLER_TEXTURE_MSAA),544ITEM(COLOR_BUFFER),545ITEM(DEPTH_BUFFER),546ITEM(COLOR_BUFFER_MSAA),547ITEM(DEPTH_BUFFER_MSAA),548ITEM(SEPARATE_META),549ITEM(SHADER_BINARY),550ITEM(SHADER_RINGS),551ITEM(SCRATCH_BUFFER),552};553#undef ITEM554555assert(priority < ARRAY_SIZE(table));556return table[priority];557}558559static int bo_list_compare_va(const struct radeon_bo_list_item *a,560const struct radeon_bo_list_item *b)561{562return a->vm_address < b->vm_address ? -1 : a->vm_address > b->vm_address ? 1 : 0;563}564565static void si_dump_bo_list(struct si_context *sctx, const struct radeon_saved_cs *saved, FILE *f)566{567unsigned i, j;568569if (!saved->bo_list)570return;571572/* Sort the list according to VM adddresses first. */573qsort(saved->bo_list, saved->bo_count, sizeof(saved->bo_list[0]), (void *)bo_list_compare_va);574575fprintf(f, "Buffer list (in units of pages = 4kB):\n" COLOR_YELLOW576" Size VM start page "577"VM end page Usage" COLOR_RESET "\n");578579for (i = 0; i < saved->bo_count; i++) {580/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */581const unsigned page_size = sctx->screen->info.gart_page_size;582uint64_t va = saved->bo_list[i].vm_address;583uint64_t size = saved->bo_list[i].bo_size;584bool hit = false;585586/* If there's unused virtual memory between 2 buffers, print it. */587if (i) {588uint64_t previous_va_end =589saved->bo_list[i - 1].vm_address + saved->bo_list[i - 1].bo_size;590591if (va > previous_va_end) {592fprintf(f, " %10" PRIu64 " -- hole --\n", (va - previous_va_end) / page_size);593}594}595596/* Print the buffer. */597fprintf(f, " %10" PRIu64 " 0x%013" PRIX64 " 0x%013" PRIX64 " ",598size / page_size, va / page_size, (va + size) / page_size);599600/* Print the usage. */601for (j = 0; j < 32; j++) {602if (!(saved->bo_list[i].priority_usage & (1u << j)))603continue;604605fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));606hit = true;607}608fprintf(f, "\n");609}610fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"611" Other buffers can still be allocated there.\n\n");612}613614static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)615{616struct pipe_framebuffer_state *state = &sctx->framebuffer.state;617struct si_texture *tex;618int i;619620for (i = 0; i < state->nr_cbufs; i++) {621if (!state->cbufs[i])622continue;623624tex = (struct si_texture *)state->cbufs[i]->texture;625u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);626si_print_texture_info(sctx->screen, tex, log);627u_log_printf(log, "\n");628}629630if (state->zsbuf) {631tex = (struct si_texture *)state->zsbuf->texture;632u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");633si_print_texture_info(sctx->screen, tex, log);634u_log_printf(log, "\n");635}636}637638typedef unsigned (*slot_remap_func)(unsigned);639640struct si_log_chunk_desc_list {641/** Pointer to memory map of buffer where the list is uploader */642uint32_t *gpu_list;643/** Reference of buffer where the list is uploaded, so that gpu_list644* is kept live. */645struct si_resource *buf;646647const char *shader_name;648const char *elem_name;649slot_remap_func slot_remap;650enum chip_class chip_class;651unsigned element_dw_size;652unsigned num_elements;653654uint32_t list[0];655};656657static void si_log_chunk_desc_list_destroy(void *data)658{659struct si_log_chunk_desc_list *chunk = data;660si_resource_reference(&chunk->buf, NULL);661FREE(chunk);662}663664static void si_log_chunk_desc_list_print(void *data, FILE *f)665{666struct si_log_chunk_desc_list *chunk = data;667unsigned sq_img_rsrc_word0 =668chunk->chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;669670for (unsigned i = 0; i < chunk->num_elements; i++) {671unsigned cpu_dw_offset = i * chunk->element_dw_size;672unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;673const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";674uint32_t *cpu_list = chunk->list + cpu_dw_offset;675uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;676677fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n", chunk->shader_name,678chunk->elem_name, i, list_note);679680switch (chunk->element_dw_size) {681case 4:682for (unsigned j = 0; j < 4; j++)683ac_dump_reg(f, chunk->chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[j],6840xffffffff);685break;686case 8:687for (unsigned j = 0; j < 8; j++)688ac_dump_reg(f, chunk->chip_class, sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff);689690fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");691for (unsigned j = 0; j < 4; j++)692ac_dump_reg(f, chunk->chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j],6930xffffffff);694break;695case 16:696for (unsigned j = 0; j < 8; j++)697ac_dump_reg(f, chunk->chip_class, sq_img_rsrc_word0 + j * 4, gpu_list[j], 0xffffffff);698699fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");700for (unsigned j = 0; j < 4; j++)701ac_dump_reg(f, chunk->chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, gpu_list[4 + j],7020xffffffff);703704fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");705for (unsigned j = 0; j < 8; j++)706ac_dump_reg(f, chunk->chip_class, sq_img_rsrc_word0 + j * 4, gpu_list[8 + j],7070xffffffff);708709fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");710for (unsigned j = 0; j < 4; j++)711ac_dump_reg(f, chunk->chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, gpu_list[12 + j],7120xffffffff);713break;714}715716if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {717fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!" COLOR_RESET "\n");718}719720fprintf(f, "\n");721}722}723724static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {725.destroy = si_log_chunk_desc_list_destroy,726.print = si_log_chunk_desc_list_print,727};728729static void si_dump_descriptor_list(struct si_screen *screen, struct si_descriptors *desc,730const char *shader_name, const char *elem_name,731unsigned element_dw_size, unsigned num_elements,732slot_remap_func slot_remap, struct u_log_context *log)733{734if (!desc->list)735return;736737/* In some cases, the caller doesn't know how many elements are really738* uploaded. Reduce num_elements to fit in the range of active slots. */739unsigned active_range_dw_begin = desc->first_active_slot * desc->element_dw_size;740unsigned active_range_dw_end =741active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;742743while (num_elements > 0) {744int i = slot_remap(num_elements - 1);745unsigned dw_begin = i * element_dw_size;746unsigned dw_end = dw_begin + element_dw_size;747748if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)749break;750751num_elements--;752}753754struct si_log_chunk_desc_list *chunk =755CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list, 4 * element_dw_size * num_elements);756chunk->shader_name = shader_name;757chunk->elem_name = elem_name;758chunk->element_dw_size = element_dw_size;759chunk->num_elements = num_elements;760chunk->slot_remap = slot_remap;761chunk->chip_class = screen->info.chip_class;762763si_resource_reference(&chunk->buf, desc->buffer);764chunk->gpu_list = desc->gpu_list;765766for (unsigned i = 0; i < num_elements; ++i) {767memcpy(&chunk->list[i * element_dw_size], &desc->list[slot_remap(i) * element_dw_size],7684 * element_dw_size);769}770771u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);772}773774static unsigned si_identity(unsigned slot)775{776return slot;777}778779static void si_dump_descriptors(struct si_context *sctx, gl_shader_stage stage,780const struct si_shader_info *info, struct u_log_context *log)781{782enum pipe_shader_type processor = pipe_shader_type_from_mesa(stage);783struct si_descriptors *descs =784&sctx->descriptors[SI_DESCS_FIRST_SHADER + processor * SI_NUM_SHADER_DESCS];785static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};786const char *name = shader_name[processor];787unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;788unsigned enabled_images;789790if (info) {791enabled_constbuf = u_bit_consecutive(0, info->base.num_ubos);792enabled_shaderbuf = u_bit_consecutive(0, info->base.num_ssbos);793enabled_samplers = info->base.textures_used[0];794enabled_images = u_bit_consecutive(0, info->base.num_images);795} else {796enabled_constbuf =797sctx->const_and_shader_buffers[processor].enabled_mask >> SI_NUM_SHADER_BUFFERS;798enabled_shaderbuf = 0;799for (int i = 0; i < SI_NUM_SHADER_BUFFERS; i++) {800enabled_shaderbuf |=801(sctx->const_and_shader_buffers[processor].enabled_mask &8021llu << (SI_NUM_SHADER_BUFFERS - i - 1)) << i;803}804enabled_samplers = sctx->samplers[processor].enabled_mask;805enabled_images = sctx->images[processor].enabled_mask;806}807808if (stage == MESA_SHADER_VERTEX && sctx->vb_descriptors_buffer &&809sctx->vb_descriptors_gpu_list) {810assert(info); /* only CS may not have an info struct */811struct si_descriptors desc = {};812813desc.buffer = sctx->vb_descriptors_buffer;814desc.list = sctx->vb_descriptors_gpu_list;815desc.gpu_list = sctx->vb_descriptors_gpu_list;816desc.element_dw_size = 4;817desc.num_active_slots = sctx->vertex_elements->vb_desc_list_alloc_size / 16;818819si_dump_descriptor_list(sctx->screen, &desc, name, " - Vertex buffer", 4, info->num_inputs,820si_identity, log);821}822823si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name,824" - Constant buffer", 4, util_last_bit(enabled_constbuf),825si_get_constbuf_slot, log);826si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS], name,827" - Shader buffer", 4, util_last_bit(enabled_shaderbuf),828si_get_shaderbuf_slot, log);829si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name,830" - Sampler", 16, util_last_bit(enabled_samplers), si_get_sampler_slot,831log);832si_dump_descriptor_list(sctx->screen, &descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES], name,833" - Image", 8, util_last_bit(enabled_images), si_get_image_slot, log);834}835836static void si_dump_gfx_descriptors(struct si_context *sctx,837const struct si_shader_ctx_state *state,838struct u_log_context *log)839{840if (!state->cso || !state->current)841return;842843si_dump_descriptors(sctx, state->cso->info.stage, &state->cso->info, log);844}845846static void si_dump_compute_descriptors(struct si_context *sctx, struct u_log_context *log)847{848if (!sctx->cs_shader_state.program)849return;850851si_dump_descriptors(sctx, MESA_SHADER_COMPUTE, NULL, log);852}853854struct si_shader_inst {855const char *text; /* start of disassembly for this instruction */856unsigned textlen;857unsigned size; /* instruction size = 4 or 8 */858uint64_t addr; /* instruction address */859};860861/**862* Open the given \p binary as \p rtld_binary and split the contained863* disassembly string into instructions and add them to the array864* pointed to by \p instructions, which must be sufficiently large.865*866* Labels are considered to be part of the following instruction.867*868* The caller must keep \p rtld_binary alive as long as \p instructions are869* used and then close it afterwards.870*/871static void si_add_split_disasm(struct si_screen *screen, struct ac_rtld_binary *rtld_binary,872struct si_shader_binary *binary, uint64_t *addr, unsigned *num,873struct si_shader_inst *instructions,874gl_shader_stage stage, unsigned wave_size)875{876if (!ac_rtld_open(rtld_binary, (struct ac_rtld_open_info){877.info = &screen->info,878.shader_type = stage,879.wave_size = wave_size,880.num_parts = 1,881.elf_ptrs = &binary->elf_buffer,882.elf_sizes = &binary->elf_size}))883return;884885const char *disasm;886size_t nbytes;887if (!ac_rtld_get_section_by_name(rtld_binary, ".AMDGPU.disasm", &disasm, &nbytes))888return;889890const char *end = disasm + nbytes;891while (disasm < end) {892const char *semicolon = memchr(disasm, ';', end - disasm);893if (!semicolon)894break;895896struct si_shader_inst *inst = &instructions[(*num)++];897const char *inst_end = memchr(semicolon + 1, '\n', end - semicolon - 1);898if (!inst_end)899inst_end = end;900901inst->text = disasm;902inst->textlen = inst_end - disasm;903904inst->addr = *addr;905/* More than 16 chars after ";" means the instruction is 8 bytes long. */906inst->size = inst_end - semicolon > 16 ? 8 : 4;907*addr += inst->size;908909if (inst_end == end)910break;911disasm = inst_end + 1;912}913}914915/* If the shader is being executed, print its asm instructions, and annotate916* those that are being executed right now with information about waves that917* execute them. This is most useful during a GPU hang.918*/919static void si_print_annotated_shader(struct si_shader *shader, struct ac_wave_info *waves,920unsigned num_waves, FILE *f)921{922if (!shader)923return;924925struct si_screen *screen = shader->selector->screen;926gl_shader_stage stage = shader->selector->info.stage;927uint64_t start_addr = shader->bo->gpu_address;928uint64_t end_addr = start_addr + shader->bo->b.b.width0;929unsigned i;930931/* See if any wave executes the shader. */932for (i = 0; i < num_waves; i++) {933if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)934break;935}936if (i == num_waves)937return; /* the shader is not being executed */938939/* Remember the first found wave. The waves are sorted according to PC. */940waves = &waves[i];941num_waves -= i;942943/* Get the list of instructions.944* Buffer size / 4 is the upper bound of the instruction count.945*/946unsigned num_inst = 0;947uint64_t inst_addr = start_addr;948unsigned wave_size = si_get_shader_wave_size(shader);949struct ac_rtld_binary rtld_binaries[5] = {};950struct si_shader_inst *instructions =951calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));952953if (shader->prolog) {954si_add_split_disasm(screen, &rtld_binaries[0], &shader->prolog->binary, &inst_addr, &num_inst,955instructions, stage, wave_size);956}957if (shader->previous_stage) {958si_add_split_disasm(screen, &rtld_binaries[1], &shader->previous_stage->binary, &inst_addr,959&num_inst, instructions, stage, wave_size);960}961if (shader->prolog2) {962si_add_split_disasm(screen, &rtld_binaries[2], &shader->prolog2->binary, &inst_addr,963&num_inst, instructions, stage, wave_size);964}965si_add_split_disasm(screen, &rtld_binaries[3], &shader->binary, &inst_addr, &num_inst,966instructions, stage, wave_size);967if (shader->epilog) {968si_add_split_disasm(screen, &rtld_binaries[4], &shader->epilog->binary, &inst_addr, &num_inst,969instructions, stage, wave_size);970}971972fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",973si_get_shader_name(shader));974975/* Print instructions with annotations. */976for (i = 0; i < num_inst; i++) {977struct si_shader_inst *inst = &instructions[i];978979fprintf(f, "%.*s [PC=0x%" PRIx64 ", size=%u]\n", inst->textlen, inst->text, inst->addr,980inst->size);981982/* Print which waves execute the instruction right now. */983while (num_waves && inst->addr == waves->pc) {984fprintf(f,985" " COLOR_GREEN "^ SE%u SH%u CU%u "986"SIMD%u WAVE%u EXEC=%016" PRIx64 " ",987waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);988989if (inst->size == 4) {990fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);991} else {992fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);993}994995waves->matched = true;996waves = &waves[1];997num_waves--;998}999}10001001fprintf(f, "\n\n");1002free(instructions);1003for (unsigned i = 0; i < ARRAY_SIZE(rtld_binaries); ++i)1004ac_rtld_close(&rtld_binaries[i]);1005}10061007static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)1008{1009struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];1010unsigned num_waves = ac_get_wave_info(sctx->chip_class, waves);10111012fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);10131014si_print_annotated_shader(sctx->shader.vs.current, waves, num_waves, f);1015si_print_annotated_shader(sctx->shader.tcs.current, waves, num_waves, f);1016si_print_annotated_shader(sctx->shader.tes.current, waves, num_waves, f);1017si_print_annotated_shader(sctx->shader.gs.current, waves, num_waves, f);1018si_print_annotated_shader(sctx->shader.ps.current, waves, num_waves, f);10191020/* Print waves executing shaders that are not currently bound. */1021unsigned i;1022bool found = false;1023for (i = 0; i < num_waves; i++) {1024if (waves[i].matched)1025continue;10261027if (!found) {1028fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");1029found = true;1030}1031fprintf(f,1032" SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx641033"\n",1034waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,1035waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);1036}1037if (found)1038fprintf(f, "\n\n");1039}10401041static void si_dump_command(const char *title, const char *command, FILE *f)1042{1043char line[2000];10441045FILE *p = popen(command, "r");1046if (!p)1047return;10481049fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);1050while (fgets(line, sizeof(line), p))1051fputs(line, f);1052fprintf(f, "\n\n");1053pclose(p);1054}10551056static void si_dump_debug_state(struct pipe_context *ctx, FILE *f, unsigned flags)1057{1058struct si_context *sctx = (struct si_context *)ctx;10591060if (sctx->log)1061u_log_flush(sctx->log);10621063if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {1064si_dump_debug_registers(sctx, f);10651066si_dump_annotated_shaders(sctx, f);1067si_dump_command("Active waves (raw data)", "umr -O halt_waves -wa | column -t", f);1068si_dump_command("Wave information", "umr -O halt_waves,bits -wa", f);1069}1070}10711072void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)1073{1074struct si_shader_ctx_state *tcs_shader;10751076if (!log)1077return;10781079tcs_shader = &sctx->shader.tcs;1080if (sctx->shader.tes.cso && !sctx->shader.tcs.cso)1081tcs_shader = &sctx->fixed_func_tcs_shader;10821083si_dump_framebuffer(sctx, log);10841085si_dump_gfx_shader(sctx, &sctx->shader.vs, log);1086si_dump_gfx_shader(sctx, tcs_shader, log);1087si_dump_gfx_shader(sctx, &sctx->shader.tes, log);1088si_dump_gfx_shader(sctx, &sctx->shader.gs, log);1089si_dump_gfx_shader(sctx, &sctx->shader.ps, log);10901091si_dump_descriptor_list(sctx->screen, &sctx->descriptors[SI_DESCS_INTERNAL], "", "RW buffers",10924, sctx->descriptors[SI_DESCS_INTERNAL].num_active_slots, si_identity,1093log);1094si_dump_gfx_descriptors(sctx, &sctx->shader.vs, log);1095si_dump_gfx_descriptors(sctx, tcs_shader, log);1096si_dump_gfx_descriptors(sctx, &sctx->shader.tes, log);1097si_dump_gfx_descriptors(sctx, &sctx->shader.gs, log);1098si_dump_gfx_descriptors(sctx, &sctx->shader.ps, log);1099}11001101void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)1102{1103if (!log)1104return;11051106si_dump_compute_shader(sctx, log);1107si_dump_compute_descriptors(sctx, log);1108}11091110void si_check_vm_faults(struct si_context *sctx, struct radeon_saved_cs *saved, enum ring_type ring)1111{1112struct pipe_screen *screen = sctx->b.screen;1113FILE *f;1114uint64_t addr;1115char cmd_line[4096];11161117if (!ac_vm_fault_occured(sctx->chip_class, &sctx->dmesg_timestamp, &addr))1118return;11191120f = dd_get_debug_file(false);1121if (!f)1122return;11231124fprintf(f, "VM fault report.\n\n");1125if (os_get_command_line(cmd_line, sizeof(cmd_line)))1126fprintf(f, "Command: %s\n", cmd_line);1127fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));1128fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));1129fprintf(f, "Device name: %s\n\n", screen->get_name(screen));1130fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);11311132if (sctx->apitrace_call_number)1133fprintf(f, "Last apitrace call: %u\n\n", sctx->apitrace_call_number);11341135switch (ring) {1136case RING_GFX: {1137struct u_log_context log;1138u_log_context_init(&log);11391140si_log_draw_state(sctx, &log);1141si_log_compute_state(sctx, &log);1142si_log_cs(sctx, &log, true);11431144u_log_new_page_print(&log, f);1145u_log_context_destroy(&log);1146break;1147}11481149default:1150break;1151}11521153fclose(f);11541155fprintf(stderr, "Detected a VM fault, exiting...\n");1156exit(0);1157}11581159void si_init_debug_functions(struct si_context *sctx)1160{1161sctx->b.dump_debug_state = si_dump_debug_state;11621163/* Set the initial dmesg timestamp for this context, so that1164* only new messages will be checked for VM faults.1165*/1166if (sctx->screen->debug_flags & DBG(CHECK_VM))1167ac_vm_fault_occured(sctx->chip_class, &sctx->dmesg_timestamp, NULL);1168}116911701171