Path: blob/21.2-virgl/src/amd/vulkan/radv_debug.c
7104 views
/*1* Copyright © 2016 Red Hat.2* Copyright © 2016 Bas Nieuwenhuizen3*4* based in part on anv driver which is:5* Copyright © 2015 Intel Corporation6*7* Permission is hereby granted, free of charge, to any person obtaining a8* copy of this software and associated documentation files (the "Software"),9* to deal in the Software without restriction, including without limitation10* the rights to use, copy, modify, merge, publish, distribute, sublicense,11* and/or sell copies of the Software, and to permit persons to whom the12* Software is furnished to do so, subject to the following conditions:13*14* The above copyright notice and this permission notice (including the next15* paragraph) shall be included in all copies or substantial portions of the16* Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR19* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,20* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL21* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER22* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING23* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS24* IN THE SOFTWARE.25*/2627#include <stdio.h>28#include <stdlib.h>29#ifndef _WIN3230#include <sys/utsname.h>31#endif32#include <sys/stat.h>3334#include "util/mesa-sha1.h"35#include "ac_debug.h"36#include "radv_debug.h"37#include "radv_shader.h"38#include "sid.h"3940#define TRACE_BO_SIZE 409641#define TMA_BO_SIZE 40964243#define COLOR_RESET "\033[0m"44#define COLOR_RED "\033[31m"45#define COLOR_GREEN "\033[1;32m"46#define COLOR_YELLOW "\033[1;33m"47#define COLOR_CYAN "\033[1;36m"4849#define RADV_DUMP_DIR "radv_dumps"5051/* Trace BO layout (offsets are 4 bytes):52*53* [0]: primary trace ID54* [1]: secondary trace ID55* [2-3]: 64-bit GFX ring pipeline pointer56* [4-5]: 64-bit COMPUTE ring pipeline pointer57* [6-7]: Vertex descriptors pointer58* [8-9]: 64-bit descriptor set #0 pointer59* ...60* [68-69]: 64-bit descriptor set #31 pointer61*/6263bool64radv_init_trace(struct radv_device *device)65{66struct radeon_winsys *ws = device->ws;67VkResult result;6869result = ws->buffer_create(70ws, TRACE_BO_SIZE, 8, RADEON_DOMAIN_VRAM,71RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING | RADEON_FLAG_ZERO_VRAM,72RADV_BO_PRIORITY_UPLOAD_BUFFER, 0, &device->trace_bo);73if (result != VK_SUCCESS)74return false;7576result = ws->buffer_make_resident(ws, device->trace_bo, true);77if (result != VK_SUCCESS)78return false;7980device->trace_id_ptr = ws->buffer_map(device->trace_bo);81if (!device->trace_id_ptr)82return false;8384ac_vm_fault_occured(device->physical_device->rad_info.chip_class, &device->dmesg_timestamp,85NULL);8687return true;88}8990void91radv_finish_trace(struct radv_device *device)92{93struct radeon_winsys *ws = device->ws;9495if (unlikely(device->trace_bo)) {96ws->buffer_make_resident(ws, device->trace_bo, false);97ws->buffer_destroy(ws, device->trace_bo);98}99}100101static void102radv_dump_trace(struct radv_device *device, struct radeon_cmdbuf *cs, FILE *f)103{104fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr);105device->ws->cs_dump(cs, f, (const int *)device->trace_id_ptr, 2);106}107108static void109radv_dump_mmapped_reg(struct radv_device *device, FILE *f, unsigned offset)110{111struct radeon_winsys *ws = device->ws;112uint32_t value;113114if (ws->read_registers(ws, offset, 1, &value))115ac_dump_reg(f, device->physical_device->rad_info.chip_class, offset, value, ~0);116}117118static void119radv_dump_debug_registers(struct radv_device *device, FILE *f)120{121struct radeon_info *info = &device->physical_device->rad_info;122123fprintf(f, "Memory-mapped registers:\n");124radv_dump_mmapped_reg(device, f, R_008010_GRBM_STATUS);125126radv_dump_mmapped_reg(device, f, R_008008_GRBM_STATUS2);127radv_dump_mmapped_reg(device, f, R_008014_GRBM_STATUS_SE0);128radv_dump_mmapped_reg(device, f, R_008018_GRBM_STATUS_SE1);129radv_dump_mmapped_reg(device, f, R_008038_GRBM_STATUS_SE2);130radv_dump_mmapped_reg(device, f, R_00803C_GRBM_STATUS_SE3);131radv_dump_mmapped_reg(device, f, R_00D034_SDMA0_STATUS_REG);132radv_dump_mmapped_reg(device, f, R_00D834_SDMA1_STATUS_REG);133if (info->chip_class <= GFX8) {134radv_dump_mmapped_reg(device, f, R_000E50_SRBM_STATUS);135radv_dump_mmapped_reg(device, f, R_000E4C_SRBM_STATUS2);136radv_dump_mmapped_reg(device, f, R_000E54_SRBM_STATUS3);137}138radv_dump_mmapped_reg(device, f, R_008680_CP_STAT);139radv_dump_mmapped_reg(device, f, R_008674_CP_STALLED_STAT1);140radv_dump_mmapped_reg(device, f, R_008678_CP_STALLED_STAT2);141radv_dump_mmapped_reg(device, f, R_008670_CP_STALLED_STAT3);142radv_dump_mmapped_reg(device, f, R_008210_CP_CPC_STATUS);143radv_dump_mmapped_reg(device, f, R_008214_CP_CPC_BUSY_STAT);144radv_dump_mmapped_reg(device, f, R_008218_CP_CPC_STALLED_STAT1);145radv_dump_mmapped_reg(device, f, R_00821C_CP_CPF_STATUS);146radv_dump_mmapped_reg(device, f, R_008220_CP_CPF_BUSY_STAT);147radv_dump_mmapped_reg(device, f, R_008224_CP_CPF_STALLED_STAT1);148fprintf(f, "\n");149}150151static void152radv_dump_buffer_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)153{154fprintf(f, COLOR_CYAN " Buffer:" COLOR_RESET "\n");155for (unsigned j = 0; j < 4; j++)156ac_dump_reg(f, chip_class, R_008F00_SQ_BUF_RSRC_WORD0 + j * 4, desc[j], 0xffffffff);157}158159static void160radv_dump_image_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)161{162unsigned sq_img_rsrc_word0 =163chip_class >= GFX10 ? R_00A000_SQ_IMG_RSRC_WORD0 : R_008F10_SQ_IMG_RSRC_WORD0;164165fprintf(f, COLOR_CYAN " Image:" COLOR_RESET "\n");166for (unsigned j = 0; j < 8; j++)167ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[j], 0xffffffff);168169fprintf(f, COLOR_CYAN " FMASK:" COLOR_RESET "\n");170for (unsigned j = 0; j < 8; j++)171ac_dump_reg(f, chip_class, sq_img_rsrc_word0 + j * 4, desc[8 + j], 0xffffffff);172}173174static void175radv_dump_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc, FILE *f)176{177fprintf(f, COLOR_CYAN " Sampler state:" COLOR_RESET "\n");178for (unsigned j = 0; j < 4; j++) {179ac_dump_reg(f, chip_class, R_008F30_SQ_IMG_SAMP_WORD0 + j * 4, desc[j], 0xffffffff);180}181}182183static void184radv_dump_combined_image_sampler_descriptor(enum chip_class chip_class, const uint32_t *desc,185FILE *f)186{187radv_dump_image_descriptor(chip_class, desc, f);188radv_dump_sampler_descriptor(chip_class, desc + 16, f);189}190191static void192radv_dump_descriptor_set(struct radv_device *device, struct radv_descriptor_set *set, unsigned id,193FILE *f)194{195enum chip_class chip_class = device->physical_device->rad_info.chip_class;196const struct radv_descriptor_set_layout *layout;197int i;198199if (!set)200return;201layout = set->header.layout;202203for (i = 0; i < set->header.layout->binding_count; i++) {204uint32_t *desc = set->header.mapped_ptr + layout->binding[i].offset / 4;205206switch (layout->binding[i].type) {207case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:208case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:209case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:210case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:211radv_dump_buffer_descriptor(chip_class, desc, f);212break;213case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:214case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:215case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:216radv_dump_image_descriptor(chip_class, desc, f);217break;218case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:219radv_dump_combined_image_sampler_descriptor(chip_class, desc, f);220break;221case VK_DESCRIPTOR_TYPE_SAMPLER:222radv_dump_sampler_descriptor(chip_class, desc, f);223break;224case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:225case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:226case VK_DESCRIPTOR_TYPE_MUTABLE_VALVE:227case VK_DESCRIPTOR_TYPE_ACCELERATION_STRUCTURE_KHR:228/* todo */229break;230default:231assert(!"unknown descriptor type");232break;233}234fprintf(f, "\n");235}236fprintf(f, "\n\n");237}238239static void240radv_dump_descriptors(struct radv_device *device, FILE *f)241{242uint64_t *ptr = (uint64_t *)device->trace_id_ptr;243int i;244245fprintf(f, "Descriptors:\n");246for (i = 0; i < MAX_SETS; i++) {247struct radv_descriptor_set *set = *(struct radv_descriptor_set **)(ptr + i + 4);248249radv_dump_descriptor_set(device, set, i, f);250}251}252253struct radv_shader_inst {254char text[160]; /* one disasm line */255unsigned offset; /* instruction offset */256unsigned size; /* instruction size = 4 or 8 */257};258259/* Split a disassembly string into lines and add them to the array pointed260* to by "instructions". */261static void262si_add_split_disasm(const char *disasm, uint64_t start_addr, unsigned *num,263struct radv_shader_inst *instructions)264{265struct radv_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;266char *next;267268while ((next = strchr(disasm, '\n'))) {269struct radv_shader_inst *inst = &instructions[*num];270unsigned len = next - disasm;271272if (!memchr(disasm, ';', len)) {273/* Ignore everything that is not an instruction. */274disasm = next + 1;275continue;276}277278assert(len < ARRAY_SIZE(inst->text));279memcpy(inst->text, disasm, len);280inst->text[len] = 0;281inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;282283const char *semicolon = strchr(disasm, ';');284assert(semicolon);285/* More than 16 chars after ";" means the instruction is 8 bytes long. */286inst->size = next - semicolon > 16 ? 8 : 4;287288snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,289" [PC=0x%" PRIx64 ", off=%u, size=%u]", start_addr + inst->offset, inst->offset,290inst->size);291292last_inst = inst;293(*num)++;294disasm = next + 1;295}296}297298static void299radv_dump_annotated_shader(struct radv_shader_variant *shader, gl_shader_stage stage,300struct ac_wave_info *waves, unsigned num_waves, FILE *f)301{302uint64_t start_addr, end_addr;303unsigned i;304305if (!shader)306return;307308start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;309end_addr = start_addr + shader->code_size;310311/* See if any wave executes the shader. */312for (i = 0; i < num_waves; i++) {313if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)314break;315}316317if (i == num_waves)318return; /* the shader is not being executed */319320/* Remember the first found wave. The waves are sorted according to PC. */321waves = &waves[i];322num_waves -= i;323324/* Get the list of instructions.325* Buffer size / 4 is the upper bound of the instruction count.326*/327unsigned num_inst = 0;328struct radv_shader_inst *instructions =329calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));330331si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);332333fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",334radv_get_shader_name(&shader->info, stage));335336/* Print instructions with annotations. */337for (i = 0; i < num_inst; i++) {338struct radv_shader_inst *inst = &instructions[i];339340fprintf(f, "%s\n", inst->text);341342/* Print which waves execute the instruction right now. */343while (num_waves && start_addr + inst->offset == waves->pc) {344fprintf(f,345" " COLOR_GREEN "^ SE%u SH%u CU%u "346"SIMD%u WAVE%u EXEC=%016" PRIx64 " ",347waves->se, waves->sh, waves->cu, waves->simd, waves->wave, waves->exec);348349if (inst->size == 4) {350fprintf(f, "INST32=%08X" COLOR_RESET "\n", waves->inst_dw0);351} else {352fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n", waves->inst_dw0, waves->inst_dw1);353}354355waves->matched = true;356waves = &waves[1];357num_waves--;358}359}360361fprintf(f, "\n\n");362free(instructions);363}364365static void366radv_dump_annotated_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,367FILE *f)368{369struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];370enum chip_class chip_class = pipeline->device->physical_device->rad_info.chip_class;371unsigned num_waves = ac_get_wave_info(chip_class, waves);372373fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET "\n\n", num_waves);374375/* Dump annotated active graphics shaders. */376unsigned stages = active_stages;377while (stages) {378int stage = u_bit_scan(&stages);379380radv_dump_annotated_shader(pipeline->shaders[stage], stage, waves, num_waves, f);381}382383/* Print waves executing shaders that are not currently bound. */384unsigned i;385bool found = false;386for (i = 0; i < num_waves; i++) {387if (waves[i].matched)388continue;389390if (!found) {391fprintf(f, COLOR_CYAN "Waves not executing currently-bound shaders:" COLOR_RESET "\n");392found = true;393}394fprintf(f,395" SE%u SH%u CU%u SIMD%u WAVE%u EXEC=%016" PRIx64 " INST=%08X %08X PC=%" PRIx64396"\n",397waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd, waves[i].wave, waves[i].exec,398waves[i].inst_dw0, waves[i].inst_dw1, waves[i].pc);399}400if (found)401fprintf(f, "\n\n");402}403404static void405radv_dump_spirv(struct radv_shader_variant *shader, const char *sha1, const char *dump_dir)406{407char dump_path[512];408FILE *f;409410snprintf(dump_path, sizeof(dump_path), "%s/%s.spv", dump_dir, sha1);411412f = fopen(dump_path, "w+");413if (f) {414fwrite(shader->spirv, shader->spirv_size, 1, f);415fclose(f);416}417}418419static void420radv_dump_shader(struct radv_pipeline *pipeline, struct radv_shader_variant *shader,421gl_shader_stage stage, const char *dump_dir, FILE *f)422{423if (!shader)424return;425426fprintf(f, "%s:\n\n", radv_get_shader_name(&shader->info, stage));427428if (shader->spirv) {429unsigned char sha1[21];430char sha1buf[41];431432_mesa_sha1_compute(shader->spirv, shader->spirv_size, sha1);433_mesa_sha1_format(sha1buf, sha1);434435fprintf(f, "SPIRV (see %s.spv)\n\n", sha1buf);436radv_dump_spirv(shader, sha1buf, dump_dir);437}438439if (shader->nir_string) {440fprintf(f, "NIR:\n%s\n", shader->nir_string);441}442443fprintf(f, "%s IR:\n%s\n", pipeline->device->physical_device->use_llvm ? "LLVM" : "ACO",444shader->ir_string);445fprintf(f, "DISASM:\n%s\n", shader->disasm_string);446447radv_dump_shader_stats(pipeline->device, pipeline, stage, f);448}449450static void451radv_dump_shaders(struct radv_pipeline *pipeline, VkShaderStageFlagBits active_stages,452const char *dump_dir, FILE *f)453{454/* Dump active graphics shaders. */455unsigned stages = active_stages;456while (stages) {457int stage = u_bit_scan(&stages);458459radv_dump_shader(pipeline, pipeline->shaders[stage], stage, dump_dir, f);460}461}462463static void464radv_dump_vertex_descriptors(struct radv_pipeline *pipeline, FILE *f)465{466void *ptr = (uint64_t *)pipeline->device->trace_id_ptr;467uint32_t count = util_bitcount(pipeline->vb_desc_usage_mask);468uint32_t *vb_ptr = &((uint32_t *)ptr)[3];469470if (!count)471return;472473fprintf(f, "Num vertex %s: %d\n",474pipeline->use_per_attribute_vb_descs ? "attributes" : "bindings", count);475for (uint32_t i = 0; i < count; i++) {476uint32_t *desc = &((uint32_t *)vb_ptr)[i * 4];477uint64_t va = 0;478479va |= desc[0];480va |= (uint64_t)G_008F04_BASE_ADDRESS_HI(desc[1]) << 32;481482fprintf(f, "VBO#%d:\n", i);483fprintf(f, "\tVA: 0x%" PRIx64 "\n", va);484fprintf(f, "\tStride: %d\n", G_008F04_STRIDE(desc[1]));485fprintf(f, "\tNum records: %d (0x%x)\n", desc[2], desc[2]);486}487}488489static struct radv_pipeline *490radv_get_saved_pipeline(struct radv_device *device, enum ring_type ring)491{492uint64_t *ptr = (uint64_t *)device->trace_id_ptr;493int offset = ring == RING_GFX ? 1 : 2;494495return *(struct radv_pipeline **)(ptr + offset);496}497498static void499radv_dump_queue_state(struct radv_queue *queue, const char *dump_dir, FILE *f)500{501enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);502struct radv_pipeline *pipeline;503504fprintf(f, "RING_%s:\n", ring == RING_GFX ? "GFX" : "COMPUTE");505506pipeline = radv_get_saved_pipeline(queue->device, ring);507if (pipeline) {508radv_dump_shaders(pipeline, pipeline->active_stages, dump_dir, f);509if (!(queue->device->instance->debug_flags & RADV_DEBUG_NO_UMR))510radv_dump_annotated_shaders(pipeline, pipeline->active_stages, f);511radv_dump_vertex_descriptors(pipeline, f);512radv_dump_descriptors(queue->device, f);513}514}515516static void517radv_dump_cmd(const char *cmd, FILE *f)518{519#ifndef _WIN32520char line[2048];521FILE *p;522523p = popen(cmd, "r");524if (p) {525while (fgets(line, sizeof(line), p))526fputs(line, f);527fprintf(f, "\n");528pclose(p);529}530#endif531}532533static void534radv_dump_dmesg(FILE *f)535{536fprintf(f, "\nLast 60 lines of dmesg:\n\n");537radv_dump_cmd("dmesg | tail -n60", f);538}539540void541radv_dump_enabled_options(struct radv_device *device, FILE *f)542{543uint64_t mask;544545if (device->instance->debug_flags) {546fprintf(f, "Enabled debug options: ");547548mask = device->instance->debug_flags;549while (mask) {550int i = u_bit_scan64(&mask);551fprintf(f, "%s, ", radv_get_debug_option_name(i));552}553fprintf(f, "\n");554}555556if (device->instance->perftest_flags) {557fprintf(f, "Enabled perftest options: ");558559mask = device->instance->perftest_flags;560while (mask) {561int i = u_bit_scan64(&mask);562fprintf(f, "%s, ", radv_get_perftest_option_name(i));563}564fprintf(f, "\n");565}566}567568static void569radv_dump_app_info(struct radv_device *device, FILE *f)570{571struct radv_instance *instance = device->instance;572573fprintf(f, "Application name: %s\n", instance->vk.app_info.app_name);574fprintf(f, "Application version: %d\n", instance->vk.app_info.app_version);575fprintf(f, "Engine name: %s\n", instance->vk.app_info.engine_name);576fprintf(f, "Engine version: %d\n", instance->vk.app_info.engine_version);577fprintf(f, "API version: %d.%d.%d\n", VK_VERSION_MAJOR(instance->vk.app_info.api_version),578VK_VERSION_MINOR(instance->vk.app_info.api_version),579VK_VERSION_PATCH(instance->vk.app_info.api_version));580581radv_dump_enabled_options(device, f);582}583584static void585radv_dump_device_name(struct radv_device *device, FILE *f)586{587struct radeon_info *info = &device->physical_device->rad_info;588#ifndef _WIN32589char kernel_version[128] = {0};590struct utsname uname_data;591#endif592const char *chip_name;593594chip_name = device->ws->get_chip_name(device->ws);595596#ifdef _WIN32597fprintf(f, "Device name: %s (%s / DRM %i.%i.%i)\n\n", chip_name, device->physical_device->name,598info->drm_major, info->drm_minor, info->drm_patchlevel);599#else600if (uname(&uname_data) == 0)601snprintf(kernel_version, sizeof(kernel_version), " / %s", uname_data.release);602603fprintf(f, "Device name: %s (%s / DRM %i.%i.%i%s)\n\n", chip_name, device->physical_device->name,604info->drm_major, info->drm_minor, info->drm_patchlevel, kernel_version);605#endif606}607608static void609radv_dump_umr_ring(struct radv_queue *queue, FILE *f)610{611enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);612struct radv_device *device = queue->device;613char cmd[128];614615/* TODO: Dump compute ring. */616if (ring != RING_GFX)617return;618619sprintf(cmd, "umr -R %s 2>&1",620device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");621622fprintf(f, "\nUMR GFX ring:\n\n");623radv_dump_cmd(cmd, f);624}625626static void627radv_dump_umr_waves(struct radv_queue *queue, FILE *f)628{629enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);630struct radv_device *device = queue->device;631char cmd[128];632633/* TODO: Dump compute ring. */634if (ring != RING_GFX)635return;636637sprintf(cmd, "umr -O bits,halt_waves -wa %s 2>&1",638device->physical_device->rad_info.chip_class >= GFX10 ? "gfx_0.0.0" : "gfx");639640fprintf(f, "\nUMR GFX waves:\n\n");641radv_dump_cmd(cmd, f);642}643644static bool645radv_gpu_hang_occured(struct radv_queue *queue, enum ring_type ring)646{647struct radeon_winsys *ws = queue->device->ws;648649if (!ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx))650return true;651652return false;653}654655void656radv_check_gpu_hangs(struct radv_queue *queue, struct radeon_cmdbuf *cs)657{658struct radv_device *device = queue->device;659enum ring_type ring;660uint64_t addr;661662ring = radv_queue_family_to_ring(queue->queue_family_index);663664bool hang_occurred = radv_gpu_hang_occured(queue, ring);665bool vm_fault_occurred = false;666if (queue->device->instance->debug_flags & RADV_DEBUG_VM_FAULTS)667vm_fault_occurred = ac_vm_fault_occured(device->physical_device->rad_info.chip_class,668&device->dmesg_timestamp, &addr);669if (!hang_occurred && !vm_fault_occurred)670return;671672fprintf(stderr, "radv: GPU hang detected...\n");673674#ifndef _WIN32675/* Create a directory into $HOME/radv_dumps_<pid>_<time> to save676* various debugging info about that GPU hang.677*/678struct tm *timep, result;679time_t raw_time;680FILE *f;681char dump_dir[256], dump_path[512], buf_time[128];682683time(&raw_time);684timep = os_localtime(&raw_time, &result);685strftime(buf_time, sizeof(buf_time), "%Y.%m.%d_%H.%M.%S", timep);686687snprintf(dump_dir, sizeof(dump_dir), "%s/" RADV_DUMP_DIR "_%d_%s", debug_get_option("HOME", "."),688getpid(), buf_time);689if (mkdir(dump_dir, 0774) && errno != EEXIST) {690fprintf(stderr, "radv: can't create directory '%s' (%i).\n", dump_dir, errno);691abort();692}693694fprintf(stderr, "radv: GPU hang report will be saved to '%s'!\n", dump_dir);695696/* Dump trace file. */697snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "trace.log");698f = fopen(dump_path, "w+");699if (f) {700radv_dump_trace(queue->device, cs, f);701fclose(f);702}703704/* Dump pipeline state. */705snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "pipeline.log");706f = fopen(dump_path, "w+");707if (f) {708radv_dump_queue_state(queue, dump_dir, f);709fclose(f);710}711712if (!(device->instance->debug_flags & RADV_DEBUG_NO_UMR)) {713/* Dump UMR ring. */714snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_ring.log");715f = fopen(dump_path, "w+");716if (f) {717radv_dump_umr_ring(queue, f);718fclose(f);719}720721/* Dump UMR waves. */722snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "umr_waves.log");723f = fopen(dump_path, "w+");724if (f) {725radv_dump_umr_waves(queue, f);726fclose(f);727}728}729730/* Dump debug registers. */731snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "registers.log");732f = fopen(dump_path, "w+");733if (f) {734radv_dump_debug_registers(device, f);735fclose(f);736}737738/* Dump BO ranges. */739snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_ranges.log");740f = fopen(dump_path, "w+");741if (f) {742device->ws->dump_bo_ranges(device->ws, f);743fclose(f);744}745746/* Dump BO log. */747snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "bo_history.log");748f = fopen(dump_path, "w+");749if (f) {750device->ws->dump_bo_log(device->ws, f);751fclose(f);752}753754/* Dump VM fault info. */755if (vm_fault_occurred) {756snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "vm_fault.log");757f = fopen(dump_path, "w+");758if (f) {759fprintf(f, "VM fault report.\n\n");760fprintf(f, "Failing VM page: 0x%08" PRIx64 "\n\n", addr);761fclose(f);762}763}764765/* Dump app info. */766snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "app_info.log");767f = fopen(dump_path, "w+");768if (f) {769radv_dump_app_info(device, f);770fclose(f);771}772773/* Dump GPU info. */774snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "gpu_info.log");775f = fopen(dump_path, "w+");776if (f) {777radv_dump_device_name(device, f);778ac_print_gpu_info(&device->physical_device->rad_info, f);779fclose(f);780}781782/* Dump dmesg. */783snprintf(dump_path, sizeof(dump_path), "%s/%s", dump_dir, "dmesg.log");784f = fopen(dump_path, "w+");785if (f) {786radv_dump_dmesg(f);787fclose(f);788}789#endif790791fprintf(stderr, "radv: GPU hang report saved successfully!\n");792abort();793}794795void796radv_print_spirv(const char *data, uint32_t size, FILE *fp)797{798#ifndef _WIN32799char path[] = "/tmp/fileXXXXXX";800char command[128];801int fd;802803/* Dump the binary into a temporary file. */804fd = mkstemp(path);805if (fd < 0)806return;807808if (write(fd, data, size) == -1)809goto fail;810811/* Disassemble using spirv-dis if installed. */812sprintf(command, "spirv-dis %s", path);813radv_dump_cmd(command, fp);814815fail:816close(fd);817unlink(path);818#endif819}820821bool822radv_trap_handler_init(struct radv_device *device)823{824struct radeon_winsys *ws = device->ws;825VkResult result;826827/* Create the trap handler shader and upload it like other shaders. */828device->trap_handler_shader = radv_create_trap_handler_shader(device);829if (!device->trap_handler_shader) {830fprintf(stderr, "radv: failed to create the trap handler shader.\n");831return false;832}833834result = ws->buffer_make_resident(ws, device->trap_handler_shader->bo, true);835if (result != VK_SUCCESS)836return false;837838result = ws->buffer_create(ws, TMA_BO_SIZE, 256, RADEON_DOMAIN_VRAM,839RADEON_FLAG_CPU_ACCESS | RADEON_FLAG_NO_INTERPROCESS_SHARING |840RADEON_FLAG_ZERO_VRAM | RADEON_FLAG_32BIT,841RADV_BO_PRIORITY_SCRATCH, 0, &device->tma_bo);842if (result != VK_SUCCESS)843return false;844845result = ws->buffer_make_resident(ws, device->tma_bo, true);846if (result != VK_SUCCESS)847return false;848849device->tma_ptr = ws->buffer_map(device->tma_bo);850if (!device->tma_ptr)851return false;852853/* Upload a buffer descriptor to store various info from the trap. */854uint64_t tma_va = radv_buffer_get_va(device->tma_bo) + 16;855uint32_t desc[4];856857desc[0] = tma_va;858desc[1] = S_008F04_BASE_ADDRESS_HI(tma_va >> 32);859desc[2] = TMA_BO_SIZE;860desc[3] = S_008F0C_DST_SEL_X(V_008F0C_SQ_SEL_X) | S_008F0C_DST_SEL_Y(V_008F0C_SQ_SEL_Y) |861S_008F0C_DST_SEL_Z(V_008F0C_SQ_SEL_Z) | S_008F0C_DST_SEL_W(V_008F0C_SQ_SEL_W) |862S_008F0C_DATA_FORMAT(V_008F0C_BUF_DATA_FORMAT_32);863864memcpy(device->tma_ptr, desc, sizeof(desc));865866return true;867}868869void870radv_trap_handler_finish(struct radv_device *device)871{872struct radeon_winsys *ws = device->ws;873874if (unlikely(device->trap_handler_shader)) {875ws->buffer_make_resident(ws, device->trap_handler_shader->bo, false);876radv_shader_variant_destroy(device, device->trap_handler_shader);877}878879if (unlikely(device->tma_bo)) {880ws->buffer_make_resident(ws, device->tma_bo, false);881ws->buffer_destroy(ws, device->tma_bo);882}883}884885static struct radv_shader_variant *886radv_get_faulty_shader(struct radv_device *device, uint64_t faulty_pc)887{888struct radv_shader_variant *shader = NULL;889890mtx_lock(&device->shader_slab_mutex);891892list_for_each_entry(struct radv_shader_slab, slab, &device->shader_slabs, slabs)893{894#ifdef __GNUC__895#pragma GCC diagnostic push896#pragma GCC diagnostic ignored "-Wshadow"897#endif898list_for_each_entry(struct radv_shader_variant, s, &slab->shaders, slab_list)899{900#ifdef __GNUC__901#pragma GCC diagnostic pop902#endif903uint64_t offset = align_u64(s->bo_offset + s->code_size, 256);904uint64_t va = radv_buffer_get_va(s->bo);905906if (faulty_pc >= va + s->bo_offset && faulty_pc < va + offset) {907mtx_unlock(&device->shader_slab_mutex);908return s;909}910}911}912mtx_unlock(&device->shader_slab_mutex);913914return shader;915}916917static void918radv_dump_faulty_shader(struct radv_device *device, uint64_t faulty_pc)919{920struct radv_shader_variant *shader;921uint64_t start_addr, end_addr;922uint32_t instr_offset;923924shader = radv_get_faulty_shader(device, faulty_pc);925if (!shader)926return;927928start_addr = radv_buffer_get_va(shader->bo) + shader->bo_offset;929end_addr = start_addr + shader->code_size;930instr_offset = faulty_pc - start_addr;931932fprintf(stderr,933"Faulty shader found "934"VA=[0x%" PRIx64 "-0x%" PRIx64 "], instr_offset=%d\n",935start_addr, end_addr, instr_offset);936937/* Get the list of instructions.938* Buffer size / 4 is the upper bound of the instruction count.939*/940unsigned num_inst = 0;941struct radv_shader_inst *instructions =942calloc(shader->code_size / 4, sizeof(struct radv_shader_inst));943944/* Split the disassembly string into instructions. */945si_add_split_disasm(shader->disasm_string, start_addr, &num_inst, instructions);946947/* Print instructions with annotations. */948for (unsigned i = 0; i < num_inst; i++) {949struct radv_shader_inst *inst = &instructions[i];950951if (start_addr + inst->offset == faulty_pc) {952fprintf(stderr, "\n!!! Faulty instruction below !!!\n");953fprintf(stderr, "%s\n", inst->text);954fprintf(stderr, "\n");955} else {956fprintf(stderr, "%s\n", inst->text);957}958}959960free(instructions);961}962963struct radv_sq_hw_reg {964uint32_t status;965uint32_t trap_sts;966uint32_t hw_id;967uint32_t ib_sts;968};969970static void971radv_dump_sq_hw_regs(struct radv_device *device)972{973struct radv_sq_hw_reg *regs = (struct radv_sq_hw_reg *)&device->tma_ptr[6];974975fprintf(stderr, "\nHardware registers:\n");976if (device->physical_device->rad_info.chip_class >= GFX10) {977ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000408_SQ_WAVE_STATUS,978regs->status, ~0);979ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00040C_SQ_WAVE_TRAPSTS,980regs->trap_sts, ~0);981ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00045C_SQ_WAVE_HW_ID1,982regs->hw_id, ~0);983ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00041C_SQ_WAVE_IB_STS,984regs->ib_sts, ~0);985} else {986ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000048_SQ_WAVE_STATUS,987regs->status, ~0);988ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00004C_SQ_WAVE_TRAPSTS,989regs->trap_sts, ~0);990ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_000050_SQ_WAVE_HW_ID,991regs->hw_id, ~0);992ac_dump_reg(stderr, device->physical_device->rad_info.chip_class, R_00005C_SQ_WAVE_IB_STS,993regs->ib_sts, ~0);994}995fprintf(stderr, "\n\n");996}997998void999radv_check_trap_handler(struct radv_queue *queue)1000{1001enum ring_type ring = radv_queue_family_to_ring(queue->queue_family_index);1002struct radv_device *device = queue->device;1003struct radeon_winsys *ws = device->ws;10041005/* Wait for the context to be idle in a finite time. */1006ws->ctx_wait_idle(queue->hw_ctx, ring, queue->queue_idx);10071008/* Try to detect if the trap handler has been reached by the hw by1009* looking at ttmp0 which should be non-zero if a shader exception1010* happened.1011*/1012if (!device->tma_ptr[4])1013return;10141015#if 01016fprintf(stderr, "tma_ptr:\n");1017for (unsigned i = 0; i < 10; i++)1018fprintf(stderr, "tma_ptr[%d]=0x%x\n", i, device->tma_ptr[i]);1019#endif10201021radv_dump_sq_hw_regs(device);10221023uint32_t ttmp0 = device->tma_ptr[4];1024uint32_t ttmp1 = device->tma_ptr[5];10251026/* According to the ISA docs, 3.10 Trap and Exception Registers:1027*1028* "{ttmp1, ttmp0} = {3'h0, pc_rewind[3:0], HT[0], trapID[7:0], PC[47:0]}"1029*1030* "When the trap handler is entered, the PC of the faulting1031* instruction is: (PC - PC_rewind * 4)."1032* */1033uint8_t trap_id = (ttmp1 >> 16) & 0xff;1034uint8_t ht = (ttmp1 >> 24) & 0x1;1035uint8_t pc_rewind = (ttmp1 >> 25) & 0xf;1036uint64_t pc = (ttmp0 | ((ttmp1 & 0x0000ffffull) << 32)) - (pc_rewind * 4);10371038fprintf(stderr, "PC=0x%" PRIx64 ", trapID=%d, HT=%d, PC_rewind=%d\n", pc, trap_id, ht,1039pc_rewind);10401041radv_dump_faulty_shader(device, pc);10421043abort();1044}104510461047