Path: blob/21.2-virgl/src/freedreno/decode/cffdec.c
4565 views
/*1* Copyright (c) 2012 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include <assert.h>24#include <ctype.h>25#include <err.h>26#include <errno.h>27#include <fcntl.h>28#include <inttypes.h>29#include <signal.h>30#include <stdarg.h>31#include <stdbool.h>32#include <stdint.h>33#include <stdio.h>34#include <stdlib.h>35#include <string.h>36#include <unistd.h>37#include <sys/stat.h>38#include <sys/types.h>39#include <sys/wait.h>4041#include "freedreno_pm4.h"4243#include "buffers.h"44#include "cffdec.h"45#include "disasm.h"46#include "redump.h"47#include "rnnutil.h"48#include "script.h"4950/* ************************************************************************* */51/* originally based on kernel recovery dump code: */5253static const struct cffdec_options *options;5455static bool needs_wfi = false;56static bool summary = false;57static bool in_summary = false;58static int vertices;5960static inline unsigned61regcnt(void)62{63if (options->gpu_id >= 500)64return 0xffff;65else66return 0x7fff;67}6869static int70is_64b(void)71{72return options->gpu_id >= 500;73}7475static int draws[4];76static struct {77uint64_t base;78uint32_t size; /* in dwords */79/* Generally cmdstream consists of multiple IB calls to different80* buffers, which are themselves often re-used for each tile. The81* triggered flag serves two purposes to help make it more clear82* what part of the cmdstream is before vs after the the GPU hang:83*84* 1) if in IB2 we are passed the point within the IB2 buffer where85* the GPU hung, but IB1 is not passed the point within its86* buffer where the GPU had hung, then we know the GPU hang87* happens on a future use of that IB2 buffer.88*89* 2) if in an IB1 or IB2 buffer that is not the one where the GPU90* hung, but we've already passed the trigger point at the same91* IB level, we know that we are passed the point where the GPU92* had hung.93*94* So this is a one way switch, false->true. And a higher #'d95* IB level isn't considered triggered unless the lower #'d IB96* level is.97*/98bool triggered;99} ibs[4];100static int ib;101102static int draw_count;103static int current_draw_count;104105/* query mode.. to handle symbolic register name queries, we need to106* defer parsing query string until after gpu_id is know and rnn db107* loaded:108*/109static int *queryvals;110111static bool112quiet(int lvl)113{114if ((options->draw_filter != -1) &&115(options->draw_filter != current_draw_count))116return true;117if ((lvl >= 3) && (summary || options->querystrs || options->script))118return true;119if ((lvl >= 2) && (options->querystrs || options->script))120return true;121return false;122}123124void125printl(int lvl, const char *fmt, ...)126{127va_list args;128if (quiet(lvl))129return;130va_start(args, fmt);131vprintf(fmt, args);132va_end(args);133}134135static const char *levels[] = {136"\t",137"\t\t",138"\t\t\t",139"\t\t\t\t",140"\t\t\t\t\t",141"\t\t\t\t\t\t",142"\t\t\t\t\t\t\t",143"\t\t\t\t\t\t\t\t",144"\t\t\t\t\t\t\t\t\t",145"x",146"x",147"x",148"x",149"x",150"x",151};152153enum state_src_t {154STATE_SRC_DIRECT,155STATE_SRC_INDIRECT,156STATE_SRC_BINDLESS,157};158159/* SDS (CP_SET_DRAW_STATE) helpers: */160static void load_all_groups(int level);161static void disable_all_groups(void);162163static void dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit,164int level);165static void dump_tex_const(uint32_t *texsamp, int num_unit, int level);166167static bool168highlight_gpuaddr(uint64_t gpuaddr)169{170if (!options->color)171return false;172173if (!options->ibs[ib].base)174return false;175176if ((ib > 0) && options->ibs[ib - 1].base && !ibs[ib - 1].triggered)177return false;178179if (ibs[ib].triggered)180return true;181182if (options->ibs[ib].base != ibs[ib].base)183return false;184185uint64_t start = ibs[ib].base + 4 * (ibs[ib].size - options->ibs[ib].rem);186uint64_t end = ibs[ib].base + 4 * ibs[ib].size;187188bool triggered = (start <= gpuaddr) && (gpuaddr <= end);189190ibs[ib].triggered |= triggered;191192if (triggered)193printf("ESTIMATED CRASH LOCATION!\n");194195return triggered;196}197198static void199dump_hex(uint32_t *dwords, uint32_t sizedwords, int level)200{201int i, j;202int lastzero = 1;203204if (quiet(2))205return;206207for (i = 0; i < sizedwords; i += 8) {208int zero = 1;209210/* always show first row: */211if (i == 0)212zero = 0;213214for (j = 0; (j < 8) && (i + j < sizedwords) && zero; j++)215if (dwords[i + j])216zero = 0;217218if (zero && !lastzero)219printf("*\n");220221lastzero = zero;222223if (zero)224continue;225226uint64_t addr = gpuaddr(&dwords[i]);227bool highlight = highlight_gpuaddr(addr);228229if (highlight)230printf("\x1b[0;1;31m");231232if (is_64b()) {233printf("%016" PRIx64 ":%s", addr, levels[level]);234} else {235printf("%08x:%s", (uint32_t)addr, levels[level]);236}237238if (highlight)239printf("\x1b[0m");240241printf("%04x:", i * 4);242243for (j = 0; (j < 8) && (i + j < sizedwords); j++) {244printf(" %08x", dwords[i + j]);245}246247printf("\n");248}249}250251static void252dump_float(float *dwords, uint32_t sizedwords, int level)253{254int i;255for (i = 0; i < sizedwords; i++) {256if ((i % 8) == 0) {257if (is_64b()) {258printf("%016" PRIx64 ":%s", gpuaddr(dwords), levels[level]);259} else {260printf("%08x:%s", (uint32_t)gpuaddr(dwords), levels[level]);261}262} else {263printf(" ");264}265printf("%8f", *(dwords++));266if ((i % 8) == 7)267printf("\n");268}269if (i % 8)270printf("\n");271}272273/* I believe the surface format is low bits:274#define RB_COLOR_INFO__COLOR_FORMAT_MASK 0x0000000fL275comments in sys2gmem_tex_const indicate that address is [31:12], but276looks like at least some of the bits above the format have different meaning..277*/278static void279parse_dword_addr(uint32_t dword, uint32_t *gpuaddr, uint32_t *flags,280uint32_t mask)281{282assert(!is_64b()); /* this is only used on a2xx */283*gpuaddr = dword & ~mask;284*flags = dword & mask;285}286287static uint32_t type0_reg_vals[0xffff + 1];288static uint8_t type0_reg_rewritten[sizeof(type0_reg_vals) /2898]; /* written since last draw */290static uint8_t type0_reg_written[sizeof(type0_reg_vals) / 8];291static uint32_t lastvals[ARRAY_SIZE(type0_reg_vals)];292293static bool294reg_rewritten(uint32_t regbase)295{296return !!(type0_reg_rewritten[regbase / 8] & (1 << (regbase % 8)));297}298299bool300reg_written(uint32_t regbase)301{302return !!(type0_reg_written[regbase / 8] & (1 << (regbase % 8)));303}304305static void306clear_rewritten(void)307{308memset(type0_reg_rewritten, 0, sizeof(type0_reg_rewritten));309}310311static void312clear_written(void)313{314memset(type0_reg_written, 0, sizeof(type0_reg_written));315clear_rewritten();316}317318uint32_t319reg_lastval(uint32_t regbase)320{321return lastvals[regbase];322}323324static void325clear_lastvals(void)326{327memset(lastvals, 0, sizeof(lastvals));328}329330uint32_t331reg_val(uint32_t regbase)332{333return type0_reg_vals[regbase];334}335336void337reg_set(uint32_t regbase, uint32_t val)338{339assert(regbase < regcnt());340type0_reg_vals[regbase] = val;341type0_reg_written[regbase / 8] |= (1 << (regbase % 8));342type0_reg_rewritten[regbase / 8] |= (1 << (regbase % 8));343}344345static void346reg_dump_scratch(const char *name, uint32_t dword, int level)347{348unsigned r;349350if (quiet(3))351return;352353r = regbase("CP_SCRATCH[0].REG");354355// if not, try old a2xx/a3xx version:356if (!r)357r = regbase("CP_SCRATCH_REG0");358359if (!r)360return;361362printf("%s:%u,%u,%u,%u\n", levels[level], reg_val(r + 4), reg_val(r + 5),363reg_val(r + 6), reg_val(r + 7));364}365366static void367dump_gpuaddr_size(uint64_t gpuaddr, int level, int sizedwords, int quietlvl)368{369void *buf;370371if (quiet(quietlvl))372return;373374buf = hostptr(gpuaddr);375if (buf) {376dump_hex(buf, sizedwords, level + 1);377}378}379380static void381dump_gpuaddr(uint64_t gpuaddr, int level)382{383dump_gpuaddr_size(gpuaddr, level, 64, 3);384}385386static void387reg_dump_gpuaddr(const char *name, uint32_t dword, int level)388{389dump_gpuaddr(dword, level);390}391392uint32_t gpuaddr_lo;393static void394reg_gpuaddr_lo(const char *name, uint32_t dword, int level)395{396gpuaddr_lo = dword;397}398399static void400reg_dump_gpuaddr_hi(const char *name, uint32_t dword, int level)401{402dump_gpuaddr(gpuaddr_lo | (((uint64_t)dword) << 32), level);403}404405static void406reg_dump_gpuaddr64(const char *name, uint64_t qword, int level)407{408dump_gpuaddr(qword, level);409}410411static void412dump_shader(const char *ext, void *buf, int bufsz)413{414if (options->dump_shaders) {415static int n = 0;416char filename[16];417int fd;418sprintf(filename, "%04d.%s", n++, ext);419fd = open(filename, O_WRONLY | O_TRUNC | O_CREAT, 0644);420if (fd != -1) {421write(fd, buf, bufsz);422close(fd);423}424}425}426427static void428disasm_gpuaddr(const char *name, uint64_t gpuaddr, int level)429{430void *buf;431432gpuaddr &= 0xfffffffffffffff0;433434if (quiet(3))435return;436437buf = hostptr(gpuaddr);438if (buf) {439uint32_t sizedwords = hostlen(gpuaddr) / 4;440const char *ext;441442dump_hex(buf, min(64, sizedwords), level + 1);443try_disasm_a3xx(buf, sizedwords, level + 2, stdout, options->gpu_id);444445/* this is a bit ugly way, but oh well.. */446if (strstr(name, "SP_VS_OBJ")) {447ext = "vo3";448} else if (strstr(name, "SP_FS_OBJ")) {449ext = "fo3";450} else if (strstr(name, "SP_GS_OBJ")) {451ext = "go3";452} else if (strstr(name, "SP_CS_OBJ")) {453ext = "co3";454} else {455ext = NULL;456}457458if (ext)459dump_shader(ext, buf, sizedwords * 4);460}461}462463static void464reg_disasm_gpuaddr(const char *name, uint32_t dword, int level)465{466disasm_gpuaddr(name, dword, level);467}468469static void470reg_disasm_gpuaddr_hi(const char *name, uint32_t dword, int level)471{472disasm_gpuaddr(name, gpuaddr_lo | (((uint64_t)dword) << 32), level);473}474475static void476reg_disasm_gpuaddr64(const char *name, uint64_t qword, int level)477{478disasm_gpuaddr(name, qword, level);479}480481/* Find the value of the TEX_COUNT register that corresponds to the named482* TEX_SAMP/TEX_CONST reg.483*484* Note, this kinda assumes an equal # of samplers and textures, but not485* really sure if there is a much better option. I suppose on a6xx we486* could instead decode the bitfields in SP_xS_CONFIG487*/488static int489get_tex_count(const char *name)490{491char count_reg[strlen(name) + 5];492char *p;493494p = strstr(name, "CONST");495if (!p)496p = strstr(name, "SAMP");497if (!p)498return 0;499500int n = p - name;501strncpy(count_reg, name, n);502strcpy(count_reg + n, "COUNT");503504return reg_val(regbase(count_reg));505}506507static void508reg_dump_tex_samp_hi(const char *name, uint32_t dword, int level)509{510if (!in_summary)511return;512513int num_unit = get_tex_count(name);514uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);515void *buf = hostptr(gpuaddr);516517if (!buf)518return;519520dump_tex_samp(buf, STATE_SRC_DIRECT, num_unit, level + 1);521}522523static void524reg_dump_tex_const_hi(const char *name, uint32_t dword, int level)525{526if (!in_summary)527return;528529int num_unit = get_tex_count(name);530uint64_t gpuaddr = gpuaddr_lo | (((uint64_t)dword) << 32);531void *buf = hostptr(gpuaddr);532533if (!buf)534return;535536dump_tex_const(buf, num_unit, level + 1);537}538539/*540* Registers with special handling (rnndec_decode() handles rest):541*/542#define REG(x, fxn) { #x, fxn }543#define REG64(x, fxn) { #x, .fxn64 = fxn, .is_reg64 = true }544static struct {545const char *regname;546void (*fxn)(const char *name, uint32_t dword, int level);547void (*fxn64)(const char *name, uint64_t qword, int level);548uint32_t regbase;549bool is_reg64;550} reg_a2xx[] = {551REG(CP_SCRATCH_REG0, reg_dump_scratch),552REG(CP_SCRATCH_REG1, reg_dump_scratch),553REG(CP_SCRATCH_REG2, reg_dump_scratch),554REG(CP_SCRATCH_REG3, reg_dump_scratch),555REG(CP_SCRATCH_REG4, reg_dump_scratch),556REG(CP_SCRATCH_REG5, reg_dump_scratch),557REG(CP_SCRATCH_REG6, reg_dump_scratch),558REG(CP_SCRATCH_REG7, reg_dump_scratch),559{NULL},560}, reg_a3xx[] = {561REG(CP_SCRATCH_REG0, reg_dump_scratch),562REG(CP_SCRATCH_REG1, reg_dump_scratch),563REG(CP_SCRATCH_REG2, reg_dump_scratch),564REG(CP_SCRATCH_REG3, reg_dump_scratch),565REG(CP_SCRATCH_REG4, reg_dump_scratch),566REG(CP_SCRATCH_REG5, reg_dump_scratch),567REG(CP_SCRATCH_REG6, reg_dump_scratch),568REG(CP_SCRATCH_REG7, reg_dump_scratch),569REG(VSC_SIZE_ADDRESS, reg_dump_gpuaddr),570REG(SP_VS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),571REG(SP_FS_PVT_MEM_ADDR_REG, reg_dump_gpuaddr),572REG(SP_VS_OBJ_START_REG, reg_disasm_gpuaddr),573REG(SP_FS_OBJ_START_REG, reg_disasm_gpuaddr),574REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),575{NULL},576}, reg_a4xx[] = {577REG(CP_SCRATCH[0].REG, reg_dump_scratch),578REG(CP_SCRATCH[0x1].REG, reg_dump_scratch),579REG(CP_SCRATCH[0x2].REG, reg_dump_scratch),580REG(CP_SCRATCH[0x3].REG, reg_dump_scratch),581REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),582REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),583REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),584REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),585REG(SP_VS_PVT_MEM_ADDR, reg_dump_gpuaddr),586REG(SP_FS_PVT_MEM_ADDR, reg_dump_gpuaddr),587REG(SP_GS_PVT_MEM_ADDR, reg_dump_gpuaddr),588REG(SP_HS_PVT_MEM_ADDR, reg_dump_gpuaddr),589REG(SP_DS_PVT_MEM_ADDR, reg_dump_gpuaddr),590REG(SP_CS_PVT_MEM_ADDR, reg_dump_gpuaddr),591REG(SP_VS_OBJ_START, reg_disasm_gpuaddr),592REG(SP_FS_OBJ_START, reg_disasm_gpuaddr),593REG(SP_GS_OBJ_START, reg_disasm_gpuaddr),594REG(SP_HS_OBJ_START, reg_disasm_gpuaddr),595REG(SP_DS_OBJ_START, reg_disasm_gpuaddr),596REG(SP_CS_OBJ_START, reg_disasm_gpuaddr),597REG(TPL1_TP_VS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),598REG(TPL1_TP_HS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),599REG(TPL1_TP_DS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),600REG(TPL1_TP_GS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),601REG(TPL1_TP_FS_BORDER_COLOR_BASE_ADDR, reg_dump_gpuaddr),602{NULL},603}, reg_a5xx[] = {604REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),605REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),606REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),607REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),608REG(SP_VS_OBJ_START_LO, reg_gpuaddr_lo),609REG(SP_VS_OBJ_START_HI, reg_disasm_gpuaddr_hi),610REG(SP_HS_OBJ_START_LO, reg_gpuaddr_lo),611REG(SP_HS_OBJ_START_HI, reg_disasm_gpuaddr_hi),612REG(SP_DS_OBJ_START_LO, reg_gpuaddr_lo),613REG(SP_DS_OBJ_START_HI, reg_disasm_gpuaddr_hi),614REG(SP_GS_OBJ_START_LO, reg_gpuaddr_lo),615REG(SP_GS_OBJ_START_HI, reg_disasm_gpuaddr_hi),616REG(SP_FS_OBJ_START_LO, reg_gpuaddr_lo),617REG(SP_FS_OBJ_START_HI, reg_disasm_gpuaddr_hi),618REG(SP_CS_OBJ_START_LO, reg_gpuaddr_lo),619REG(SP_CS_OBJ_START_HI, reg_disasm_gpuaddr_hi),620REG(TPL1_VS_TEX_CONST_LO, reg_gpuaddr_lo),621REG(TPL1_VS_TEX_CONST_HI, reg_dump_tex_const_hi),622REG(TPL1_VS_TEX_SAMP_LO, reg_gpuaddr_lo),623REG(TPL1_VS_TEX_SAMP_HI, reg_dump_tex_samp_hi),624REG(TPL1_HS_TEX_CONST_LO, reg_gpuaddr_lo),625REG(TPL1_HS_TEX_CONST_HI, reg_dump_tex_const_hi),626REG(TPL1_HS_TEX_SAMP_LO, reg_gpuaddr_lo),627REG(TPL1_HS_TEX_SAMP_HI, reg_dump_tex_samp_hi),628REG(TPL1_DS_TEX_CONST_LO, reg_gpuaddr_lo),629REG(TPL1_DS_TEX_CONST_HI, reg_dump_tex_const_hi),630REG(TPL1_DS_TEX_SAMP_LO, reg_gpuaddr_lo),631REG(TPL1_DS_TEX_SAMP_HI, reg_dump_tex_samp_hi),632REG(TPL1_GS_TEX_CONST_LO, reg_gpuaddr_lo),633REG(TPL1_GS_TEX_CONST_HI, reg_dump_tex_const_hi),634REG(TPL1_GS_TEX_SAMP_LO, reg_gpuaddr_lo),635REG(TPL1_GS_TEX_SAMP_HI, reg_dump_tex_samp_hi),636REG(TPL1_FS_TEX_CONST_LO, reg_gpuaddr_lo),637REG(TPL1_FS_TEX_CONST_HI, reg_dump_tex_const_hi),638REG(TPL1_FS_TEX_SAMP_LO, reg_gpuaddr_lo),639REG(TPL1_FS_TEX_SAMP_HI, reg_dump_tex_samp_hi),640REG(TPL1_CS_TEX_CONST_LO, reg_gpuaddr_lo),641REG(TPL1_CS_TEX_CONST_HI, reg_dump_tex_const_hi),642REG(TPL1_CS_TEX_SAMP_LO, reg_gpuaddr_lo),643REG(TPL1_CS_TEX_SAMP_HI, reg_dump_tex_samp_hi),644REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_LO, reg_gpuaddr_lo),645REG(TPL1_TP_BORDER_COLOR_BASE_ADDR_HI, reg_dump_gpuaddr_hi),646// REG(RB_MRT_FLAG_BUFFER[0].ADDR_LO, reg_gpuaddr_lo),647// REG(RB_MRT_FLAG_BUFFER[0].ADDR_HI, reg_dump_gpuaddr_hi),648// REG(RB_MRT_FLAG_BUFFER[1].ADDR_LO, reg_gpuaddr_lo),649// REG(RB_MRT_FLAG_BUFFER[1].ADDR_HI, reg_dump_gpuaddr_hi),650// REG(RB_MRT_FLAG_BUFFER[2].ADDR_LO, reg_gpuaddr_lo),651// REG(RB_MRT_FLAG_BUFFER[2].ADDR_HI, reg_dump_gpuaddr_hi),652// REG(RB_MRT_FLAG_BUFFER[3].ADDR_LO, reg_gpuaddr_lo),653// REG(RB_MRT_FLAG_BUFFER[3].ADDR_HI, reg_dump_gpuaddr_hi),654// REG(RB_MRT_FLAG_BUFFER[4].ADDR_LO, reg_gpuaddr_lo),655// REG(RB_MRT_FLAG_BUFFER[4].ADDR_HI, reg_dump_gpuaddr_hi),656// REG(RB_MRT_FLAG_BUFFER[5].ADDR_LO, reg_gpuaddr_lo),657// REG(RB_MRT_FLAG_BUFFER[5].ADDR_HI, reg_dump_gpuaddr_hi),658// REG(RB_MRT_FLAG_BUFFER[6].ADDR_LO, reg_gpuaddr_lo),659// REG(RB_MRT_FLAG_BUFFER[6].ADDR_HI, reg_dump_gpuaddr_hi),660// REG(RB_MRT_FLAG_BUFFER[7].ADDR_LO, reg_gpuaddr_lo),661// REG(RB_MRT_FLAG_BUFFER[7].ADDR_HI, reg_dump_gpuaddr_hi),662// REG(RB_BLIT_FLAG_DST_LO, reg_gpuaddr_lo),663// REG(RB_BLIT_FLAG_DST_HI, reg_dump_gpuaddr_hi),664// REG(RB_MRT[0].BASE_LO, reg_gpuaddr_lo),665// REG(RB_MRT[0].BASE_HI, reg_dump_gpuaddr_hi),666// REG(RB_DEPTH_BUFFER_BASE_LO, reg_gpuaddr_lo),667// REG(RB_DEPTH_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),668// REG(RB_DEPTH_FLAG_BUFFER_BASE_LO, reg_gpuaddr_lo),669// REG(RB_DEPTH_FLAG_BUFFER_BASE_HI, reg_dump_gpuaddr_hi),670// REG(RB_BLIT_DST_LO, reg_gpuaddr_lo),671// REG(RB_BLIT_DST_HI, reg_dump_gpuaddr_hi),672673// REG(RB_2D_SRC_LO, reg_gpuaddr_lo),674// REG(RB_2D_SRC_HI, reg_dump_gpuaddr_hi),675// REG(RB_2D_SRC_FLAGS_LO, reg_gpuaddr_lo),676// REG(RB_2D_SRC_FLAGS_HI, reg_dump_gpuaddr_hi),677// REG(RB_2D_DST_LO, reg_gpuaddr_lo),678// REG(RB_2D_DST_HI, reg_dump_gpuaddr_hi),679// REG(RB_2D_DST_FLAGS_LO, reg_gpuaddr_lo),680// REG(RB_2D_DST_FLAGS_HI, reg_dump_gpuaddr_hi),681682{NULL},683}, reg_a6xx[] = {684REG(CP_SCRATCH[0x4].REG, reg_dump_scratch),685REG(CP_SCRATCH[0x5].REG, reg_dump_scratch),686REG(CP_SCRATCH[0x6].REG, reg_dump_scratch),687REG(CP_SCRATCH[0x7].REG, reg_dump_scratch),688689REG64(SP_VS_OBJ_START, reg_disasm_gpuaddr64),690REG64(SP_HS_OBJ_START, reg_disasm_gpuaddr64),691REG64(SP_DS_OBJ_START, reg_disasm_gpuaddr64),692REG64(SP_GS_OBJ_START, reg_disasm_gpuaddr64),693REG64(SP_FS_OBJ_START, reg_disasm_gpuaddr64),694REG64(SP_CS_OBJ_START, reg_disasm_gpuaddr64),695696REG64(SP_VS_TEX_CONST, reg_dump_gpuaddr64),697REG64(SP_VS_TEX_SAMP, reg_dump_gpuaddr64),698REG64(SP_HS_TEX_CONST, reg_dump_gpuaddr64),699REG64(SP_HS_TEX_SAMP, reg_dump_gpuaddr64),700REG64(SP_DS_TEX_CONST, reg_dump_gpuaddr64),701REG64(SP_DS_TEX_SAMP, reg_dump_gpuaddr64),702REG64(SP_GS_TEX_CONST, reg_dump_gpuaddr64),703REG64(SP_GS_TEX_SAMP, reg_dump_gpuaddr64),704REG64(SP_FS_TEX_CONST, reg_dump_gpuaddr64),705REG64(SP_FS_TEX_SAMP, reg_dump_gpuaddr64),706REG64(SP_CS_TEX_CONST, reg_dump_gpuaddr64),707REG64(SP_CS_TEX_SAMP, reg_dump_gpuaddr64),708709{NULL},710}, *type0_reg;711712static struct rnn *rnn;713714static void715init_rnn(const char *gpuname)716{717rnn = rnn_new(!options->color);718719rnn_load(rnn, gpuname);720721if (options->querystrs) {722int i;723queryvals = calloc(options->nquery, sizeof(queryvals[0]));724725for (i = 0; i < options->nquery; i++) {726int val = strtol(options->querystrs[i], NULL, 0);727728if (val == 0)729val = regbase(options->querystrs[i]);730731queryvals[i] = val;732printf("querystr: %s -> 0x%x\n", options->querystrs[i], queryvals[i]);733}734}735736for (unsigned idx = 0; type0_reg[idx].regname; idx++) {737type0_reg[idx].regbase = regbase(type0_reg[idx].regname);738if (!type0_reg[idx].regbase) {739printf("invalid register name: %s\n", type0_reg[idx].regname);740exit(1);741}742}743}744745void746reset_regs(void)747{748clear_written();749clear_lastvals();750memset(&ibs, 0, sizeof(ibs));751}752753void754cffdec_init(const struct cffdec_options *_options)755{756options = _options;757summary = options->summary;758759/* in case we're decoding multiple files: */760free(queryvals);761reset_regs();762draw_count = 0;763764/* TODO we need an API to free/cleanup any previous rnn */765766switch (options->gpu_id) {767case 200 ... 299:768type0_reg = reg_a2xx;769init_rnn("a2xx");770break;771case 300 ... 399:772type0_reg = reg_a3xx;773init_rnn("a3xx");774break;775case 400 ... 499:776type0_reg = reg_a4xx;777init_rnn("a4xx");778break;779case 500 ... 599:780type0_reg = reg_a5xx;781init_rnn("a5xx");782break;783case 600 ... 699:784type0_reg = reg_a6xx;785init_rnn("a6xx");786break;787default:788errx(-1, "unsupported gpu");789}790}791792const char *793pktname(unsigned opc)794{795return rnn_enumname(rnn, "adreno_pm4_type3_packets", opc);796}797798const char *799regname(uint32_t regbase, int color)800{801return rnn_regname(rnn, regbase, color);802}803804uint32_t805regbase(const char *name)806{807return rnn_regbase(rnn, name);808}809810static int811endswith(uint32_t regbase, const char *suffix)812{813const char *name = regname(regbase, 0);814const char *s = strstr(name, suffix);815if (!s)816return 0;817return (s - strlen(name) + strlen(suffix)) == name;818}819820void821dump_register_val(uint32_t regbase, uint32_t dword, int level)822{823struct rnndecaddrinfo *info = rnn_reginfo(rnn, regbase);824825if (info && info->typeinfo) {826uint64_t gpuaddr = 0;827char *decoded = rnndec_decodeval(rnn->vc, info->typeinfo, dword);828printf("%s%s: %s", levels[level], info->name, decoded);829830/* Try and figure out if we are looking at a gpuaddr.. this831* might be useful for other gen's too, but at least a5xx has832* the _HI/_LO suffix we can look for. Maybe a better approach833* would be some special annotation in the xml..834* for a6xx use "address" and "waddress" types835*/836if (options->gpu_id >= 600) {837if (!strcmp(info->typeinfo->name, "address") ||838!strcmp(info->typeinfo->name, "waddress")) {839gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;840}841} else if (options->gpu_id >= 500) {842if (endswith(regbase, "_HI") && endswith(regbase - 1, "_LO")) {843gpuaddr = (((uint64_t)dword) << 32) | reg_val(regbase - 1);844} else if (endswith(regbase, "_LO") && endswith(regbase + 1, "_HI")) {845gpuaddr = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;846}847}848849if (gpuaddr && hostptr(gpuaddr)) {850printf("\t\tbase=%" PRIx64 ", offset=%" PRIu64 ", size=%u",851gpubaseaddr(gpuaddr), gpuaddr - gpubaseaddr(gpuaddr),852hostlen(gpubaseaddr(gpuaddr)));853}854855printf("\n");856857free(decoded);858} else if (info) {859printf("%s%s: %08x\n", levels[level], info->name, dword);860} else {861printf("%s<%04x>: %08x\n", levels[level], regbase, dword);862}863864if (info) {865free(info->name);866free(info);867}868}869870static void871dump_register(uint32_t regbase, uint32_t dword, int level)872{873if (!quiet(3)) {874dump_register_val(regbase, dword, level);875}876877for (unsigned idx = 0; type0_reg[idx].regname; idx++) {878if (type0_reg[idx].regbase == regbase) {879if (type0_reg[idx].is_reg64) {880uint64_t qword = (((uint64_t)reg_val(regbase + 1)) << 32) | dword;881type0_reg[idx].fxn64(type0_reg[idx].regname, qword, level);882} else {883type0_reg[idx].fxn(type0_reg[idx].regname, dword, level);884}885break;886}887}888}889890static bool891is_banked_reg(uint32_t regbase)892{893return (0x2000 <= regbase) && (regbase < 0x2400);894}895896static void897dump_registers(uint32_t regbase, uint32_t *dwords, uint32_t sizedwords,898int level)899{900while (sizedwords--) {901int last_summary = summary;902903/* access to non-banked registers needs a WFI:904* TODO banked register range for a2xx??905*/906if (needs_wfi && !is_banked_reg(regbase))907printl(2, "NEEDS WFI: %s (%x)\n", regname(regbase, 1), regbase);908909reg_set(regbase, *dwords);910dump_register(regbase, *dwords, level);911regbase++;912dwords++;913summary = last_summary;914}915}916917static void918dump_domain(uint32_t *dwords, uint32_t sizedwords, int level, const char *name)919{920struct rnndomain *dom;921int i;922923dom = rnn_finddomain(rnn->db, name);924925if (!dom)926return;927928if (script_packet)929script_packet(dwords, sizedwords, rnn, dom);930931if (quiet(2))932return;933934for (i = 0; i < sizedwords; i++) {935struct rnndecaddrinfo *info = rnndec_decodeaddr(rnn->vc, dom, i, 0);936char *decoded;937if (!(info && info->typeinfo))938break;939uint64_t value = dwords[i];940if (info->typeinfo->high >= 32 && i < sizedwords - 1) {941value |= (uint64_t)dwords[i + 1] << 32;942i++; /* skip the next dword since we're printing it now */943}944decoded = rnndec_decodeval(rnn->vc, info->typeinfo, value);945/* Unlike the register printing path, we don't print the name946* of the register, so if it doesn't contain other named947* things (i.e. it isn't a bitset) then print the register948* name as if it's a bitset with a single entry. This avoids949* having to create a dummy register with a single entry to950* get a name in the decoding.951*/952if (info->typeinfo->type == RNN_TTYPE_BITSET ||953info->typeinfo->type == RNN_TTYPE_INLINE_BITSET) {954printf("%s%s\n", levels[level], decoded);955} else {956printf("%s{ %s%s%s = %s }\n", levels[level], rnn->vc->colors->rname,957info->name, rnn->vc->colors->reset, decoded);958}959free(decoded);960free(info->name);961free(info);962}963}964965static uint32_t bin_x1, bin_x2, bin_y1, bin_y2;966static unsigned mode;967static const char *render_mode;968static enum {969MODE_BINNING = 0x1,970MODE_GMEM = 0x2,971MODE_BYPASS = 0x4,972MODE_ALL = MODE_BINNING | MODE_GMEM | MODE_BYPASS,973} enable_mask = MODE_ALL;974static bool skip_ib2_enable_global;975static bool skip_ib2_enable_local;976977static void978print_mode(int level)979{980if ((options->gpu_id >= 500) && !quiet(2)) {981printf("%smode: %s\n", levels[level], render_mode);982printf("%sskip_ib2: g=%d, l=%d\n", levels[level], skip_ib2_enable_global,983skip_ib2_enable_local);984}985}986987static bool988skip_query(void)989{990switch (options->query_mode) {991case QUERY_ALL:992/* never skip: */993return false;994case QUERY_WRITTEN:995for (int i = 0; i < options->nquery; i++) {996uint32_t regbase = queryvals[i];997if (!reg_written(regbase)) {998continue;999}1000if (reg_rewritten(regbase)) {1001return false;1002}1003}1004return true;1005case QUERY_DELTA:1006for (int i = 0; i < options->nquery; i++) {1007uint32_t regbase = queryvals[i];1008if (!reg_written(regbase)) {1009continue;1010}1011uint32_t lastval = reg_val(regbase);1012if (lastval != lastvals[regbase]) {1013return false;1014}1015}1016return true;1017}1018return true;1019}10201021static void1022__do_query(const char *primtype, uint32_t num_indices)1023{1024int n = 0;10251026if ((500 <= options->gpu_id) && (options->gpu_id < 700)) {1027uint32_t scissor_tl = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_TL"));1028uint32_t scissor_br = reg_val(regbase("GRAS_SC_WINDOW_SCISSOR_BR"));10291030bin_x1 = scissor_tl & 0xffff;1031bin_y1 = scissor_tl >> 16;1032bin_x2 = scissor_br & 0xffff;1033bin_y2 = scissor_br >> 16;1034}10351036for (int i = 0; i < options->nquery; i++) {1037uint32_t regbase = queryvals[i];1038if (reg_written(regbase)) {1039uint32_t lastval = reg_val(regbase);1040printf("%4d: %s(%u,%u-%u,%u):%u:", draw_count, primtype, bin_x1,1041bin_y1, bin_x2, bin_y2, num_indices);1042if (options->gpu_id >= 500)1043printf("%s:", render_mode);1044printf("\t%08x", lastval);1045if (lastval != lastvals[regbase]) {1046printf("!");1047} else {1048printf(" ");1049}1050if (reg_rewritten(regbase)) {1051printf("+");1052} else {1053printf(" ");1054}1055dump_register_val(regbase, lastval, 0);1056n++;1057}1058}10591060if (n > 1)1061printf("\n");1062}10631064static void1065do_query_compare(const char *primtype, uint32_t num_indices)1066{1067unsigned saved_enable_mask = enable_mask;1068const char *saved_render_mode = render_mode;10691070/* in 'query-compare' mode, we want to see if the register is writtten1071* or changed in any mode:1072*1073* (NOTE: this could cause false-positive for 'query-delta' if the reg1074* is written with different values in binning vs sysmem/gmem mode, as1075* we don't track previous values per-mode, but I think we can live with1076* that)1077*/1078enable_mask = MODE_ALL;10791080clear_rewritten();1081load_all_groups(0);10821083if (!skip_query()) {1084/* dump binning pass values: */1085enable_mask = MODE_BINNING;1086render_mode = "BINNING";1087clear_rewritten();1088load_all_groups(0);1089__do_query(primtype, num_indices);10901091/* dump draw pass values: */1092enable_mask = MODE_GMEM | MODE_BYPASS;1093render_mode = "DRAW";1094clear_rewritten();1095load_all_groups(0);1096__do_query(primtype, num_indices);10971098printf("\n");1099}11001101enable_mask = saved_enable_mask;1102render_mode = saved_render_mode;11031104disable_all_groups();1105}11061107/* well, actually query and script..1108* NOTE: call this before dump_register_summary()1109*/1110static void1111do_query(const char *primtype, uint32_t num_indices)1112{1113if (script_draw)1114script_draw(primtype, num_indices);11151116if (options->query_compare) {1117do_query_compare(primtype, num_indices);1118return;1119}11201121if (skip_query())1122return;11231124__do_query(primtype, num_indices);1125}11261127static void1128cp_im_loadi(uint32_t *dwords, uint32_t sizedwords, int level)1129{1130uint32_t start = dwords[1] >> 16;1131uint32_t size = dwords[1] & 0xffff;1132const char *type = NULL, *ext = NULL;1133gl_shader_stage disasm_type;11341135switch (dwords[0]) {1136case 0:1137type = "vertex";1138ext = "vo";1139disasm_type = MESA_SHADER_VERTEX;1140break;1141case 1:1142type = "fragment";1143ext = "fo";1144disasm_type = MESA_SHADER_FRAGMENT;1145break;1146default:1147type = "<unknown>";1148disasm_type = 0;1149break;1150}11511152printf("%s%s shader, start=%04x, size=%04x\n", levels[level], type, start,1153size);1154disasm_a2xx(dwords + 2, sizedwords - 2, level + 2, disasm_type);11551156/* dump raw shader: */1157if (ext)1158dump_shader(ext, dwords + 2, (sizedwords - 2) * 4);1159}11601161static void1162cp_wide_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)1163{1164uint32_t reg = dwords[0] & 0xffff;1165int i;1166for (i = 1; i < sizedwords; i++) {1167dump_register(reg, dwords[i], level + 1);1168reg_set(reg, dwords[i]);1169reg++;1170}1171}11721173enum state_t {1174TEX_SAMP = 1,1175TEX_CONST,1176TEX_MIPADDR, /* a3xx only */1177SHADER_PROG,1178SHADER_CONST,11791180// image/ssbo state:1181SSBO_0,1182SSBO_1,1183SSBO_2,11841185UBO,11861187// unknown things, just to hexdumps:1188UNKNOWN_DWORDS,1189UNKNOWN_2DWORDS,1190UNKNOWN_4DWORDS,1191};11921193enum adreno_state_block {1194SB_VERT_TEX = 0,1195SB_VERT_MIPADDR = 1,1196SB_FRAG_TEX = 2,1197SB_FRAG_MIPADDR = 3,1198SB_VERT_SHADER = 4,1199SB_GEOM_SHADER = 5,1200SB_FRAG_SHADER = 6,1201SB_COMPUTE_SHADER = 7,1202};12031204/* TODO there is probably a clever way to let rnndec parse things so1205* we don't have to care about packet format differences across gens1206*/12071208static void1209a3xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,1210enum state_t *state, enum state_src_t *src)1211{1212unsigned state_block_id = (dwords[0] >> 19) & 0x7;1213unsigned state_type = dwords[1] & 0x3;1214static const struct {1215gl_shader_stage stage;1216enum state_t state;1217} lookup[0xf][0x3] = {1218[SB_VERT_TEX][0] = {MESA_SHADER_VERTEX, TEX_SAMP},1219[SB_VERT_TEX][1] = {MESA_SHADER_VERTEX, TEX_CONST},1220[SB_FRAG_TEX][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},1221[SB_FRAG_TEX][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},1222[SB_VERT_SHADER][0] = {MESA_SHADER_VERTEX, SHADER_PROG},1223[SB_VERT_SHADER][1] = {MESA_SHADER_VERTEX, SHADER_CONST},1224[SB_FRAG_SHADER][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},1225[SB_FRAG_SHADER][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},1226};12271228*stage = lookup[state_block_id][state_type].stage;1229*state = lookup[state_block_id][state_type].state;1230unsigned state_src = (dwords[0] >> 16) & 0x7;1231if (state_src == 0 /* SS_DIRECT */)1232*src = STATE_SRC_DIRECT;1233else1234*src = STATE_SRC_INDIRECT;1235}12361237static enum state_src_t1238_get_state_src(unsigned dword0)1239{1240switch ((dword0 >> 16) & 0x3) {1241case 0: /* SS4_DIRECT / SS6_DIRECT */1242return STATE_SRC_DIRECT;1243case 2: /* SS4_INDIRECT / SS6_INDIRECT */1244return STATE_SRC_INDIRECT;1245case 1: /* SS6_BINDLESS */1246return STATE_SRC_BINDLESS;1247default:1248return STATE_SRC_DIRECT;1249}1250}12511252static void1253_get_state_type(unsigned state_block_id, unsigned state_type,1254gl_shader_stage *stage, enum state_t *state)1255{1256static const struct {1257gl_shader_stage stage;1258enum state_t state;1259} lookup[0x10][0x4] = {1260// SB4_VS_TEX:1261[0x0][0] = {MESA_SHADER_VERTEX, TEX_SAMP},1262[0x0][1] = {MESA_SHADER_VERTEX, TEX_CONST},1263[0x0][2] = {MESA_SHADER_VERTEX, UBO},1264// SB4_HS_TEX:1265[0x1][0] = {MESA_SHADER_TESS_CTRL, TEX_SAMP},1266[0x1][1] = {MESA_SHADER_TESS_CTRL, TEX_CONST},1267[0x1][2] = {MESA_SHADER_TESS_CTRL, UBO},1268// SB4_DS_TEX:1269[0x2][0] = {MESA_SHADER_TESS_EVAL, TEX_SAMP},1270[0x2][1] = {MESA_SHADER_TESS_EVAL, TEX_CONST},1271[0x2][2] = {MESA_SHADER_TESS_EVAL, UBO},1272// SB4_GS_TEX:1273[0x3][0] = {MESA_SHADER_GEOMETRY, TEX_SAMP},1274[0x3][1] = {MESA_SHADER_GEOMETRY, TEX_CONST},1275[0x3][2] = {MESA_SHADER_GEOMETRY, UBO},1276// SB4_FS_TEX:1277[0x4][0] = {MESA_SHADER_FRAGMENT, TEX_SAMP},1278[0x4][1] = {MESA_SHADER_FRAGMENT, TEX_CONST},1279[0x4][2] = {MESA_SHADER_FRAGMENT, UBO},1280// SB4_CS_TEX:1281[0x5][0] = {MESA_SHADER_COMPUTE, TEX_SAMP},1282[0x5][1] = {MESA_SHADER_COMPUTE, TEX_CONST},1283[0x5][2] = {MESA_SHADER_COMPUTE, UBO},1284// SB4_VS_SHADER:1285[0x8][0] = {MESA_SHADER_VERTEX, SHADER_PROG},1286[0x8][1] = {MESA_SHADER_VERTEX, SHADER_CONST},1287[0x8][2] = {MESA_SHADER_VERTEX, UBO},1288// SB4_HS_SHADER1289[0x9][0] = {MESA_SHADER_TESS_CTRL, SHADER_PROG},1290[0x9][1] = {MESA_SHADER_TESS_CTRL, SHADER_CONST},1291[0x9][2] = {MESA_SHADER_TESS_CTRL, UBO},1292// SB4_DS_SHADER1293[0xa][0] = {MESA_SHADER_TESS_EVAL, SHADER_PROG},1294[0xa][1] = {MESA_SHADER_TESS_EVAL, SHADER_CONST},1295[0xa][2] = {MESA_SHADER_TESS_EVAL, UBO},1296// SB4_GS_SHADER1297[0xb][0] = {MESA_SHADER_GEOMETRY, SHADER_PROG},1298[0xb][1] = {MESA_SHADER_GEOMETRY, SHADER_CONST},1299[0xb][2] = {MESA_SHADER_GEOMETRY, UBO},1300// SB4_FS_SHADER:1301[0xc][0] = {MESA_SHADER_FRAGMENT, SHADER_PROG},1302[0xc][1] = {MESA_SHADER_FRAGMENT, SHADER_CONST},1303[0xc][2] = {MESA_SHADER_FRAGMENT, UBO},1304// SB4_CS_SHADER:1305[0xd][0] = {MESA_SHADER_COMPUTE, SHADER_PROG},1306[0xd][1] = {MESA_SHADER_COMPUTE, SHADER_CONST},1307[0xd][2] = {MESA_SHADER_COMPUTE, UBO},1308[0xd][3] = {MESA_SHADER_COMPUTE, SSBO_0}, /* a6xx location */1309// SB4_SSBO (shared across all stages)1310[0xe][0] = {0, SSBO_0}, /* a5xx (and a4xx?) location */1311[0xe][1] = {0, SSBO_1},1312[0xe][2] = {0, SSBO_2},1313// SB4_CS_SSBO1314[0xf][0] = {MESA_SHADER_COMPUTE, SSBO_0},1315[0xf][1] = {MESA_SHADER_COMPUTE, SSBO_1},1316[0xf][2] = {MESA_SHADER_COMPUTE, SSBO_2},1317// unknown things1318/* This looks like combined UBO state for 3d stages (a5xx and1319* before?? I think a6xx has UBO state per shader stage:1320*/1321[0x6][2] = {0, UBO},1322[0x7][1] = {0, UNKNOWN_2DWORDS},1323};13241325*stage = lookup[state_block_id][state_type].stage;1326*state = lookup[state_block_id][state_type].state;1327}13281329static void1330a4xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,1331enum state_t *state, enum state_src_t *src)1332{1333unsigned state_block_id = (dwords[0] >> 18) & 0xf;1334unsigned state_type = dwords[1] & 0x3;1335_get_state_type(state_block_id, state_type, stage, state);1336*src = _get_state_src(dwords[0]);1337}13381339static void1340a6xx_get_state_type(uint32_t *dwords, gl_shader_stage *stage,1341enum state_t *state, enum state_src_t *src)1342{1343unsigned state_block_id = (dwords[0] >> 18) & 0xf;1344unsigned state_type = (dwords[0] >> 14) & 0x3;1345_get_state_type(state_block_id, state_type, stage, state);1346*src = _get_state_src(dwords[0]);1347}13481349static void1350dump_tex_samp(uint32_t *texsamp, enum state_src_t src, int num_unit, int level)1351{1352for (int i = 0; i < num_unit; i++) {1353/* work-around to reduce noise for opencl blob which always1354* writes the max # regardless of # of textures used1355*/1356if ((num_unit == 16) && (texsamp[0] == 0) && (texsamp[1] == 0))1357break;13581359if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {1360dump_domain(texsamp, 2, level + 2, "A3XX_TEX_SAMP");1361dump_hex(texsamp, 2, level + 1);1362texsamp += 2;1363} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {1364dump_domain(texsamp, 2, level + 2, "A4XX_TEX_SAMP");1365dump_hex(texsamp, 2, level + 1);1366texsamp += 2;1367} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {1368dump_domain(texsamp, 4, level + 2, "A5XX_TEX_SAMP");1369dump_hex(texsamp, 4, level + 1);1370texsamp += 4;1371} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {1372dump_domain(texsamp, 4, level + 2, "A6XX_TEX_SAMP");1373dump_hex(texsamp, 4, level + 1);1374texsamp += src == STATE_SRC_BINDLESS ? 16 : 4;1375}1376}1377}13781379static void1380dump_tex_const(uint32_t *texconst, int num_unit, int level)1381{1382for (int i = 0; i < num_unit; i++) {1383/* work-around to reduce noise for opencl blob which always1384* writes the max # regardless of # of textures used1385*/1386if ((num_unit == 16) && (texconst[0] == 0) && (texconst[1] == 0) &&1387(texconst[2] == 0) && (texconst[3] == 0))1388break;13891390if ((300 <= options->gpu_id) && (options->gpu_id < 400)) {1391dump_domain(texconst, 4, level + 2, "A3XX_TEX_CONST");1392dump_hex(texconst, 4, level + 1);1393texconst += 4;1394} else if ((400 <= options->gpu_id) && (options->gpu_id < 500)) {1395dump_domain(texconst, 8, level + 2, "A4XX_TEX_CONST");1396if (options->dump_textures) {1397uint32_t addr = texconst[4] & ~0x1f;1398dump_gpuaddr(addr, level - 2);1399}1400dump_hex(texconst, 8, level + 1);1401texconst += 8;1402} else if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {1403dump_domain(texconst, 12, level + 2, "A5XX_TEX_CONST");1404if (options->dump_textures) {1405uint64_t addr =1406(((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];1407dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);1408}1409dump_hex(texconst, 12, level + 1);1410texconst += 12;1411} else if ((600 <= options->gpu_id) && (options->gpu_id < 700)) {1412dump_domain(texconst, 16, level + 2, "A6XX_TEX_CONST");1413if (options->dump_textures) {1414uint64_t addr =1415(((uint64_t)texconst[5] & 0x1ffff) << 32) | texconst[4];1416dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);1417}1418dump_hex(texconst, 16, level + 1);1419texconst += 16;1420}1421}1422}14231424static void1425cp_load_state(uint32_t *dwords, uint32_t sizedwords, int level)1426{1427gl_shader_stage stage;1428enum state_t state;1429enum state_src_t src;1430uint32_t num_unit = (dwords[0] >> 22) & 0x1ff;1431uint64_t ext_src_addr;1432void *contents;1433int i;14341435if (quiet(2) && !options->script)1436return;14371438if (options->gpu_id >= 600)1439a6xx_get_state_type(dwords, &stage, &state, &src);1440else if (options->gpu_id >= 400)1441a4xx_get_state_type(dwords, &stage, &state, &src);1442else1443a3xx_get_state_type(dwords, &stage, &state, &src);14441445switch (src) {1446case STATE_SRC_DIRECT:1447ext_src_addr = 0;1448break;1449case STATE_SRC_INDIRECT:1450if (is_64b()) {1451ext_src_addr = dwords[1] & 0xfffffffc;1452ext_src_addr |= ((uint64_t)dwords[2]) << 32;1453} else {1454ext_src_addr = dwords[1] & 0xfffffffc;1455}14561457break;1458case STATE_SRC_BINDLESS: {1459const unsigned base_reg = stage == MESA_SHADER_COMPUTE1460? regbase("HLSQ_CS_BINDLESS_BASE[0].ADDR")1461: regbase("HLSQ_BINDLESS_BASE[0].ADDR");14621463if (is_64b()) {1464const unsigned reg = base_reg + (dwords[1] >> 28) * 2;1465ext_src_addr = reg_val(reg) & 0xfffffffc;1466ext_src_addr |= ((uint64_t)reg_val(reg + 1)) << 32;1467} else {1468const unsigned reg = base_reg + (dwords[1] >> 28);1469ext_src_addr = reg_val(reg) & 0xfffffffc;1470}14711472ext_src_addr += 4 * (dwords[1] & 0xffffff);1473break;1474}1475}14761477if (ext_src_addr)1478contents = hostptr(ext_src_addr);1479else1480contents = is_64b() ? dwords + 3 : dwords + 2;14811482if (!contents)1483return;14841485switch (state) {1486case SHADER_PROG: {1487const char *ext = NULL;14881489if (quiet(2))1490return;14911492if (options->gpu_id >= 400)1493num_unit *= 16;1494else if (options->gpu_id >= 300)1495num_unit *= 4;14961497/* shaders:1498*1499* note: num_unit seems to be # of instruction groups, where1500* an instruction group has 4 64bit instructions.1501*/1502if (stage == MESA_SHADER_VERTEX) {1503ext = "vo3";1504} else if (stage == MESA_SHADER_GEOMETRY) {1505ext = "go3";1506} else if (stage == MESA_SHADER_COMPUTE) {1507ext = "co3";1508} else if (stage == MESA_SHADER_FRAGMENT) {1509ext = "fo3";1510}15111512if (contents)1513try_disasm_a3xx(contents, num_unit * 2, level + 2, stdout,1514options->gpu_id);15151516/* dump raw shader: */1517if (ext)1518dump_shader(ext, contents, num_unit * 2 * 4);15191520break;1521}1522case SHADER_CONST: {1523if (quiet(2))1524return;15251526/* uniforms/consts:1527*1528* note: num_unit seems to be # of pairs of dwords??1529*/15301531if (options->gpu_id >= 400)1532num_unit *= 2;15331534dump_float(contents, num_unit * 2, level + 1);1535dump_hex(contents, num_unit * 2, level + 1);15361537break;1538}1539case TEX_MIPADDR: {1540uint32_t *addrs = contents;15411542if (quiet(2))1543return;15441545/* mipmap consts block just appears to be array of num_unit gpu addr's: */1546for (i = 0; i < num_unit; i++) {1547void *ptr = hostptr(addrs[i]);1548printf("%s%2d: %08x\n", levels[level + 1], i, addrs[i]);1549if (options->dump_textures) {1550printf("base=%08x\n", (uint32_t)gpubaseaddr(addrs[i]));1551dump_hex(ptr, hostlen(addrs[i]) / 4, level + 1);1552}1553}1554break;1555}1556case TEX_SAMP: {1557dump_tex_samp(contents, src, num_unit, level);1558break;1559}1560case TEX_CONST: {1561dump_tex_const(contents, num_unit, level);1562break;1563}1564case SSBO_0: {1565uint32_t *ssboconst = (uint32_t *)contents;15661567for (i = 0; i < num_unit; i++) {1568int sz = 4;1569if (400 <= options->gpu_id && options->gpu_id < 500) {1570dump_domain(ssboconst, 4, level + 2, "A4XX_SSBO_0");1571} else if (500 <= options->gpu_id && options->gpu_id < 600) {1572dump_domain(ssboconst, 4, level + 2, "A5XX_SSBO_0");1573} else if (600 <= options->gpu_id && options->gpu_id < 700) {1574sz = 16;1575dump_domain(ssboconst, 16, level + 2, "A6XX_IBO");1576}1577dump_hex(ssboconst, sz, level + 1);1578ssboconst += sz;1579}1580break;1581}1582case SSBO_1: {1583uint32_t *ssboconst = (uint32_t *)contents;15841585for (i = 0; i < num_unit; i++) {1586if (400 <= options->gpu_id && options->gpu_id < 500)1587dump_domain(ssboconst, 2, level + 2, "A4XX_SSBO_1");1588else if (500 <= options->gpu_id && options->gpu_id < 600)1589dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_1");1590dump_hex(ssboconst, 2, level + 1);1591ssboconst += 2;1592}1593break;1594}1595case SSBO_2: {1596uint32_t *ssboconst = (uint32_t *)contents;15971598for (i = 0; i < num_unit; i++) {1599/* TODO a4xx and a5xx might be same: */1600if ((500 <= options->gpu_id) && (options->gpu_id < 600)) {1601dump_domain(ssboconst, 2, level + 2, "A5XX_SSBO_2");1602dump_hex(ssboconst, 2, level + 1);1603}1604if (options->dump_textures) {1605uint64_t addr =1606(((uint64_t)ssboconst[1] & 0x1ffff) << 32) | ssboconst[0];1607dump_gpuaddr_size(addr, level - 2, hostlen(addr) / 4, 3);1608}1609ssboconst += 2;1610}1611break;1612}1613case UBO: {1614uint32_t *uboconst = (uint32_t *)contents;16151616for (i = 0; i < num_unit; i++) {1617// TODO probably similar on a4xx..1618if (500 <= options->gpu_id && options->gpu_id < 600)1619dump_domain(uboconst, 2, level + 2, "A5XX_UBO");1620else if (600 <= options->gpu_id && options->gpu_id < 700)1621dump_domain(uboconst, 2, level + 2, "A6XX_UBO");1622dump_hex(uboconst, 2, level + 1);1623uboconst += src == STATE_SRC_BINDLESS ? 16 : 2;1624}1625break;1626}1627case UNKNOWN_DWORDS: {1628if (quiet(2))1629return;1630dump_hex(contents, num_unit, level + 1);1631break;1632}1633case UNKNOWN_2DWORDS: {1634if (quiet(2))1635return;1636dump_hex(contents, num_unit * 2, level + 1);1637break;1638}1639case UNKNOWN_4DWORDS: {1640if (quiet(2))1641return;1642dump_hex(contents, num_unit * 4, level + 1);1643break;1644}1645default:1646if (quiet(2))1647return;1648/* hmm.. */1649dump_hex(contents, num_unit, level + 1);1650break;1651}1652}16531654static void1655cp_set_bin(uint32_t *dwords, uint32_t sizedwords, int level)1656{1657bin_x1 = dwords[1] & 0xffff;1658bin_y1 = dwords[1] >> 16;1659bin_x2 = dwords[2] & 0xffff;1660bin_y2 = dwords[2] >> 16;1661}16621663static void1664dump_a2xx_tex_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,1665int level)1666{1667uint32_t w, h, p;1668uint32_t gpuaddr, flags, mip_gpuaddr, mip_flags;1669uint32_t min, mag, swiz, clamp_x, clamp_y, clamp_z;1670static const char *filter[] = {1671"point",1672"bilinear",1673"bicubic",1674};1675static const char *clamp[] = {1676"wrap",1677"mirror",1678"clamp-last-texel",1679};1680static const char swiznames[] = "xyzw01??";16811682/* see sys2gmem_tex_const[] in adreno_a2xxx.c */16831684/* Texture, FormatXYZW=Unsigned, ClampXYZ=Wrap/Repeat,1685* RFMode=ZeroClamp-1, Dim=1:2d, pitch1686*/1687p = (dwords[0] >> 22) << 5;1688clamp_x = (dwords[0] >> 10) & 0x3;1689clamp_y = (dwords[0] >> 13) & 0x3;1690clamp_z = (dwords[0] >> 16) & 0x3;16911692/* Format=6:8888_WZYX, EndianSwap=0:None, ReqSize=0:256bit, DimHi=0,1693* NearestClamp=1:OGL Mode1694*/1695parse_dword_addr(dwords[1], &gpuaddr, &flags, 0xfff);16961697/* Width, Height, EndianSwap=0:None */1698w = (dwords[2] & 0x1fff) + 1;1699h = ((dwords[2] >> 13) & 0x1fff) + 1;17001701/* NumFormat=0:RF, DstSelXYZW=XYZW, ExpAdj=0, MagFilt=MinFilt=0:Point,1702* Mip=2:BaseMap1703*/1704mag = (dwords[3] >> 19) & 0x3;1705min = (dwords[3] >> 21) & 0x3;1706swiz = (dwords[3] >> 1) & 0xfff;17071708/* VolMag=VolMin=0:Point, MinMipLvl=0, MaxMipLvl=1, LodBiasH=V=0,1709* Dim3d=01710*/1711// XXX17121713/* BorderColor=0:ABGRBlack, ForceBC=0:diable, TriJuice=0, Aniso=0,1714* Dim=1:2d, MipPacking=01715*/1716parse_dword_addr(dwords[5], &mip_gpuaddr, &mip_flags, 0xfff);17171718printf("%sset texture const %04x\n", levels[level], val);1719printf("%sclamp x/y/z: %s/%s/%s\n", levels[level + 1], clamp[clamp_x],1720clamp[clamp_y], clamp[clamp_z]);1721printf("%sfilter min/mag: %s/%s\n", levels[level + 1], filter[min],1722filter[mag]);1723printf("%sswizzle: %c%c%c%c\n", levels[level + 1],1724swiznames[(swiz >> 0) & 0x7], swiznames[(swiz >> 3) & 0x7],1725swiznames[(swiz >> 6) & 0x7], swiznames[(swiz >> 9) & 0x7]);1726printf("%saddr=%08x (flags=%03x), size=%dx%d, pitch=%d, format=%s\n",1727levels[level + 1], gpuaddr, flags, w, h, p,1728rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf));1729printf("%smipaddr=%08x (flags=%03x)\n", levels[level + 1], mip_gpuaddr,1730mip_flags);1731}17321733static void1734dump_a2xx_shader_const(uint32_t *dwords, uint32_t sizedwords, uint32_t val,1735int level)1736{1737int i;1738printf("%sset shader const %04x\n", levels[level], val);1739for (i = 0; i < sizedwords;) {1740uint32_t gpuaddr, flags;1741parse_dword_addr(dwords[i++], &gpuaddr, &flags, 0xf);1742void *addr = hostptr(gpuaddr);1743if (addr) {1744const char *fmt =1745rnn_enumname(rnn, "a2xx_sq_surfaceformat", flags & 0xf);1746uint32_t size = dwords[i++];1747printf("%saddr=%08x, size=%d, format=%s\n", levels[level + 1], gpuaddr,1748size, fmt);1749// TODO maybe dump these as bytes instead of dwords?1750size = (size + 3) / 4; // for now convert to dwords1751dump_hex(addr, min(size, 64), level + 1);1752if (size > min(size, 64))1753printf("%s\t\t...\n", levels[level + 1]);1754dump_float(addr, min(size, 64), level + 1);1755if (size > min(size, 64))1756printf("%s\t\t...\n", levels[level + 1]);1757}1758}1759}17601761static void1762cp_set_const(uint32_t *dwords, uint32_t sizedwords, int level)1763{1764uint32_t val = dwords[0] & 0xffff;1765switch ((dwords[0] >> 16) & 0xf) {1766case 0x0:1767dump_float((float *)(dwords + 1), sizedwords - 1, level + 1);1768break;1769case 0x1:1770/* need to figure out how const space is partitioned between1771* attributes, textures, etc..1772*/1773if (val < 0x78) {1774dump_a2xx_tex_const(dwords + 1, sizedwords - 1, val, level);1775} else {1776dump_a2xx_shader_const(dwords + 1, sizedwords - 1, val, level);1777}1778break;1779case 0x2:1780printf("%sset bool const %04x\n", levels[level], val);1781break;1782case 0x3:1783printf("%sset loop const %04x\n", levels[level], val);1784break;1785case 0x4:1786val += 0x2000;1787if (dwords[0] & 0x80000000) {1788uint32_t srcreg = dwords[1];1789uint32_t dstval = dwords[2];17901791/* TODO: not sure what happens w/ payload != 2.. */1792assert(sizedwords == 3);1793assert(srcreg < ARRAY_SIZE(type0_reg_vals));17941795/* note: rnn_regname uses a static buf so we can't do1796* two regname() calls for one printf..1797*/1798printf("%s%s = %08x + ", levels[level], regname(val, 1), dstval);1799printf("%s (%08x)\n", regname(srcreg, 1), type0_reg_vals[srcreg]);18001801dstval += type0_reg_vals[srcreg];18021803dump_registers(val, &dstval, 1, level + 1);1804} else {1805dump_registers(val, dwords + 1, sizedwords - 1, level + 1);1806}1807break;1808}1809}18101811static void dump_register_summary(int level);18121813static void1814cp_event_write(uint32_t *dwords, uint32_t sizedwords, int level)1815{1816const char *name = rnn_enumname(rnn, "vgt_event_type", dwords[0]);1817printl(2, "%sevent %s\n", levels[level], name);18181819if (name && (options->gpu_id > 500)) {1820char eventname[64];1821snprintf(eventname, sizeof(eventname), "EVENT:%s", name);1822if (!strcmp(name, "BLIT")) {1823do_query(eventname, 0);1824print_mode(level);1825dump_register_summary(level);1826}1827}1828}18291830static void1831dump_register_summary(int level)1832{1833uint32_t i;1834bool saved_summary = summary;1835summary = false;18361837in_summary = true;18381839/* dump current state of registers: */1840printl(2, "%sdraw[%i] register values\n", levels[level], draw_count);1841for (i = 0; i < regcnt(); i++) {1842uint32_t regbase = i;1843uint32_t lastval = reg_val(regbase);1844/* skip registers that haven't been updated since last draw/blit: */1845if (!(options->allregs || reg_rewritten(regbase)))1846continue;1847if (!reg_written(regbase))1848continue;1849if (lastval != lastvals[regbase]) {1850printl(2, "!");1851lastvals[regbase] = lastval;1852} else {1853printl(2, " ");1854}1855if (reg_rewritten(regbase)) {1856printl(2, "+");1857} else {1858printl(2, " ");1859}1860printl(2, "\t%08x", lastval);1861if (!quiet(2)) {1862dump_register(regbase, lastval, level);1863}1864}18651866clear_rewritten();18671868in_summary = false;18691870draw_count++;1871summary = saved_summary;1872}18731874static uint32_t1875draw_indx_common(uint32_t *dwords, int level)1876{1877uint32_t prim_type = dwords[1] & 0x1f;1878uint32_t source_select = (dwords[1] >> 6) & 0x3;1879uint32_t num_indices = dwords[2];1880const char *primtype;18811882primtype = rnn_enumname(rnn, "pc_di_primtype", prim_type);18831884do_query(primtype, num_indices);18851886printl(2, "%sdraw: %d\n", levels[level], draws[ib]);1887printl(2, "%sprim_type: %s (%d)\n", levels[level], primtype, prim_type);1888printl(2, "%ssource_select: %s (%d)\n", levels[level],1889rnn_enumname(rnn, "pc_di_src_sel", source_select), source_select);1890printl(2, "%snum_indices: %d\n", levels[level], num_indices);18911892vertices += num_indices;18931894draws[ib]++;18951896return num_indices;1897}18981899enum pc_di_index_size {1900INDEX_SIZE_IGN = 0,1901INDEX_SIZE_16_BIT = 0,1902INDEX_SIZE_32_BIT = 1,1903INDEX_SIZE_8_BIT = 2,1904INDEX_SIZE_INVALID = 0,1905};19061907static void1908cp_draw_indx(uint32_t *dwords, uint32_t sizedwords, int level)1909{1910uint32_t num_indices = draw_indx_common(dwords, level);19111912assert(!is_64b());19131914/* if we have an index buffer, dump that: */1915if (sizedwords == 5) {1916void *ptr = hostptr(dwords[3]);1917printl(2, "%sgpuaddr: %08x\n", levels[level], dwords[3]);1918printl(2, "%sidx_size: %d\n", levels[level], dwords[4]);1919if (ptr) {1920enum pc_di_index_size size =1921((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);1922if (!quiet(2)) {1923int i;1924printf("%sidxs: ", levels[level]);1925if (size == INDEX_SIZE_8_BIT) {1926uint8_t *idx = ptr;1927for (i = 0; i < dwords[4]; i++)1928printf(" %u", idx[i]);1929} else if (size == INDEX_SIZE_16_BIT) {1930uint16_t *idx = ptr;1931for (i = 0; i < dwords[4] / 2; i++)1932printf(" %u", idx[i]);1933} else if (size == INDEX_SIZE_32_BIT) {1934uint32_t *idx = ptr;1935for (i = 0; i < dwords[4] / 4; i++)1936printf(" %u", idx[i]);1937}1938printf("\n");1939dump_hex(ptr, dwords[4] / 4, level + 1);1940}1941}1942}19431944/* don't bother dumping registers for the dummy draw_indx's.. */1945if (num_indices > 0)1946dump_register_summary(level);19471948needs_wfi = true;1949}19501951static void1952cp_draw_indx_2(uint32_t *dwords, uint32_t sizedwords, int level)1953{1954uint32_t num_indices = draw_indx_common(dwords, level);1955enum pc_di_index_size size =1956((dwords[1] >> 11) & 1) | ((dwords[1] >> 12) & 2);1957void *ptr = &dwords[3];1958int sz = 0;19591960assert(!is_64b());19611962/* CP_DRAW_INDX_2 has embedded/inline idx buffer: */1963if (!quiet(2)) {1964int i;1965printf("%sidxs: ", levels[level]);1966if (size == INDEX_SIZE_8_BIT) {1967uint8_t *idx = ptr;1968for (i = 0; i < num_indices; i++)1969printf(" %u", idx[i]);1970sz = num_indices;1971} else if (size == INDEX_SIZE_16_BIT) {1972uint16_t *idx = ptr;1973for (i = 0; i < num_indices; i++)1974printf(" %u", idx[i]);1975sz = num_indices * 2;1976} else if (size == INDEX_SIZE_32_BIT) {1977uint32_t *idx = ptr;1978for (i = 0; i < num_indices; i++)1979printf(" %u", idx[i]);1980sz = num_indices * 4;1981}1982printf("\n");1983dump_hex(ptr, sz / 4, level + 1);1984}19851986/* don't bother dumping registers for the dummy draw_indx's.. */1987if (num_indices > 0)1988dump_register_summary(level);1989}19901991static void1992cp_draw_indx_offset(uint32_t *dwords, uint32_t sizedwords, int level)1993{1994uint32_t num_indices = dwords[2];1995uint32_t prim_type = dwords[0] & 0x1f;19961997do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), num_indices);1998print_mode(level);19992000/* don't bother dumping registers for the dummy draw_indx's.. */2001if (num_indices > 0)2002dump_register_summary(level);2003}20042005static void2006cp_draw_indx_indirect(uint32_t *dwords, uint32_t sizedwords, int level)2007{2008uint32_t prim_type = dwords[0] & 0x1f;2009uint64_t addr;20102011do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);2012print_mode(level);20132014if (is_64b())2015addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];2016else2017addr = dwords[1];2018dump_gpuaddr_size(addr, level, 0x10, 2);20192020if (is_64b())2021addr = (((uint64_t)dwords[5] & 0x1ffff) << 32) | dwords[4];2022else2023addr = dwords[3];2024dump_gpuaddr_size(addr, level, 0x10, 2);20252026dump_register_summary(level);2027}20282029static void2030cp_draw_indirect(uint32_t *dwords, uint32_t sizedwords, int level)2031{2032uint32_t prim_type = dwords[0] & 0x1f;2033uint64_t addr;20342035do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);2036print_mode(level);20372038addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];2039dump_gpuaddr_size(addr, level, 0x10, 2);20402041dump_register_summary(level);2042}20432044static void2045cp_draw_indirect_multi(uint32_t *dwords, uint32_t sizedwords, int level)2046{2047uint32_t prim_type = dwords[0] & 0x1f;2048uint32_t count = dwords[2];20492050do_query(rnn_enumname(rnn, "pc_di_primtype", prim_type), 0);2051print_mode(level);20522053struct rnndomain *domain = rnn_finddomain(rnn->db, "CP_DRAW_INDIRECT_MULTI");2054uint32_t count_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT_COUNT");2055uint32_t addr_dword = rnndec_decodereg(rnn->vc, domain, "INDIRECT");2056uint64_t stride_dword = rnndec_decodereg(rnn->vc, domain, "STRIDE");20572058if (count_dword) {2059uint64_t count_addr =2060((uint64_t)dwords[count_dword + 1] << 32) | dwords[count_dword];2061uint32_t *buf = hostptr(count_addr);20622063/* Don't print more draws than this if we don't know the indirect2064* count. It's possible the user will give ~0 or some other large2065* value, expecting the GPU to fill in the draw count, and we don't2066* want to print a gazillion draws in that case:2067*/2068const uint32_t max_draw_count = 0x100;20692070/* Assume the indirect count is garbage if it's larger than this2071* (quite large) value or 0. Hopefully this catches most cases.2072*/2073const uint32_t max_indirect_draw_count = 0x10000;20742075if (buf) {2076printf("%sindirect count: %u\n", levels[level], *buf);2077if (*buf == 0 || *buf > max_indirect_draw_count) {2078/* garbage value */2079count = min(count, max_draw_count);2080} else {2081/* not garbage */2082count = min(count, *buf);2083}2084} else {2085count = min(count, max_draw_count);2086}2087}20882089if (addr_dword && stride_dword) {2090uint64_t addr =2091((uint64_t)dwords[addr_dword + 1] << 32) | dwords[addr_dword];2092uint32_t stride = dwords[stride_dword];20932094for (unsigned i = 0; i < count; i++, addr += stride) {2095printf("%sdraw %d:\n", levels[level], i);2096dump_gpuaddr_size(addr, level, 0x10, 2);2097}2098}20992100dump_register_summary(level);2101}21022103static void2104cp_run_cl(uint32_t *dwords, uint32_t sizedwords, int level)2105{2106do_query("COMPUTE", 1);2107dump_register_summary(level);2108}21092110static void2111cp_nop(uint32_t *dwords, uint32_t sizedwords, int level)2112{2113const char *buf = (void *)dwords;2114int i;21152116if (quiet(3))2117return;21182119// blob doesn't use CP_NOP for string_marker but it does2120// use it for things that end up looking like, but aren't2121// ascii chars:2122if (!options->decode_markers)2123return;21242125for (i = 0; i < 4 * sizedwords; i++) {2126if (buf[i] == '\0')2127break;2128if (isascii(buf[i]))2129printf("%c", buf[i]);2130}2131printf("\n");2132}21332134static void2135cp_indirect(uint32_t *dwords, uint32_t sizedwords, int level)2136{2137/* traverse indirect buffers */2138uint64_t ibaddr;2139uint32_t ibsize;2140uint32_t *ptr = NULL;21412142if (is_64b()) {2143/* a5xx+.. high 32b of gpu addr, then size: */2144ibaddr = dwords[0];2145ibaddr |= ((uint64_t)dwords[1]) << 32;2146ibsize = dwords[2];2147} else {2148ibaddr = dwords[0];2149ibsize = dwords[1];2150}21512152if (!quiet(3)) {2153if (is_64b()) {2154printf("%sibaddr:%016" PRIx64 "\n", levels[level], ibaddr);2155} else {2156printf("%sibaddr:%08x\n", levels[level], (uint32_t)ibaddr);2157}2158printf("%sibsize:%08x\n", levels[level], ibsize);2159}21602161if (options->once && has_dumped(ibaddr, enable_mask))2162return;21632164/* 'query-compare' mode implies 'once' mode, although we need only to2165* process the cmdstream for *any* enable_mask mode, since we are2166* comparing binning vs draw reg values at the same time, ie. it is2167* not useful to process the same draw in both binning and draw pass.2168*/2169if (options->query_compare && has_dumped(ibaddr, MODE_ALL))2170return;21712172/* map gpuaddr back to hostptr: */2173ptr = hostptr(ibaddr);21742175if (ptr) {2176/* If the GPU hung within the target IB, the trigger point will be2177* just after the current CP_INDIRECT_BUFFER. Because the IB is2178* executed but never returns. Account for this by checking if2179* the IB returned:2180*/2181highlight_gpuaddr(gpuaddr(&dwords[is_64b() ? 3 : 2]));21822183ib++;2184ibs[ib].base = ibaddr;2185ibs[ib].size = ibsize;21862187dump_commands(ptr, ibsize, level);2188ib--;2189} else {2190fprintf(stderr, "could not find: %016" PRIx64 " (%d)\n", ibaddr, ibsize);2191}2192}21932194static void2195cp_wfi(uint32_t *dwords, uint32_t sizedwords, int level)2196{2197needs_wfi = false;2198}21992200static void2201cp_mem_write(uint32_t *dwords, uint32_t sizedwords, int level)2202{2203if (quiet(2))2204return;22052206if (is_64b()) {2207uint64_t gpuaddr = dwords[0] | (((uint64_t)dwords[1]) << 32);2208printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);2209dump_hex(&dwords[2], sizedwords - 2, level + 1);22102211if (pkt_is_type4(dwords[2]) || pkt_is_type7(dwords[2]))2212dump_commands(&dwords[2], sizedwords - 2, level + 1);2213} else {2214uint32_t gpuaddr = dwords[0];2215printf("%sgpuaddr:%08x\n", levels[level], gpuaddr);2216dump_float((float *)&dwords[1], sizedwords - 1, level + 1);2217}2218}22192220static void2221cp_rmw(uint32_t *dwords, uint32_t sizedwords, int level)2222{2223uint32_t val = dwords[0] & 0xffff;2224uint32_t and = dwords[1];2225uint32_t or = dwords[2];2226printl(3, "%srmw (%s & 0x%08x) | 0x%08x)\n", levels[level], regname(val, 1),2227and, or);2228if (needs_wfi)2229printl(2, "NEEDS WFI: rmw (%s & 0x%08x) | 0x%08x)\n", regname(val, 1),2230and, or);2231reg_set(val, (reg_val(val) & and) | or);2232}22332234static void2235cp_reg_mem(uint32_t *dwords, uint32_t sizedwords, int level)2236{2237uint32_t val = dwords[0] & 0xffff;2238printl(3, "%sbase register: %s\n", levels[level], regname(val, 1));22392240if (quiet(2))2241return;22422243uint64_t gpuaddr = dwords[1] | (((uint64_t)dwords[2]) << 32);2244printf("%sgpuaddr:%016" PRIx64 "\n", levels[level], gpuaddr);2245void *ptr = hostptr(gpuaddr);2246if (ptr) {2247uint32_t cnt = (dwords[0] >> 19) & 0x3ff;2248dump_hex(ptr, cnt, level + 1);2249}2250}22512252struct draw_state {2253uint16_t enable_mask;2254uint16_t flags;2255uint32_t count;2256uint64_t addr;2257};22582259struct draw_state state[32];22602261#define FLAG_DIRTY 0x12262#define FLAG_DISABLE 0x22263#define FLAG_DISABLE_ALL_GROUPS 0x42264#define FLAG_LOAD_IMMED 0x822652266static int draw_mode;22672268static void2269disable_group(unsigned group_id)2270{2271struct draw_state *ds = &state[group_id];2272memset(ds, 0, sizeof(*ds));2273}22742275static void2276disable_all_groups(void)2277{2278for (unsigned i = 0; i < ARRAY_SIZE(state); i++)2279disable_group(i);2280}22812282static void2283load_group(unsigned group_id, int level)2284{2285struct draw_state *ds = &state[group_id];22862287if (!ds->count)2288return;22892290printl(2, "%sgroup_id: %u\n", levels[level], group_id);2291printl(2, "%scount: %d\n", levels[level], ds->count);2292printl(2, "%saddr: %016llx\n", levels[level], ds->addr);2293printl(2, "%sflags: %x\n", levels[level], ds->flags);22942295if (options->gpu_id >= 600) {2296printl(2, "%senable_mask: 0x%x\n", levels[level], ds->enable_mask);22972298if (!(ds->enable_mask & enable_mask)) {2299printl(2, "%s\tskipped!\n\n", levels[level]);2300return;2301}2302}23032304void *ptr = hostptr(ds->addr);2305if (ptr) {2306if (!quiet(2))2307dump_hex(ptr, ds->count, level + 1);23082309ib++;2310dump_commands(ptr, ds->count, level + 1);2311ib--;2312}2313}23142315static void2316load_all_groups(int level)2317{2318/* sanity check, we should never recursively hit recursion here, and if2319* we do bad things happen:2320*/2321static bool loading_groups = false;2322if (loading_groups) {2323printf("ERROR: nothing in draw state should trigger recursively loading "2324"groups!\n");2325return;2326}2327loading_groups = true;2328for (unsigned i = 0; i < ARRAY_SIZE(state); i++)2329load_group(i, level);2330loading_groups = false;23312332/* in 'query-compare' mode, defer disabling all groups until we have a2333* chance to process the query:2334*/2335if (!options->query_compare)2336disable_all_groups();2337}23382339static void2340cp_set_draw_state(uint32_t *dwords, uint32_t sizedwords, int level)2341{2342uint32_t i;23432344for (i = 0; i < sizedwords;) {2345struct draw_state *ds;2346uint32_t count = dwords[i] & 0xffff;2347uint32_t group_id = (dwords[i] >> 24) & 0x1f;2348uint32_t enable_mask = (dwords[i] >> 20) & 0xf;2349uint32_t flags = (dwords[i] >> 16) & 0xf;2350uint64_t addr;23512352if (is_64b()) {2353addr = dwords[i + 1];2354addr |= ((uint64_t)dwords[i + 2]) << 32;2355i += 3;2356} else {2357addr = dwords[i + 1];2358i += 2;2359}23602361if (flags & FLAG_DISABLE_ALL_GROUPS) {2362disable_all_groups();2363continue;2364}23652366if (flags & FLAG_DISABLE) {2367disable_group(group_id);2368continue;2369}23702371assert(group_id < ARRAY_SIZE(state));2372disable_group(group_id);23732374ds = &state[group_id];23752376ds->enable_mask = enable_mask;2377ds->flags = flags;2378ds->count = count;2379ds->addr = addr;23802381if (flags & FLAG_LOAD_IMMED) {2382load_group(group_id, level);2383disable_group(group_id);2384}2385}2386}23872388static void2389cp_set_mode(uint32_t *dwords, uint32_t sizedwords, int level)2390{2391draw_mode = dwords[0];2392}23932394/* execute compute shader */2395static void2396cp_exec_cs(uint32_t *dwords, uint32_t sizedwords, int level)2397{2398do_query("compute", 0);2399dump_register_summary(level);2400}24012402static void2403cp_exec_cs_indirect(uint32_t *dwords, uint32_t sizedwords, int level)2404{2405uint64_t addr;24062407if (is_64b()) {2408addr = (((uint64_t)dwords[2] & 0x1ffff) << 32) | dwords[1];2409} else {2410addr = dwords[1];2411}24122413printl(3, "%saddr: %016llx\n", levels[level], addr);2414dump_gpuaddr_size(addr, level, 0x10, 2);24152416do_query("compute", 0);2417dump_register_summary(level);2418}24192420static void2421cp_set_marker(uint32_t *dwords, uint32_t sizedwords, int level)2422{2423render_mode = rnn_enumname(rnn, "a6xx_render_mode", dwords[0] & 0xf);24242425if (!strcmp(render_mode, "RM6_BINNING")) {2426enable_mask = MODE_BINNING;2427} else if (!strcmp(render_mode, "RM6_GMEM")) {2428enable_mask = MODE_GMEM;2429} else if (!strcmp(render_mode, "RM6_BYPASS")) {2430enable_mask = MODE_BYPASS;2431}2432}24332434static void2435cp_set_render_mode(uint32_t *dwords, uint32_t sizedwords, int level)2436{2437uint64_t addr;2438uint32_t *ptr, len;24392440assert(is_64b());24412442/* TODO seems to have two ptrs, 9 dwords total (incl pkt7 hdr)..2443* not sure if this can come in different sizes.2444*2445* First ptr doesn't seem to be cmdstream, second one does.2446*2447* Comment from downstream kernel:2448*2449* SRM -- set render mode (ex binning, direct render etc)2450* SRM is set by UMD usually at start of IB to tell CP the type of2451* preemption.2452* KMD needs to set SRM to NULL to indicate CP that rendering is2453* done by IB.2454* ------------------------------------------------------------------2455*2456* Seems to always be one of these two:2457* 70ec0008 00000001 001c0000 00000000 00000010 00000003 0000000d 001c20002458* 00000000 70ec0008 00000001 001c0000 00000000 00000000 00000003 0000000d2459* 001c2000 000000002460*2461*/24622463assert(options->gpu_id >= 500);24642465render_mode = rnn_enumname(rnn, "render_mode_cmd", dwords[0]);24662467if (sizedwords == 1)2468return;24692470addr = dwords[1];2471addr |= ((uint64_t)dwords[2]) << 32;24722473mode = dwords[3];24742475dump_gpuaddr(addr, level + 1);24762477if (sizedwords == 5)2478return;24792480assert(sizedwords == 8);24812482len = dwords[5];2483addr = dwords[6];2484addr |= ((uint64_t)dwords[7]) << 32;24852486printl(3, "%saddr: 0x%016lx\n", levels[level], addr);2487printl(3, "%slen: 0x%x\n", levels[level], len);24882489ptr = hostptr(addr);24902491if (ptr) {2492if (!quiet(2)) {2493ib++;2494dump_commands(ptr, len, level + 1);2495ib--;2496dump_hex(ptr, len, level + 1);2497}2498}2499}25002501static void2502cp_compute_checkpoint(uint32_t *dwords, uint32_t sizedwords, int level)2503{2504uint64_t addr;2505uint32_t *ptr, len;25062507assert(is_64b());2508assert(options->gpu_id >= 500);25092510assert(sizedwords == 8);25112512addr = dwords[5];2513addr |= ((uint64_t)dwords[6]) << 32;2514len = dwords[7];25152516printl(3, "%saddr: 0x%016" PRIx64 "\n", levels[level], addr);2517printl(3, "%slen: 0x%x\n", levels[level], len);25182519ptr = hostptr(addr);25202521if (ptr) {2522if (!quiet(2)) {2523ib++;2524dump_commands(ptr, len, level + 1);2525ib--;2526dump_hex(ptr, len, level + 1);2527}2528}2529}25302531static void2532cp_blit(uint32_t *dwords, uint32_t sizedwords, int level)2533{2534do_query(rnn_enumname(rnn, "cp_blit_cmd", dwords[0]), 0);2535print_mode(level);2536dump_register_summary(level);2537}25382539static void2540cp_context_reg_bunch(uint32_t *dwords, uint32_t sizedwords, int level)2541{2542int i;25432544/* NOTE: seems to write same reg multiple times.. not sure if different parts2545* of these are triggered by the FLUSH_SO_n events?? (if that is what they2546* actually are?)2547*/2548bool saved_summary = summary;2549summary = false;25502551for (i = 0; i < sizedwords; i += 2) {2552dump_register(dwords[i + 0], dwords[i + 1], level + 1);2553reg_set(dwords[i + 0], dwords[i + 1]);2554}25552556summary = saved_summary;2557}25582559static void2560cp_reg_write(uint32_t *dwords, uint32_t sizedwords, int level)2561{2562uint32_t reg = dwords[1] & 0xffff;25632564dump_register(reg, dwords[2], level + 1);2565reg_set(reg, dwords[2]);2566}25672568static void2569cp_set_ctxswitch_ib(uint32_t *dwords, uint32_t sizedwords, int level)2570{2571uint64_t addr;2572uint32_t size = dwords[2] & 0xffff;2573void *ptr;25742575addr = dwords[0] | ((uint64_t)dwords[1] << 32);25762577printf("addr=%" PRIx64 "\n", addr);2578ptr = hostptr(addr);2579if (ptr) {2580dump_commands(ptr, size, level + 1);2581}2582}25832584static void2585cp_skip_ib2_enable_global(uint32_t *dwords, uint32_t sizedwords, int level)2586{2587skip_ib2_enable_global = dwords[0];2588}25892590static void2591cp_skip_ib2_enable_local(uint32_t *dwords, uint32_t sizedwords, int level)2592{2593skip_ib2_enable_local = dwords[0];2594}25952596#define CP(x, fxn, ...) { "CP_" #x, fxn, ##__VA_ARGS__ }2597static const struct type3_op {2598const char *name;2599void (*fxn)(uint32_t *dwords, uint32_t sizedwords, int level);2600struct {2601bool load_all_groups;2602} options;2603} type3_op[] = {2604CP(NOP, cp_nop),2605CP(INDIRECT_BUFFER, cp_indirect),2606CP(INDIRECT_BUFFER_PFD, cp_indirect),2607CP(WAIT_FOR_IDLE, cp_wfi),2608CP(REG_RMW, cp_rmw),2609CP(REG_TO_MEM, cp_reg_mem),2610CP(MEM_TO_REG, cp_reg_mem), /* same layout as CP_REG_TO_MEM */2611CP(MEM_WRITE, cp_mem_write),2612CP(EVENT_WRITE, cp_event_write),2613CP(RUN_OPENCL, cp_run_cl),2614CP(DRAW_INDX, cp_draw_indx, {.load_all_groups = true}),2615CP(DRAW_INDX_2, cp_draw_indx_2, {.load_all_groups = true}),2616CP(SET_CONSTANT, cp_set_const),2617CP(IM_LOAD_IMMEDIATE, cp_im_loadi),2618CP(WIDE_REG_WRITE, cp_wide_reg_write),26192620/* for a3xx */2621CP(LOAD_STATE, cp_load_state),2622CP(SET_BIN, cp_set_bin),26232624/* for a4xx */2625CP(LOAD_STATE4, cp_load_state),2626CP(SET_DRAW_STATE, cp_set_draw_state),2627CP(DRAW_INDX_OFFSET, cp_draw_indx_offset, {.load_all_groups = true}),2628CP(EXEC_CS, cp_exec_cs, {.load_all_groups = true}),2629CP(EXEC_CS_INDIRECT, cp_exec_cs_indirect, {.load_all_groups = true}),26302631/* for a5xx */2632CP(SET_RENDER_MODE, cp_set_render_mode),2633CP(COMPUTE_CHECKPOINT, cp_compute_checkpoint),2634CP(BLIT, cp_blit),2635CP(CONTEXT_REG_BUNCH, cp_context_reg_bunch),2636CP(DRAW_INDIRECT, cp_draw_indirect, {.load_all_groups = true}),2637CP(DRAW_INDX_INDIRECT, cp_draw_indx_indirect, {.load_all_groups = true}),2638CP(DRAW_INDIRECT_MULTI, cp_draw_indirect_multi, {.load_all_groups = true}),2639CP(SKIP_IB2_ENABLE_GLOBAL, cp_skip_ib2_enable_global),2640CP(SKIP_IB2_ENABLE_LOCAL, cp_skip_ib2_enable_local),26412642/* for a6xx */2643CP(LOAD_STATE6_GEOM, cp_load_state),2644CP(LOAD_STATE6_FRAG, cp_load_state),2645CP(LOAD_STATE6, cp_load_state),2646CP(SET_MODE, cp_set_mode),2647CP(SET_MARKER, cp_set_marker),2648CP(REG_WRITE, cp_reg_write),26492650CP(SET_CTXSWITCH_IB, cp_set_ctxswitch_ib),2651};26522653static void2654noop_fxn(uint32_t *dwords, uint32_t sizedwords, int level)2655{2656}26572658static const struct type3_op *2659get_type3_op(unsigned opc)2660{2661static const struct type3_op dummy_op = {2662.fxn = noop_fxn,2663};2664const char *name = pktname(opc);26652666if (!name)2667return &dummy_op;26682669for (unsigned i = 0; i < ARRAY_SIZE(type3_op); i++)2670if (!strcmp(name, type3_op[i].name))2671return &type3_op[i];26722673return &dummy_op;2674}26752676void2677dump_commands(uint32_t *dwords, uint32_t sizedwords, int level)2678{2679int dwords_left = sizedwords;2680uint32_t count = 0; /* dword count including packet header */2681uint32_t val;26822683// assert(dwords);2684if (!dwords) {2685printf("NULL cmd buffer!\n");2686return;2687}26882689assert(ib < ARRAY_SIZE(draws));2690draws[ib] = 0;26912692while (dwords_left > 0) {26932694current_draw_count = draw_count;26952696/* hack, this looks like a -1 underflow, in some versions2697* when it tries to write zero registers via pkt02698*/2699// if ((dwords[0] >> 16) == 0xffff)2700// goto skip;27012702if (pkt_is_type0(dwords[0])) {2703printl(3, "t0");2704count = type0_pkt_size(dwords[0]) + 1;2705val = type0_pkt_offset(dwords[0]);2706assert(val < regcnt());2707printl(3, "%swrite %s%s (%04x)\n", levels[level + 1], regname(val, 1),2708(dwords[0] & 0x8000) ? " (same register)" : "", val);2709dump_registers(val, dwords + 1, count - 1, level + 2);2710if (!quiet(3))2711dump_hex(dwords, count, level + 1);2712} else if (pkt_is_type4(dwords[0])) {2713/* basically the same(ish) as type0 prior to a5xx */2714printl(3, "t4");2715count = type4_pkt_size(dwords[0]) + 1;2716val = type4_pkt_offset(dwords[0]);2717assert(val < regcnt());2718printl(3, "%swrite %s (%04x)\n", levels[level + 1], regname(val, 1),2719val);2720dump_registers(val, dwords + 1, count - 1, level + 2);2721if (!quiet(3))2722dump_hex(dwords, count, level + 1);2723#if 02724} else if (pkt_is_type1(dwords[0])) {2725printl(3, "t1");2726count = 3;2727val = dwords[0] & 0xfff;2728printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));2729dump_registers(val, dwords+1, 1, level+2);2730val = (dwords[0] >> 12) & 0xfff;2731printl(3, "%swrite %s\n", levels[level+1], regname(val, 1));2732dump_registers(val, dwords+2, 1, level+2);2733if (!quiet(3))2734dump_hex(dwords, count, level+1);2735} else if (pkt_is_type2(dwords[0])) {2736printl(3, "t2");2737printf("%sNOP\n", levels[level+1]);2738count = 1;2739if (!quiet(3))2740dump_hex(dwords, count, level+1);2741#endif2742} else if (pkt_is_type3(dwords[0])) {2743count = type3_pkt_size(dwords[0]) + 1;2744val = cp_type3_opcode(dwords[0]);2745const struct type3_op *op = get_type3_op(val);2746if (op->options.load_all_groups)2747load_all_groups(level + 1);2748printl(3, "t3");2749const char *name = pktname(val);2750if (!quiet(2)) {2751printf("\t%sopcode: %s%s%s (%02x) (%d dwords)%s\n", levels[level],2752rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,2753count, (dwords[0] & 0x1) ? " (predicated)" : "");2754}2755if (name)2756dump_domain(dwords + 1, count - 1, level + 2, name);2757op->fxn(dwords + 1, count - 1, level + 1);2758if (!quiet(2))2759dump_hex(dwords, count, level + 1);2760} else if (pkt_is_type7(dwords[0])) {2761count = type7_pkt_size(dwords[0]) + 1;2762val = cp_type7_opcode(dwords[0]);2763const struct type3_op *op = get_type3_op(val);2764if (op->options.load_all_groups)2765load_all_groups(level + 1);2766printl(3, "t7");2767const char *name = pktname(val);2768if (!quiet(2)) {2769printf("\t%sopcode: %s%s%s (%02x) (%d dwords)\n", levels[level],2770rnn->vc->colors->bctarg, name, rnn->vc->colors->reset, val,2771count);2772}2773if (name) {2774/* special hack for two packets that decode the same way2775* on a6xx:2776*/2777if (!strcmp(name, "CP_LOAD_STATE6_FRAG") ||2778!strcmp(name, "CP_LOAD_STATE6_GEOM"))2779name = "CP_LOAD_STATE6";2780dump_domain(dwords + 1, count - 1, level + 2, name);2781}2782op->fxn(dwords + 1, count - 1, level + 1);2783if (!quiet(2))2784dump_hex(dwords, count, level + 1);2785} else if (pkt_is_type2(dwords[0])) {2786printl(3, "t2");2787printl(3, "%snop\n", levels[level + 1]);2788} else {2789/* for 5xx+ we can do a passable job of looking for start of next valid2790* packet: */2791if (options->gpu_id >= 500) {2792while (dwords_left > 0) {2793if (pkt_is_type7(dwords[0]) || pkt_is_type4(dwords[0]))2794break;2795printf("bad type! %08x\n", dwords[0]);2796dwords++;2797dwords_left--;2798}2799} else {2800printf("bad type! %08x\n", dwords[0]);2801return;2802}2803}28042805dwords += count;2806dwords_left -= count;2807}28082809if (dwords_left < 0)2810printf("**** this ain't right!! dwords_left=%d\n", dwords_left);2811}281228132814