Path: blob/21.2-virgl/src/freedreno/ir3/disasm-a3xx.c
4565 views
/*1* Copyright (c) 2013 Rob Clark <[email protected]>2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>25#include <stdint.h>26#include <stdio.h>27#include <stdlib.h>28#include <string.h>2930#include <util/log.h>31#include <util/u_debug.h>3233#include "isa/isa.h"3435#include "disasm.h"36#include "instr-a3xx.h"3738static enum debug_t debug;3940static const char *levels[] = {41"",42"\t",43"\t\t",44"\t\t\t",45"\t\t\t\t",46"\t\t\t\t\t",47"\t\t\t\t\t\t",48"\t\t\t\t\t\t\t",49"\t\t\t\t\t\t\t\t",50"\t\t\t\t\t\t\t\t\t",51"x",52"x",53"x",54"x",55"x",56"x",57};5859struct disasm_ctx {60FILE *out;61struct isa_decode_options *options;62unsigned level;63unsigned extra_cycles;6465/**66* nop_count/has_end used to detect the real end of shader. Since67* in some cases there can be a epilogue following an `end` we look68* for a sequence of `nop`s following the `end`69*/70int nop_count; /* number of nop's since non-nop instruction: */71bool has_end; /* have we seen end instruction */7273int cur_n; /* current instr # */74int cur_opc_cat; /* current opc_cat */7576int sfu_delay;7778/**79* State accumulated decoding fields of the current instruction,80* handled after decoding is complete (ie. at start of next instr)81*/82struct {83bool ss;84uint8_t nop;85uint8_t repeat;86} last;8788/**89* State accumulated decoding fields of src or dst register90*/91struct {92bool half;93bool r;94enum {95FILE_GPR = 1,96FILE_CONST = 2,97} file;98unsigned num;99} reg;100101struct shader_stats *stats;102};103104static void105print_stats(struct disasm_ctx *ctx)106{107if (ctx->options->gpu_id >= 600) {108/* handle MERGEREGS case.. this isn't *entirely* accurate, as109* you can have shader stages not using merged register file,110* but it is good enough for a guestimate:111*/112unsigned n = (ctx->stats->halfreg + 1) / 2;113114ctx->stats->halfreg = 0;115ctx->stats->fullreg = MAX2(ctx->stats->fullreg, n);116}117118unsigned instructions = ctx->cur_n + ctx->extra_cycles + 1;119120fprintf(ctx->out, "%sStats:\n", levels[ctx->level]);121fprintf(ctx->out,122"%s- shaderdb: %u instr, %u nops, %u non-nops, %u mov, %u cov\n",123levels[ctx->level], instructions, ctx->stats->nops,124instructions - ctx->stats->nops, ctx->stats->mov_count,125ctx->stats->cov_count);126127fprintf(ctx->out,128"%s- shaderdb: %u last-baryf, %d half, %d full, %u constlen\n",129levels[ctx->level], ctx->stats->last_baryf,130DIV_ROUND_UP(ctx->stats->halfreg, 4),131DIV_ROUND_UP(ctx->stats->fullreg, 4),132DIV_ROUND_UP(ctx->stats->constlen, 4));133134fprintf(135ctx->out,136"%s- shaderdb: %u cat0, %u cat1, %u cat2, %u cat3, %u cat4, %u cat5, %u cat6, %u cat7\n",137levels[ctx->level], ctx->stats->instrs_per_cat[0],138ctx->stats->instrs_per_cat[1], ctx->stats->instrs_per_cat[2],139ctx->stats->instrs_per_cat[3], ctx->stats->instrs_per_cat[4],140ctx->stats->instrs_per_cat[5], ctx->stats->instrs_per_cat[6],141ctx->stats->instrs_per_cat[7]);142143fprintf(ctx->out, "%s- shaderdb: %u sstall, %u (ss), %u (sy)\n",144levels[ctx->level], ctx->stats->sstall, ctx->stats->ss,145ctx->stats->sy);146}147148/* size of largest OPC field of all the instruction categories: */149#define NOPC_BITS 6150151static const struct opc_info {152const char *name;153} opcs[1 << (3 + NOPC_BITS)] = {154#define OPC(cat, opc, name) [(opc)] = {#name}155/* clang-format off */156/* category 0: */157OPC(0, OPC_NOP, nop),158OPC(0, OPC_B, b),159OPC(0, OPC_JUMP, jump),160OPC(0, OPC_CALL, call),161OPC(0, OPC_RET, ret),162OPC(0, OPC_KILL, kill),163OPC(0, OPC_DEMOTE, demote),164OPC(0, OPC_END, end),165OPC(0, OPC_EMIT, emit),166OPC(0, OPC_CUT, cut),167OPC(0, OPC_CHMASK, chmask),168OPC(0, OPC_CHSH, chsh),169OPC(0, OPC_FLOW_REV, flow_rev),170OPC(0, OPC_PREDT, predt),171OPC(0, OPC_PREDF, predf),172OPC(0, OPC_PREDE, prede),173OPC(0, OPC_BKT, bkt),174OPC(0, OPC_STKS, stks),175OPC(0, OPC_STKR, stkr),176OPC(0, OPC_XSET, xset),177OPC(0, OPC_XCLR, xclr),178OPC(0, OPC_GETONE, getone),179OPC(0, OPC_DBG, dbg),180OPC(0, OPC_SHPS, shps),181OPC(0, OPC_SHPE, shpe),182183/* category 1: */184OPC(1, OPC_MOV, ),185OPC(1, OPC_MOVMSK, movmsk),186OPC(1, OPC_SWZ, swz),187OPC(1, OPC_SCT, sct),188OPC(1, OPC_GAT, gat),189OPC(1, OPC_BALLOT_MACRO, ballot.macro),190OPC(1, OPC_ANY_MACRO, any.macro),191OPC(1, OPC_ALL_MACRO, all.macro),192OPC(1, OPC_ELECT_MACRO, elect.macro),193OPC(1, OPC_READ_COND_MACRO, read_cond.macro),194OPC(1, OPC_READ_FIRST_MACRO, read_first.macro),195OPC(1, OPC_SWZ_SHARED_MACRO, swz_shared.macro),196197/* category 2: */198OPC(2, OPC_ADD_F, add.f),199OPC(2, OPC_MIN_F, min.f),200OPC(2, OPC_MAX_F, max.f),201OPC(2, OPC_MUL_F, mul.f),202OPC(2, OPC_SIGN_F, sign.f),203OPC(2, OPC_CMPS_F, cmps.f),204OPC(2, OPC_ABSNEG_F, absneg.f),205OPC(2, OPC_CMPV_F, cmpv.f),206OPC(2, OPC_FLOOR_F, floor.f),207OPC(2, OPC_CEIL_F, ceil.f),208OPC(2, OPC_RNDNE_F, rndne.f),209OPC(2, OPC_RNDAZ_F, rndaz.f),210OPC(2, OPC_TRUNC_F, trunc.f),211OPC(2, OPC_ADD_U, add.u),212OPC(2, OPC_ADD_S, add.s),213OPC(2, OPC_SUB_U, sub.u),214OPC(2, OPC_SUB_S, sub.s),215OPC(2, OPC_CMPS_U, cmps.u),216OPC(2, OPC_CMPS_S, cmps.s),217OPC(2, OPC_MIN_U, min.u),218OPC(2, OPC_MIN_S, min.s),219OPC(2, OPC_MAX_U, max.u),220OPC(2, OPC_MAX_S, max.s),221OPC(2, OPC_ABSNEG_S, absneg.s),222OPC(2, OPC_AND_B, and.b),223OPC(2, OPC_OR_B, or.b),224OPC(2, OPC_NOT_B, not.b),225OPC(2, OPC_XOR_B, xor.b),226OPC(2, OPC_CMPV_U, cmpv.u),227OPC(2, OPC_CMPV_S, cmpv.s),228OPC(2, OPC_MUL_U24, mul.u24),229OPC(2, OPC_MUL_S24, mul.s24),230OPC(2, OPC_MULL_U, mull.u),231OPC(2, OPC_BFREV_B, bfrev.b),232OPC(2, OPC_CLZ_S, clz.s),233OPC(2, OPC_CLZ_B, clz.b),234OPC(2, OPC_SHL_B, shl.b),235OPC(2, OPC_SHR_B, shr.b),236OPC(2, OPC_ASHR_B, ashr.b),237OPC(2, OPC_BARY_F, bary.f),238OPC(2, OPC_MGEN_B, mgen.b),239OPC(2, OPC_GETBIT_B, getbit.b),240OPC(2, OPC_SETRM, setrm),241OPC(2, OPC_CBITS_B, cbits.b),242OPC(2, OPC_SHB, shb),243OPC(2, OPC_MSAD, msad),244245/* category 3: */246OPC(3, OPC_MAD_U16, mad.u16),247OPC(3, OPC_MADSH_U16, madsh.u16),248OPC(3, OPC_MAD_S16, mad.s16),249OPC(3, OPC_MADSH_M16, madsh.m16),250OPC(3, OPC_MAD_U24, mad.u24),251OPC(3, OPC_MAD_S24, mad.s24),252OPC(3, OPC_MAD_F16, mad.f16),253OPC(3, OPC_MAD_F32, mad.f32),254OPC(3, OPC_SEL_B16, sel.b16),255OPC(3, OPC_SEL_B32, sel.b32),256OPC(3, OPC_SEL_S16, sel.s16),257OPC(3, OPC_SEL_S32, sel.s32),258OPC(3, OPC_SEL_F16, sel.f16),259OPC(3, OPC_SEL_F32, sel.f32),260OPC(3, OPC_SAD_S16, sad.s16),261OPC(3, OPC_SAD_S32, sad.s32),262OPC(3, OPC_SHLG_B16, shlg.b16),263264/* category 4: */265OPC(4, OPC_RCP, rcp),266OPC(4, OPC_RSQ, rsq),267OPC(4, OPC_LOG2, log2),268OPC(4, OPC_EXP2, exp2),269OPC(4, OPC_SIN, sin),270OPC(4, OPC_COS, cos),271OPC(4, OPC_SQRT, sqrt),272OPC(4, OPC_HRSQ, hrsq),273OPC(4, OPC_HLOG2, hlog2),274OPC(4, OPC_HEXP2, hexp2),275276/* category 5: */277OPC(5, OPC_ISAM, isam),278OPC(5, OPC_ISAML, isaml),279OPC(5, OPC_ISAMM, isamm),280OPC(5, OPC_SAM, sam),281OPC(5, OPC_SAMB, samb),282OPC(5, OPC_SAML, saml),283OPC(5, OPC_SAMGQ, samgq),284OPC(5, OPC_GETLOD, getlod),285OPC(5, OPC_CONV, conv),286OPC(5, OPC_CONVM, convm),287OPC(5, OPC_GETSIZE, getsize),288OPC(5, OPC_GETBUF, getbuf),289OPC(5, OPC_GETPOS, getpos),290OPC(5, OPC_GETINFO, getinfo),291OPC(5, OPC_DSX, dsx),292OPC(5, OPC_DSY, dsy),293OPC(5, OPC_GATHER4R, gather4r),294OPC(5, OPC_GATHER4G, gather4g),295OPC(5, OPC_GATHER4B, gather4b),296OPC(5, OPC_GATHER4A, gather4a),297OPC(5, OPC_SAMGP0, samgp0),298OPC(5, OPC_SAMGP1, samgp1),299OPC(5, OPC_SAMGP2, samgp2),300OPC(5, OPC_SAMGP3, samgp3),301OPC(5, OPC_DSXPP_1, dsxpp.1),302OPC(5, OPC_DSYPP_1, dsypp.1),303OPC(5, OPC_RGETPOS, rgetpos),304OPC(5, OPC_RGETINFO, rgetinfo),305/* macros are needed here for ir3_print */306OPC(5, OPC_DSXPP_MACRO, dsxpp.macro),307OPC(5, OPC_DSYPP_MACRO, dsypp.macro),308309310/* category 6: */311OPC(6, OPC_LDG, ldg),312OPC(6, OPC_LDG_A, ldg.a),313OPC(6, OPC_LDL, ldl),314OPC(6, OPC_LDP, ldp),315OPC(6, OPC_STG, stg),316OPC(6, OPC_STG_A, stg.a),317OPC(6, OPC_STL, stl),318OPC(6, OPC_STP, stp),319OPC(6, OPC_LDIB, ldib),320OPC(6, OPC_G2L, g2l),321OPC(6, OPC_L2G, l2g),322OPC(6, OPC_PREFETCH, prefetch),323OPC(6, OPC_LDLW, ldlw),324OPC(6, OPC_STLW, stlw),325OPC(6, OPC_RESFMT, resfmt),326OPC(6, OPC_RESINFO, resinfo),327OPC(6, OPC_ATOMIC_ADD, atomic.add),328OPC(6, OPC_ATOMIC_SUB, atomic.sub),329OPC(6, OPC_ATOMIC_XCHG, atomic.xchg),330OPC(6, OPC_ATOMIC_INC, atomic.inc),331OPC(6, OPC_ATOMIC_DEC, atomic.dec),332OPC(6, OPC_ATOMIC_CMPXCHG, atomic.cmpxchg),333OPC(6, OPC_ATOMIC_MIN, atomic.min),334OPC(6, OPC_ATOMIC_MAX, atomic.max),335OPC(6, OPC_ATOMIC_AND, atomic.and),336OPC(6, OPC_ATOMIC_OR, atomic.or),337OPC(6, OPC_ATOMIC_XOR, atomic.xor),338OPC(6, OPC_LDGB, ldgb),339OPC(6, OPC_STGB, stgb),340OPC(6, OPC_STIB, stib),341OPC(6, OPC_LDC, ldc),342OPC(6, OPC_LDLV, ldlv),343OPC(6, OPC_PIPR, pipr),344OPC(6, OPC_PIPC, pipc),345OPC(6, OPC_EMIT2, emit),346OPC(6, OPC_ENDLS, endls),347OPC(6, OPC_GETSPID, getspid),348OPC(6, OPC_GETWID, getwid),349350OPC(7, OPC_BAR, bar),351OPC(7, OPC_FENCE, fence),352/* clang-format on */353#undef OPC354};355356#define GETINFO(instr) \357(&(opcs[((instr)->opc_cat << NOPC_BITS) | instr_opc(instr, ctx->gpu_id)]))358359const char *360disasm_a3xx_instr_name(opc_t opc)361{362if (opc_cat(opc) == -1)363return "??meta??";364return opcs[opc].name;365}366367static void368disasm_field_cb(void *d, const char *field_name, struct isa_decode_value *val)369{370struct disasm_ctx *ctx = d;371372if (!strcmp(field_name, "NAME")) {373if (!strcmp("nop", val->str)) {374if (ctx->has_end) {375ctx->nop_count++;376if (ctx->nop_count > 3) {377ctx->options->stop = true;378}379}380ctx->stats->nops += 1 + ctx->last.repeat;381} else {382ctx->nop_count = 0;383}384385if (!strcmp("end", val->str)) {386ctx->has_end = true;387ctx->nop_count = 0;388} else if (!strcmp("chsh", val->str)) {389ctx->options->stop = true;390} else if (!strcmp("bary.f", val->str)) {391ctx->stats->last_baryf = ctx->cur_n;392}393} else if (!strcmp(field_name, "REPEAT")) {394ctx->extra_cycles += val->num;395ctx->stats->instrs_per_cat[ctx->cur_opc_cat] += val->num;396ctx->last.repeat = val->num;397} else if (!strcmp(field_name, "NOP")) {398ctx->extra_cycles += val->num;399ctx->stats->instrs_per_cat[0] += val->num;400ctx->stats->nops += val->num;401ctx->last.nop = val->num;402} else if (!strcmp(field_name, "SY")) {403ctx->stats->sy += val->num;404} else if (!strcmp(field_name, "SS")) {405ctx->stats->ss += val->num;406ctx->last.ss = !!val->num;407} else if (!strcmp(field_name, "CONST")) {408ctx->reg.num = val->num;409ctx->reg.file = FILE_CONST;410} else if (!strcmp(field_name, "GPR")) {411/* don't count GPR regs r48.x (shared) or higher: */412if (val->num < 48) {413ctx->reg.num = val->num;414ctx->reg.file = FILE_GPR;415}416} else if (!strcmp(field_name, "SRC_R") || !strcmp(field_name, "SRC1_R") ||417!strcmp(field_name, "SRC2_R") || !strcmp(field_name, "SRC3_R")) {418ctx->reg.r = val->num;419} else if (!strcmp(field_name, "DST")) {420/* Dest register is always repeated421*422* Note that this doesn't really properly handle instructions423* that write multiple components.. the old disasm didn't handle424* that case either.425*/426ctx->reg.r = true;427} else if (strstr(field_name, "HALF")) {428ctx->reg.half = val->num;429} else if (!strcmp(field_name, "SWIZ")) {430unsigned num = (ctx->reg.num << 2) | val->num;431if (ctx->reg.r)432num += ctx->last.repeat;433434if (ctx->reg.file == FILE_CONST) {435ctx->stats->constlen = MAX2(ctx->stats->constlen, num);436} else if (ctx->reg.file == FILE_GPR) {437if (ctx->reg.half) {438ctx->stats->halfreg = MAX2(ctx->stats->halfreg, num);439} else {440ctx->stats->fullreg = MAX2(ctx->stats->fullreg, num);441}442}443444memset(&ctx->reg, 0, sizeof(ctx->reg));445}446}447448/**449* Handle stat updates dealt with at the end of instruction decoding,450* ie. before beginning of next instruction451*/452static void453disasm_handle_last(struct disasm_ctx *ctx)454{455if (ctx->last.ss) {456ctx->stats->sstall += ctx->sfu_delay;457ctx->sfu_delay = 0;458}459460if (ctx->cur_opc_cat == 4) {461ctx->sfu_delay = 10;462} else {463int n = MIN2(ctx->sfu_delay, 1 + ctx->last.repeat + ctx->last.nop);464ctx->sfu_delay -= n;465}466467memset(&ctx->last, 0, sizeof(ctx->last));468}469470static void471disasm_instr_cb(void *d, unsigned n, uint64_t instr)472{473struct disasm_ctx *ctx = d;474uint32_t *dwords = (uint32_t *)&instr;475unsigned opc_cat = instr >> 61;476477/* There are some cases where we can get instr_cb called multiple478* times per instruction (like when we need an extra line for branch479* target labels), don't update stats in these cases:480*/481if (n != ctx->cur_n) {482if (n > 0) {483disasm_handle_last(ctx);484}485ctx->stats->instrs_per_cat[opc_cat]++;486ctx->cur_n = n;487488/* mov vs cov stats are a bit harder to fish out of the field489* names, because current ir3-cat1.xml doesn't use {NAME} for490* this distinction. So for now just handle this case with491* some hand-coded parsing:492*/493if (opc_cat == 1) {494unsigned opc = (instr >> 57) & 0x3;495unsigned src_type = (instr >> 50) & 0x7;496unsigned dst_type = (instr >> 46) & 0x7;497498if (opc == 0) {499if (src_type == dst_type) {500ctx->stats->mov_count++;501} else {502ctx->stats->cov_count++;503}504}505}506}507508ctx->cur_opc_cat = opc_cat;509510if (debug & PRINT_RAW) {511fprintf(ctx->out, "%s:%d:%04d:%04d[%08xx_%08xx] ", levels[ctx->level],512opc_cat, n, ctx->extra_cycles + n, dwords[1], dwords[0]);513}514}515516int517disasm_a3xx_stat(uint32_t *dwords, int sizedwords, int level, FILE *out,518unsigned gpu_id, struct shader_stats *stats)519{520struct isa_decode_options decode_options = {521.gpu_id = gpu_id,522.show_errors = true,523.max_errors = 5,524.branch_labels = true,525.field_cb = disasm_field_cb,526.instr_cb = disasm_instr_cb,527};528struct disasm_ctx ctx = {529.out = out,530.level = level,531.options = &decode_options,532.stats = stats,533.cur_n = -1,534};535536memset(stats, 0, sizeof(*stats));537538decode_options.cbdata = &ctx;539540isa_decode(dwords, sizedwords * 4, out, &decode_options);541542disasm_handle_last(&ctx);543544if (debug & PRINT_STATS)545print_stats(&ctx);546547return 0;548}549550void551disasm_a3xx_set_debug(enum debug_t d)552{553debug = d;554}555556#include <setjmp.h>557558static bool jmp_env_valid;559static jmp_buf jmp_env;560561void562ir3_assert_handler(const char *expr, const char *file, int line,563const char *func)564{565mesa_loge("%s:%u: %s: Assertion `%s' failed.", file, line, func, expr);566if (jmp_env_valid)567longjmp(jmp_env, 1);568abort();569}570571#define TRY(x) \572do { \573assert(!jmp_env_valid); \574if (setjmp(jmp_env) == 0) { \575jmp_env_valid = true; \576x; \577} \578jmp_env_valid = false; \579} while (0)580581int582disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,583unsigned gpu_id)584{585struct shader_stats stats;586return disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats);587}588589int590try_disasm_a3xx(uint32_t *dwords, int sizedwords, int level, FILE *out,591unsigned gpu_id)592{593struct shader_stats stats;594int ret = -1;595TRY(ret = disasm_a3xx_stat(dwords, sizedwords, level, out, gpu_id, &stats));596return ret;597}598599600