Path: blob/21.2-virgl/src/panfrost/midgard/disassemble.c
4564 views
/* Author(s):1* Connor Abbott2* Alyssa Rosenzweig3*4* Copyright (c) 2013 Connor Abbott ([email protected])5* Copyright (c) 2018 Alyssa Rosenzweig ([email protected])6* Copyright (C) 2019-2020 Collabora, Ltd.7*8* Permission is hereby granted, free of charge, to any person obtaining a copy9* of this software and associated documentation files (the "Software"), to deal10* in the Software without restriction, including without limitation the rights11* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell12* copies of the Software, and to permit persons to whom the Software is13* furnished to do so, subject to the following conditions:14*15* The above copyright notice and this permission notice shall be included in16* all copies or substantial portions of the Software.17*18* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR19* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,20* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE21* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER22* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,23* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN24* THE SOFTWARE.25*/2627#include <stdio.h>28#include <stdint.h>29#include <stdlib.h>30#include <assert.h>31#include <inttypes.h>32#include <ctype.h>33#include <string.h>34#include "midgard.h"35#include "midgard_ops.h"36#include "midgard_quirks.h"37#include "disassemble.h"38#include "helpers.h"39#include "util/bitscan.h"40#include "util/half_float.h"41#include "util/u_math.h"4243#define DEFINE_CASE(define, str) case define: { fprintf(fp, str); break; }4445/* These are not mapped to hardware values, they just represent the possible46* implicit arg modifiers that some midgard opcodes have, which can be decoded47* from the opcodes via midgard_{alu,ldst,tex}_special_arg_mod() */48typedef enum {49midgard_arg_mod_none = 0,50midgard_arg_mod_inv,51midgard_arg_mod_x2,52} midgard_special_arg_mod;5354static unsigned *midg_tags;55static bool is_instruction_int = false;5657/* Stats */5859static struct midgard_disasm_stats midg_stats;6061/* Transform an expanded writemask (duplicated 8-bit format) into its condensed62* form (one bit per component) */6364static inline unsigned65condense_writemask(unsigned expanded_mask,66unsigned bits_per_component)67{68if (bits_per_component == 8) {69/* Duplicate every bit to go from 8 to 16-channel wrmask */70unsigned omask = 0;7172for (unsigned i = 0; i < 8; ++i) {73if (expanded_mask & (1 << i))74omask |= (3 << (2 * i));75}7677return omask;78}7980unsigned slots_per_component = bits_per_component / 16;81unsigned max_comp = (16 * 8) / bits_per_component;82unsigned condensed_mask = 0;8384for (unsigned i = 0; i < max_comp; i++) {85if (expanded_mask & (1 << (i * slots_per_component)))86condensed_mask |= (1 << i);87}8889return condensed_mask;90}9192static void93print_alu_opcode(FILE *fp, midgard_alu_op op)94{95if (alu_opcode_props[op].name)96fprintf(fp, "%s", alu_opcode_props[op].name);97else98fprintf(fp, "alu_op_%02X", op);99100/* For constant analysis */101is_instruction_int = midgard_is_integer_op(op);102}103104static void105print_ld_st_opcode(FILE *fp, midgard_load_store_op op)106{107if (load_store_opcode_props[op].name)108fprintf(fp, "%s", load_store_opcode_props[op].name);109else110fprintf(fp, "ldst_op_%02X", op);111}112113static void114validate_sampler_type(enum mali_texture_op op, enum mali_sampler_type sampler_type)115{116if (op == midgard_tex_op_mov || op == midgard_tex_op_barrier)117assert(sampler_type == 0);118else119assert(sampler_type > 0);120}121122static void123validate_expand_mode(midgard_src_expand_mode expand_mode,124midgard_reg_mode reg_mode)125{126switch (expand_mode) {127case midgard_src_passthrough:128break;129130case midgard_src_rep_low:131assert(reg_mode == midgard_reg_mode_8 ||132reg_mode == midgard_reg_mode_16);133break;134135case midgard_src_rep_high:136assert(reg_mode == midgard_reg_mode_8 ||137reg_mode == midgard_reg_mode_16);138break;139140case midgard_src_swap:141assert(reg_mode == midgard_reg_mode_8 ||142reg_mode == midgard_reg_mode_16);143break;144145case midgard_src_expand_low:146assert(reg_mode != midgard_reg_mode_8);147break;148149case midgard_src_expand_high:150assert(reg_mode != midgard_reg_mode_8);151break;152153case midgard_src_expand_low_swap:154assert(reg_mode == midgard_reg_mode_16);155break;156157case midgard_src_expand_high_swap:158assert(reg_mode == midgard_reg_mode_16);159break;160161default:162unreachable("Invalid expand mode");163break;164}165}166167/* For static analysis to ensure all registers are written at least once before168* use along the source code path (TODO: does this break done for complex CF?)169*/170171uint16_t midg_ever_written = 0;172173static void174print_alu_reg(FILE *fp, unsigned reg, bool is_write)175{176unsigned uniform_reg = 23 - reg;177bool is_uniform = false;178179/* For r8-r15, it could be a work or uniform. We distinguish based on180* the fact work registers are ALWAYS written before use, but uniform181* registers are NEVER written before use. */182183if ((reg >= 8 && reg < 16) && !(midg_ever_written & (1 << reg)))184is_uniform = true;185186/* r16-r23 are always uniform */187188if (reg >= 16 && reg <= 23)189is_uniform = true;190191/* Update the uniform count appropriately */192193if (is_uniform)194midg_stats.uniform_count =195MAX2(uniform_reg + 1, midg_stats.uniform_count);196197if (reg == REGISTER_UNUSED || reg == REGISTER_UNUSED + 1)198fprintf(fp, "TMP%u", reg - REGISTER_UNUSED);199else if (reg == REGISTER_TEXTURE_BASE || reg == REGISTER_TEXTURE_BASE + 1)200fprintf(fp, "%s%u", is_write ? "AT" : "TA", reg - REGISTER_TEXTURE_BASE);201else if (reg == REGISTER_LDST_BASE || reg == REGISTER_LDST_BASE + 1)202fprintf(fp, "AL%u", reg - REGISTER_LDST_BASE);203else if (is_uniform)204fprintf(fp, "U%u", uniform_reg);205else if (reg == 31 && !is_write)206fprintf(fp, "PC_SP");207else208fprintf(fp, "R%u", reg);209}210211static void212print_ldst_write_reg(FILE *fp, unsigned reg)213{214switch (reg) {215case 26:216case 27:217fprintf(fp, "AL%u", reg - REGISTER_LDST_BASE);218break;219case 28:220case 29:221fprintf(fp, "AT%u", reg - REGISTER_TEXTURE_BASE);222break;223case 31:224fprintf(fp, "PC_SP");225break;226default:227fprintf(fp, "R%d", reg);228break;229}230}231232static void233print_ldst_read_reg(FILE *fp, unsigned reg)234{235switch (reg) {236case 0:237case 1:238fprintf(fp, "AL%u", reg);239break;240case 2:241fprintf(fp, "PC_SP");242break;243case 3:244fprintf(fp, "LOCAL_STORAGE_PTR");245break;246case 4:247fprintf(fp, "LOCAL_THREAD_ID");248break;249case 5:250fprintf(fp, "GROUP_ID");251break;252case 6:253fprintf(fp, "GLOBAL_THREAD_ID");254break;255case 7:256fprintf(fp, "0");257break;258default:259unreachable("Invalid load/store register read");260}261}262263static void264print_tex_reg(FILE *fp, unsigned reg, bool is_write)265{266char *str = is_write ? "TA" : "AT";267int select = reg & 1;268269switch (reg) {270case 0:271case 1:272fprintf(fp, "R%d", select);273break;274case 26:275case 27:276fprintf(fp, "AL%d", select);277break;278case 28:279case 29:280fprintf(fp, "%s%d", str, select);281break;282default:283unreachable("Invalid texture register");284}285}286287288static char *outmod_names_float[4] = {289"",290".clamp_0_inf",291".clamp_m1_1",292".clamp_0_1"293};294295static char *outmod_names_int[4] = {296".ssat",297".usat",298".keeplo",299".keephi"300};301302static char *srcmod_names_int[4] = {303".sext",304".zext",305".replicate",306".lshift",307};308309static char *argmod_names[3] = {310"",311".inv",312".x2",313};314315static char *index_format_names[4] = {316"",317".u64",318".u32",319".s32"320};321322static void323print_outmod(FILE *fp, unsigned outmod, bool is_int)324{325fprintf(fp, "%s", is_int ? outmod_names_int[outmod] :326outmod_names_float[outmod]);327}328329static void330print_alu_outmod(FILE *fp, unsigned outmod, bool is_int, bool half)331{332if (is_int && !half) {333assert(outmod == midgard_outmod_keeplo);334return;335}336337if (!is_int && half)338fprintf(fp, ".shrink");339340print_outmod(fp, outmod, is_int);341}342343/* arg == 0 (dest), arg == 1 (src1), arg == 2 (src2) */344static midgard_special_arg_mod345midgard_alu_special_arg_mod(midgard_alu_op op, unsigned arg) {346midgard_special_arg_mod mod = midgard_arg_mod_none;347348switch (op) {349case midgard_alu_op_ishladd:350case midgard_alu_op_ishlsub:351if (arg == 1) mod = midgard_arg_mod_x2;352break;353354default:355break;356}357358return mod;359}360361static void362print_quad_word(FILE *fp, uint32_t *words, unsigned tabs)363{364unsigned i;365366for (i = 0; i < 4; i++)367fprintf(fp, "0x%08X%s ", words[i], i == 3 ? "" : ",");368369fprintf(fp, "\n");370}371372static const char components[16] = "xyzwefghijklmnop";373374static int375bits_for_mode(midgard_reg_mode mode)376{377switch (mode) {378case midgard_reg_mode_8:379return 8;380case midgard_reg_mode_16:381return 16;382case midgard_reg_mode_32:383return 32;384case midgard_reg_mode_64:385return 64;386default:387unreachable("Invalid reg mode");388return 0;389}390}391392static int393bits_for_mode_halved(midgard_reg_mode mode, bool half)394{395unsigned bits = bits_for_mode(mode);396397if (half)398bits >>= 1;399400return bits;401}402403static void404print_vec_selectors_64(FILE *fp, unsigned swizzle,405midgard_reg_mode reg_mode,406midgard_src_expand_mode expand_mode,407unsigned selector_offset, uint8_t mask)408{409bool expands = INPUT_EXPANDS(expand_mode);410411unsigned comp_skip = expands ? 1 : 2;412unsigned mask_bit = 0;413for (unsigned i = selector_offset; i < 4; i += comp_skip, mask_bit += 4) {414if (!(mask & (1 << mask_bit))) continue;415416unsigned a = (swizzle >> (i * 2)) & 3;417418if (INPUT_EXPANDS(expand_mode)) {419fprintf(fp, "%c", components[a]);420continue;421}422423unsigned b = (swizzle >> ((i+1) * 2)) & 3;424425/* Normally we're adjacent, but if there's an issue,426* don't make it ambiguous */427428if (b == a + 1)429fprintf(fp, "%c", a >> 1 ? 'Y' : 'X');430else431fprintf(fp, "[%c%c]", components[a], components[b]);432}433}434435static void436print_vec_selectors(FILE *fp, unsigned swizzle,437midgard_reg_mode reg_mode,438unsigned selector_offset, uint8_t mask,439unsigned *mask_offset)440{441assert(reg_mode != midgard_reg_mode_64);442443unsigned mask_skip = MAX2(bits_for_mode(reg_mode) / 16, 1);444445bool is_vec16 = reg_mode == midgard_reg_mode_8;446447for (unsigned i = 0; i < 4; i++, *mask_offset += mask_skip) {448if (!(mask & (1 << *mask_offset))) continue;449450unsigned c = (swizzle >> (i * 2)) & 3;451452/* Vec16 has two components per swizzle selector. */453if (is_vec16)454c *= 2;455456c += selector_offset;457458fprintf(fp, "%c", components[c]);459if (is_vec16)460fprintf(fp, "%c", components[c+1]);461}462}463464static void465print_vec_swizzle(FILE *fp, unsigned swizzle,466midgard_src_expand_mode expand,467midgard_reg_mode mode,468uint8_t mask)469{470unsigned bits = bits_for_mode_halved(mode, INPUT_EXPANDS(expand));471472/* Swizzle selectors are divided in two halves that are always473* mirrored, the only difference is the starting component offset.474* The number represents an offset into the components[] array. */475unsigned first_half = 0;476unsigned second_half = (128 / bits) / 2; /* only used for 8 and 16-bit */477478switch (expand) {479case midgard_src_passthrough:480if (swizzle == 0xE4) return; /* identity swizzle */481break;482483case midgard_src_expand_low:484second_half /= 2;485break;486487case midgard_src_expand_high:488first_half = second_half;489second_half += second_half / 2;490break;491492/* The rest of the cases are only used for 8 and 16-bit */493494case midgard_src_rep_low:495second_half = 0;496break;497498case midgard_src_rep_high:499first_half = second_half;500break;501502case midgard_src_swap:503first_half = second_half;504second_half = 0;505break;506507case midgard_src_expand_low_swap:508first_half = second_half / 2;509second_half = 0;510break;511512case midgard_src_expand_high_swap:513first_half = second_half + second_half / 2;514break;515516default:517unreachable("Invalid expand mode");518break;519}520521fprintf(fp, ".");522523/* Vec2 are weird so we use a separate function to simplify things. */524if (mode == midgard_reg_mode_64) {525print_vec_selectors_64(fp, swizzle, mode, expand, first_half, mask);526return;527}528529unsigned mask_offs = 0;530print_vec_selectors(fp, swizzle, mode, first_half, mask, &mask_offs);531if (mode == midgard_reg_mode_8 || mode == midgard_reg_mode_16)532print_vec_selectors(fp, swizzle, mode, second_half, mask, &mask_offs);533}534535static void536print_scalar_constant(FILE *fp, unsigned src_binary,537const midgard_constants *consts,538midgard_scalar_alu *alu)539{540midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary;541assert(consts != NULL);542543fprintf(fp, "#");544mir_print_constant_component(fp, consts, src->component,545src->full ?546midgard_reg_mode_32 : midgard_reg_mode_16,547false, src->mod, alu->op);548}549550static void551print_vector_constants(FILE *fp, unsigned src_binary,552const midgard_constants *consts,553midgard_vector_alu *alu)554{555midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary;556bool expands = INPUT_EXPANDS(src->expand_mode);557unsigned bits = bits_for_mode_halved(alu->reg_mode, expands);558unsigned max_comp = (sizeof(*consts) * 8) / bits;559unsigned comp_mask, num_comp = 0;560561assert(consts);562assert(max_comp <= 16);563564comp_mask = effective_writemask(alu->op, condense_writemask(alu->mask, bits));565num_comp = util_bitcount(comp_mask);566567fprintf(fp, "<");568bool first = true;569570for (unsigned i = 0; i < max_comp; ++i) {571if (!(comp_mask & (1 << i))) continue;572573unsigned c = (src->swizzle >> (i * 2)) & 3;574575if (bits == 16 && !expands) {576bool upper = i >= 4;577578switch (src->expand_mode) {579case midgard_src_passthrough:580c += upper * 4;581break;582case midgard_src_rep_low:583break;584case midgard_src_rep_high:585c += 4;586break;587case midgard_src_swap:588c += !upper * 4;589break;590default:591unreachable("invalid expand mode");592break;593}594} else if (bits == 32 && !expands) {595/* Implicitly ok */596} else if (bits == 64 && !expands) {597/* Implicitly ok */598} else if (bits == 8 && !expands) {599bool upper = i >= 8;600601unsigned index = (i >> 1) & 3;602unsigned base = (src->swizzle >> (index * 2)) & 3;603c = base * 2;604605switch (src->expand_mode) {606case midgard_src_passthrough:607c += upper * 8;608break;609case midgard_src_rep_low:610break;611case midgard_src_rep_high:612c += 8;613break;614case midgard_src_swap:615c += !upper * 8;616break;617default:618unreachable("invalid expand mode");619break;620}621622/* We work on twos, actually */623if (i & 1)624c++;625} else {626printf(" (%u)", src->expand_mode);627}628629if (first)630first = false;631else632fprintf(fp, ", ");633634mir_print_constant_component(fp, consts, c, alu->reg_mode,635expands, src->mod, alu->op);636}637638if (num_comp > 1)639fprintf(fp, ">");640}641642static void643print_srcmod(FILE *fp, bool is_int, bool expands, unsigned mod, bool scalar)644{645/* Modifiers change meaning depending on the op's context */646647if (is_int) {648if (expands)649fprintf(fp, "%s", srcmod_names_int[mod]);650} else {651if (mod & MIDGARD_FLOAT_MOD_ABS)652fprintf(fp, ".abs");653if (mod & MIDGARD_FLOAT_MOD_NEG)654fprintf(fp, ".neg");655if (expands)656fprintf(fp, ".widen");657}658}659660static void661print_vector_src(FILE *fp, unsigned src_binary,662midgard_reg_mode mode, unsigned reg,663midgard_shrink_mode shrink_mode,664uint8_t src_mask, bool is_int,665midgard_special_arg_mod arg_mod)666{667midgard_vector_alu_src *src = (midgard_vector_alu_src *)&src_binary;668669validate_expand_mode(src->expand_mode, mode);670671print_alu_reg(fp, reg, false);672673print_vec_swizzle(fp, src->swizzle, src->expand_mode, mode, src_mask);674675fprintf(fp, "%s", argmod_names[arg_mod]);676677print_srcmod(fp, is_int, INPUT_EXPANDS(src->expand_mode), src->mod, false);678}679680static uint16_t681decode_vector_imm(unsigned src2_reg, unsigned imm)682{683uint16_t ret;684ret = src2_reg << 11;685ret |= (imm & 0x7) << 8;686ret |= (imm >> 3) & 0xFF;687return ret;688}689690static void691print_immediate(FILE *fp, uint16_t imm)692{693if (is_instruction_int)694fprintf(fp, "#%u", imm);695else696fprintf(fp, "#%g", _mesa_half_to_float(imm));697}698699static void700update_dest(unsigned reg)701{702/* We should record writes as marking this as a work register. Store703* the max register in work_count; we'll add one at the end */704705if (reg < 16) {706midg_stats.work_count = MAX2(reg, midg_stats.work_count);707midg_ever_written |= (1 << reg);708}709}710711static void712print_dest(FILE *fp, unsigned reg)713{714update_dest(reg);715print_alu_reg(fp, reg, true);716}717718/* For 16-bit+ masks, we read off from the 8-bit mask field. For 16-bit (vec8),719* it's just one bit per channel, easy peasy. For 32-bit (vec4), it's one bit720* per channel with one duplicate bit in the middle. For 64-bit (vec2), it's721* one-bit per channel with _3_ duplicate bits in the middle. Basically, just722* subdividing the 128-bit word in 16-bit increments. For 64-bit, we uppercase723* the mask to make it obvious what happened */724725static void726print_alu_mask(FILE *fp, uint8_t mask, unsigned bits, midgard_shrink_mode shrink_mode)727{728/* Skip 'complete' masks */729730if (shrink_mode == midgard_shrink_mode_none && mask == 0xFF)731return;732733fprintf(fp, ".");734735unsigned skip = MAX2(bits / 16, 1);736bool uppercase = bits > 32;737bool tripped = false;738739/* To apply an upper destination shrink_mode, we "shift" the alphabet.740* E.g. with an upper shrink_mode on 32-bit, instead of xyzw, print efgh.741* For upper 16-bit, instead of xyzwefgh, print ijklmnop */742743const char *alphabet = components;744745if (shrink_mode == midgard_shrink_mode_upper) {746assert(bits != 8);747alphabet += (128 / bits);748}749750for (unsigned i = 0; i < 8; i += skip) {751bool a = (mask & (1 << i)) != 0;752753for (unsigned j = 1; j < skip; ++j) {754bool dupe = (mask & (1 << (i + j))) != 0;755tripped |= (dupe != a);756}757758if (a) {759/* TODO: handle shrinking from 16-bit */760unsigned comp_idx = bits == 8 ? i * 2 : i;761char c = alphabet[comp_idx / skip];762763if (uppercase) {764c = toupper(c);765assert(c == 'X' || c == 'Y');766}767768fprintf(fp, "%c", c);769if (bits == 8)770fprintf(fp, "%c", alphabet[comp_idx+1]);771}772}773774if (tripped)775fprintf(fp, " /* %X */", mask);776}777778/* TODO: 16-bit mode */779static void780print_ldst_mask(FILE *fp, unsigned mask, unsigned swizzle) {781fprintf(fp, ".");782783for (unsigned i = 0; i < 4; ++i) {784bool write = (mask & (1 << i)) != 0;785unsigned c = (swizzle >> (i * 2)) & 3;786/* We can't omit the swizzle here since many ldst ops have a787* combined swizzle/writemask, and it would be ambiguous to not788* print the masked-out components. */789fprintf(fp, "%c", write ? components[c] : '~');790}791}792793static void794print_tex_mask(FILE *fp, unsigned mask, bool upper)795{796if (mask == 0xF) {797if (upper)798fprintf(fp, "'");799800return;801}802803fprintf(fp, ".");804805for (unsigned i = 0; i < 4; ++i) {806bool a = (mask & (1 << i)) != 0;807if (a)808fprintf(fp, "%c", components[i + (upper ? 4 : 0)]);809}810}811812static void813print_vector_field(FILE *fp, const char *name, uint16_t *words, uint16_t reg_word,814const midgard_constants *consts, unsigned tabs, bool verbose)815{816midgard_reg_info *reg_info = (midgard_reg_info *)®_word;817midgard_vector_alu *alu_field = (midgard_vector_alu *) words;818midgard_reg_mode mode = alu_field->reg_mode;819midgard_alu_op op = alu_field->op;820unsigned shrink_mode = alu_field->shrink_mode;821bool is_int = midgard_is_integer_op(op);822bool is_int_out = midgard_is_integer_out_op(op);823824if (verbose)825fprintf(fp, "%s.", name);826827print_alu_opcode(fp, alu_field->op);828829/* Print lane width */830fprintf(fp, ".%c%d", is_int_out ? 'i' : 'f', bits_for_mode(mode));831832fprintf(fp, " ");833834/* Mask denoting status of 8-lanes */835uint8_t mask = alu_field->mask;836837/* First, print the destination */838print_dest(fp, reg_info->out_reg);839840if (shrink_mode != midgard_shrink_mode_none) {841bool shrinkable = (mode != midgard_reg_mode_8);842bool known = shrink_mode != 0x3; /* Unused value */843844if (!(shrinkable && known))845fprintf(fp, "/* do%u */ ", shrink_mode);846}847848/* Instructions like fdot4 do *not* replicate, ensure the849* mask is of only a single component */850851unsigned rep = GET_CHANNEL_COUNT(alu_opcode_props[op].props);852853if (rep) {854unsigned comp_mask = condense_writemask(mask, bits_for_mode(mode));855unsigned num_comp = util_bitcount(comp_mask);856if (num_comp != 1)857fprintf(fp, "/* err too many components */");858}859print_alu_mask(fp, mask, bits_for_mode(mode), shrink_mode);860861/* Print output modifiers */862863print_alu_outmod(fp, alu_field->outmod, is_int_out, shrink_mode != midgard_shrink_mode_none);864865/* Mask out unused components based on the writemask, but don't mask out866* components that are used for interlane instructions like fdot3. */867uint8_t src_mask =868rep ? expand_writemask(mask_of(rep), log2(128 / bits_for_mode(mode))) : mask;869870fprintf(fp, ", ");871872if (reg_info->src1_reg == REGISTER_CONSTANT)873print_vector_constants(fp, alu_field->src1, consts, alu_field);874else {875midgard_special_arg_mod argmod = midgard_alu_special_arg_mod(op, 1);876print_vector_src(fp, alu_field->src1, mode, reg_info->src1_reg,877shrink_mode, src_mask, is_int, argmod);878}879880fprintf(fp, ", ");881882if (reg_info->src2_imm) {883uint16_t imm = decode_vector_imm(reg_info->src2_reg, alu_field->src2 >> 2);884print_immediate(fp, imm);885} else if (reg_info->src2_reg == REGISTER_CONSTANT) {886print_vector_constants(fp, alu_field->src2, consts, alu_field);887} else {888midgard_special_arg_mod argmod = midgard_alu_special_arg_mod(op, 2);889print_vector_src(fp, alu_field->src2, mode, reg_info->src2_reg,890shrink_mode, src_mask, is_int, argmod);891}892893midg_stats.instruction_count++;894fprintf(fp, "\n");895}896897static void898print_scalar_src(FILE *fp, bool is_int, unsigned src_binary, unsigned reg)899{900midgard_scalar_alu_src *src = (midgard_scalar_alu_src *)&src_binary;901902print_alu_reg(fp, reg, false);903904unsigned c = src->component;905906if (src->full) {907assert((c & 1) == 0);908c >>= 1;909}910911fprintf(fp, ".%c", components[c]);912913print_srcmod(fp, is_int, !src->full, src->mod, true);914}915916static uint16_t917decode_scalar_imm(unsigned src2_reg, unsigned imm)918{919uint16_t ret;920ret = src2_reg << 11;921ret |= (imm & 3) << 9;922ret |= (imm & 4) << 6;923ret |= (imm & 0x38) << 2;924ret |= imm >> 6;925return ret;926}927928static void929print_scalar_field(FILE *fp, const char *name, uint16_t *words, uint16_t reg_word,930const midgard_constants *consts, unsigned tabs, bool verbose)931{932midgard_reg_info *reg_info = (midgard_reg_info *)®_word;933midgard_scalar_alu *alu_field = (midgard_scalar_alu *) words;934bool is_int = midgard_is_integer_op(alu_field->op);935bool is_int_out = midgard_is_integer_out_op(alu_field->op);936bool full = alu_field->output_full;937938if (alu_field->unknown)939fprintf(fp, "scalar ALU unknown bit set\n");940941if (verbose)942fprintf(fp, "%s.", name);943944print_alu_opcode(fp, alu_field->op);945946/* Print lane width, in this case the lane width is always 32-bit, but947* we print it anyway to make it consistent with the other instructions. */948fprintf(fp, ".%c32", is_int_out ? 'i' : 'f');949950fprintf(fp, " ");951952print_dest(fp, reg_info->out_reg);953unsigned c = alu_field->output_component;954955if (full) {956assert((c & 1) == 0);957c >>= 1;958}959960fprintf(fp, ".%c", components[c]);961962print_alu_outmod(fp, alu_field->outmod, is_int_out, !full);963964fprintf(fp, ", ");965966if (reg_info->src1_reg == REGISTER_CONSTANT)967print_scalar_constant(fp, alu_field->src1, consts, alu_field);968else969print_scalar_src(fp, is_int, alu_field->src1, reg_info->src1_reg);970971fprintf(fp, ", ");972973if (reg_info->src2_imm) {974uint16_t imm = decode_scalar_imm(reg_info->src2_reg,975alu_field->src2);976print_immediate(fp, imm);977} else if (reg_info->src2_reg == REGISTER_CONSTANT) {978print_scalar_constant(fp, alu_field->src2, consts, alu_field);979} else980print_scalar_src(fp, is_int, alu_field->src2, reg_info->src2_reg);981982midg_stats.instruction_count++;983fprintf(fp, "\n");984}985986static void987print_branch_op(FILE *fp, unsigned op)988{989switch (op) {990case midgard_jmp_writeout_op_branch_uncond:991fprintf(fp, "uncond.");992break;993994case midgard_jmp_writeout_op_branch_cond:995fprintf(fp, "cond.");996break;997998case midgard_jmp_writeout_op_writeout:999fprintf(fp, "write.");1000break;10011002case midgard_jmp_writeout_op_tilebuffer_pending:1003fprintf(fp, "tilebuffer.");1004break;10051006case midgard_jmp_writeout_op_discard:1007fprintf(fp, "discard.");1008break;10091010default:1011fprintf(fp, "unk%u.", op);1012break;1013}1014}10151016static void1017print_branch_cond(FILE *fp, int cond)1018{1019switch (cond) {1020case midgard_condition_write0:1021fprintf(fp, "write0");1022break;10231024case midgard_condition_false:1025fprintf(fp, "false");1026break;10271028case midgard_condition_true:1029fprintf(fp, "true");1030break;10311032case midgard_condition_always:1033fprintf(fp, "always");1034break;10351036default:1037fprintf(fp, "unk%X", cond);1038break;1039}1040}10411042static bool1043print_compact_branch_writeout_field(FILE *fp, uint16_t word)1044{1045midgard_jmp_writeout_op op = word & 0x7;1046midg_stats.instruction_count++;10471048switch (op) {1049case midgard_jmp_writeout_op_branch_uncond: {1050midgard_branch_uncond br_uncond;1051memcpy((char *) &br_uncond, (char *) &word, sizeof(br_uncond));1052fprintf(fp, "br.uncond ");10531054if (br_uncond.unknown != 1)1055fprintf(fp, "unknown:%u, ", br_uncond.unknown);10561057if (br_uncond.offset >= 0)1058fprintf(fp, "+");10591060fprintf(fp, "%d -> %s", br_uncond.offset,1061midgard_tag_props[br_uncond.dest_tag].name);1062fprintf(fp, "\n");10631064return br_uncond.offset >= 0;1065}10661067case midgard_jmp_writeout_op_branch_cond:1068case midgard_jmp_writeout_op_writeout:1069case midgard_jmp_writeout_op_discard:1070default: {1071midgard_branch_cond br_cond;1072memcpy((char *) &br_cond, (char *) &word, sizeof(br_cond));10731074fprintf(fp, "br.");10751076print_branch_op(fp, br_cond.op);1077print_branch_cond(fp, br_cond.cond);10781079fprintf(fp, " ");10801081if (br_cond.offset >= 0)1082fprintf(fp, "+");10831084fprintf(fp, "%d -> %s", br_cond.offset,1085midgard_tag_props[br_cond.dest_tag].name);1086fprintf(fp, "\n");10871088return br_cond.offset >= 0;1089}1090}10911092return false;1093}10941095static bool1096print_extended_branch_writeout_field(FILE *fp, uint8_t *words, unsigned next)1097{1098midgard_branch_extended br;1099memcpy((char *) &br, (char *) words, sizeof(br));11001101fprintf(fp, "brx.");11021103print_branch_op(fp, br.op);11041105/* Condition codes are a LUT in the general case, but simply repeated 8 times for single-channel conditions.. Check this. */11061107bool single_channel = true;11081109for (unsigned i = 0; i < 16; i += 2) {1110single_channel &= (((br.cond >> i) & 0x3) == (br.cond & 0x3));1111}11121113if (single_channel)1114print_branch_cond(fp, br.cond & 0x3);1115else1116fprintf(fp, "lut%X", br.cond);11171118if (br.unknown)1119fprintf(fp, ".unknown%u", br.unknown);11201121fprintf(fp, " ");11221123if (br.offset >= 0)1124fprintf(fp, "+");11251126fprintf(fp, "%d -> %s\n", br.offset,1127midgard_tag_props[br.dest_tag].name);11281129unsigned I = next + br.offset * 4;11301131if (midg_tags[I] && midg_tags[I] != br.dest_tag) {1132fprintf(fp, "\t/* XXX TAG ERROR: jumping to %s but tagged %s \n",1133midgard_tag_props[br.dest_tag].name,1134midgard_tag_props[midg_tags[I]].name);1135}11361137midg_tags[I] = br.dest_tag;11381139midg_stats.instruction_count++;1140return br.offset >= 0;1141}11421143static unsigned1144num_alu_fields_enabled(uint32_t control_word)1145{1146unsigned ret = 0;11471148if ((control_word >> 17) & 1)1149ret++;11501151if ((control_word >> 19) & 1)1152ret++;11531154if ((control_word >> 21) & 1)1155ret++;11561157if ((control_word >> 23) & 1)1158ret++;11591160if ((control_word >> 25) & 1)1161ret++;11621163return ret;1164}11651166static bool1167print_alu_word(FILE *fp, uint32_t *words, unsigned num_quad_words,1168unsigned tabs, unsigned next, bool verbose)1169{1170uint32_t control_word = words[0];1171uint16_t *beginning_ptr = (uint16_t *)(words + 1);1172unsigned num_fields = num_alu_fields_enabled(control_word);1173uint16_t *word_ptr = beginning_ptr + num_fields;1174unsigned num_words = 2 + num_fields;1175const midgard_constants *consts = NULL;1176bool branch_forward = false;11771178if ((control_word >> 17) & 1)1179num_words += 3;11801181if ((control_word >> 19) & 1)1182num_words += 2;11831184if ((control_word >> 21) & 1)1185num_words += 3;11861187if ((control_word >> 23) & 1)1188num_words += 2;11891190if ((control_word >> 25) & 1)1191num_words += 3;11921193if ((control_word >> 26) & 1)1194num_words += 1;11951196if ((control_word >> 27) & 1)1197num_words += 3;11981199if (num_quad_words > (num_words + 7) / 8) {1200assert(num_quad_words == (num_words + 15) / 8);1201//Assume that the extra quadword is constants1202consts = (midgard_constants *)(words + (4 * num_quad_words - 4));1203}12041205if ((control_word >> 16) & 1)1206fprintf(fp, "unknown bit 16 enabled\n");12071208if ((control_word >> 17) & 1) {1209print_vector_field(fp, "vmul", word_ptr, *beginning_ptr, consts, tabs, verbose);1210beginning_ptr += 1;1211word_ptr += 3;1212}12131214if ((control_word >> 18) & 1)1215fprintf(fp, "unknown bit 18 enabled\n");12161217if ((control_word >> 19) & 1) {1218print_scalar_field(fp, "sadd", word_ptr, *beginning_ptr, consts, tabs, verbose);1219beginning_ptr += 1;1220word_ptr += 2;1221}12221223if ((control_word >> 20) & 1)1224fprintf(fp, "unknown bit 20 enabled\n");12251226if ((control_word >> 21) & 1) {1227print_vector_field(fp, "vadd", word_ptr, *beginning_ptr, consts, tabs, verbose);1228beginning_ptr += 1;1229word_ptr += 3;1230}12311232if ((control_word >> 22) & 1)1233fprintf(fp, "unknown bit 22 enabled\n");12341235if ((control_word >> 23) & 1) {1236print_scalar_field(fp, "smul", word_ptr, *beginning_ptr, consts, tabs, verbose);1237beginning_ptr += 1;1238word_ptr += 2;1239}12401241if ((control_word >> 24) & 1)1242fprintf(fp, "unknown bit 24 enabled\n");12431244if ((control_word >> 25) & 1) {1245print_vector_field(fp, "lut", word_ptr, *beginning_ptr, consts, tabs, verbose);1246word_ptr += 3;1247}12481249if ((control_word >> 26) & 1) {1250branch_forward |= print_compact_branch_writeout_field(fp, *word_ptr);1251word_ptr += 1;1252}12531254if ((control_word >> 27) & 1) {1255branch_forward |= print_extended_branch_writeout_field(fp, (uint8_t *) word_ptr, next);1256word_ptr += 3;1257}12581259if (consts)1260fprintf(fp, "uconstants 0x%X, 0x%X, 0x%X, 0x%X\n",1261consts->u32[0], consts->u32[1],1262consts->u32[2], consts->u32[3]);12631264return branch_forward;1265}12661267/* TODO: how can we use this now that we know that these params can't be known1268* before run time in every single case? Maybe just use it in the cases we can? */1269UNUSED static void1270print_varying_parameters(FILE *fp, midgard_load_store_word *word)1271{1272midgard_varying_params p = midgard_unpack_varying_params(*word);12731274/* If a varying, there are qualifiers */1275if (p.flat_shading)1276fprintf(fp, ".flat");12771278if (p.perspective_correction)1279fprintf(fp, ".correction");12801281if (p.centroid_mapping)1282fprintf(fp, ".centroid");12831284if (p.interpolate_sample)1285fprintf(fp, ".sample");12861287switch (p.modifier) {1288case midgard_varying_mod_perspective_y:1289fprintf(fp, ".perspectivey");1290break;1291case midgard_varying_mod_perspective_z:1292fprintf(fp, ".perspectivez");1293break;1294case midgard_varying_mod_perspective_w:1295fprintf(fp, ".perspectivew");1296break;1297default:1298unreachable("invalid varying modifier");1299break;1300}1301}13021303static bool1304is_op_varying(unsigned op)1305{1306switch (op) {1307case midgard_op_st_vary_16:1308case midgard_op_st_vary_32:1309case midgard_op_st_vary_32i:1310case midgard_op_st_vary_32u:1311case midgard_op_ld_vary_16:1312case midgard_op_ld_vary_32:1313case midgard_op_ld_vary_32i:1314case midgard_op_ld_vary_32u:1315return true;1316}13171318return false;1319}13201321static bool1322is_op_attribute(unsigned op)1323{1324switch (op) {1325case midgard_op_ld_attr_16:1326case midgard_op_ld_attr_32:1327case midgard_op_ld_attr_32i:1328case midgard_op_ld_attr_32u:1329return true;1330}13311332return false;1333}13341335/* Helper to print integer well-formatted, but only when non-zero. */1336static void1337midgard_print_sint(FILE *fp, int n)1338{1339if (n > 0)1340fprintf(fp, " + 0x%X", n);1341else if (n < 0)1342fprintf(fp, " - 0x%X", -n);1343}13441345static void1346update_stats(signed *stat, unsigned address)1347{1348if (*stat >= 0)1349*stat = MAX2(*stat, address + 1);1350}13511352static void1353print_load_store_instr(FILE *fp, uint64_t data, bool verbose)1354{1355midgard_load_store_word *word = (midgard_load_store_word *) &data;13561357print_ld_st_opcode(fp, word->op);13581359if (word->op == midgard_op_trap) {1360fprintf(fp, " 0x%X\n", word->signed_offset);1361return;1362}13631364/* Print opcode modifiers */13651366if (OP_USES_ATTRIB(word->op)) {1367/* Print non-default attribute tables */1368bool default_secondary =1369(word->op == midgard_op_st_vary_32) ||1370(word->op == midgard_op_st_vary_16) ||1371(word->op == midgard_op_st_vary_32u) ||1372(word->op == midgard_op_st_vary_32i) ||1373(word->op == midgard_op_ld_vary_32) ||1374(word->op == midgard_op_ld_vary_16) ||1375(word->op == midgard_op_ld_vary_32u) ||1376(word->op == midgard_op_ld_vary_32i);13771378bool default_primary =1379(word->op == midgard_op_ld_attr_32) ||1380(word->op == midgard_op_ld_attr_16) ||1381(word->op == midgard_op_ld_attr_32u) ||1382(word->op == midgard_op_ld_attr_32i);13831384bool has_default = (default_secondary || default_primary);1385bool is_secondary = (word->index_format >> 1);13861387if (has_default && (is_secondary != default_secondary))1388fprintf(fp, ".%s", is_secondary ? "secondary" : "primary");1389} else if (word->op == midgard_op_ld_cubemap_coords || OP_IS_PROJECTION(word->op))1390fprintf(fp, ".%s", word->bitsize_toggle ? "f32" : "f16");13911392fprintf(fp, " ");13931394/* src/dest register */13951396if (!OP_IS_STORE(word->op)) {1397print_ldst_write_reg(fp, word->reg);13981399/* Some opcodes don't have a swizzable src register, and1400* instead the swizzle is applied before the result is written1401* to the dest reg. For these ops, we combine the writemask1402* with the swizzle to display them in the disasm compactly. */1403unsigned swizzle = word->swizzle;1404if ((OP_IS_REG2REG_LDST(word->op) &&1405word->op != midgard_op_lea &&1406word->op != midgard_op_lea_image) || OP_IS_ATOMIC(word->op))1407swizzle = 0xE4;1408print_ldst_mask(fp, word->mask, swizzle);1409} else {1410print_ldst_read_reg(fp, word->reg);1411print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,1412midgard_reg_mode_32, 0xFF);1413}14141415/* ld_ubo args */1416if (OP_IS_UBO_READ(word->op)) {1417if (word->signed_offset & 1) { /* buffer index imm */1418unsigned imm = midgard_unpack_ubo_index_imm(*word);1419fprintf(fp, ", %u", imm);1420} else { /* buffer index from reg */1421fprintf(fp, ", ");1422print_ldst_read_reg(fp, word->arg_reg);1423fprintf(fp, ".%c", components[word->arg_comp]);1424}14251426fprintf(fp, ", ");1427print_ldst_read_reg(fp, word->index_reg);1428fprintf(fp, ".%c", components[word->index_comp]);1429if (word->index_shift)1430fprintf(fp, " lsl %u", word->index_shift);1431midgard_print_sint(fp, UNPACK_LDST_UBO_OFS(word->signed_offset));1432}14331434/* mem addr expression */1435if (OP_HAS_ADDRESS(word->op)) {1436fprintf(fp, ", ");1437bool first = true;14381439/* Skip printing zero */1440if (word->arg_reg != 7 || verbose) {1441print_ldst_read_reg(fp, word->arg_reg);1442fprintf(fp, ".u%d.%c",1443word->bitsize_toggle ? 64 : 32, components[word->arg_comp]);1444first = false;1445}14461447if ((word->op < midgard_op_atomic_cmpxchg ||1448word->op > midgard_op_atomic_cmpxchg64_be) &&1449word->index_reg != 0x7) {1450if (!first)1451fprintf(fp, " + ");14521453print_ldst_read_reg(fp, word->index_reg);1454fprintf(fp, "%s.%c",1455index_format_names[word->index_format],1456components[word->index_comp]);1457if (word->index_shift)1458fprintf(fp, " lsl %u", word->index_shift);1459}14601461midgard_print_sint(fp, word->signed_offset);1462}14631464/* src reg for reg2reg ldst opcodes */1465if (OP_IS_REG2REG_LDST(word->op)) {1466fprintf(fp, ", ");1467print_ldst_read_reg(fp, word->arg_reg);1468print_vec_swizzle(fp, word->swizzle, midgard_src_passthrough,1469midgard_reg_mode_32, 0xFF);1470}14711472/* atomic ops encode the source arg where the ldst swizzle would be. */1473if (OP_IS_ATOMIC(word->op)) {1474unsigned src = (word->swizzle >> 2) & 0x7;1475unsigned src_comp = word->swizzle & 0x3;1476fprintf(fp, ", ");1477print_ldst_read_reg(fp, src);1478fprintf(fp, ".%c", components[src_comp]);1479}14801481/* CMPXCHG encodes the extra comparison arg where the index reg would be. */1482if (word->op >= midgard_op_atomic_cmpxchg &&1483word->op <= midgard_op_atomic_cmpxchg64_be) {1484fprintf(fp, ", ");1485print_ldst_read_reg(fp, word->index_reg);1486fprintf(fp, ".%c", components[word->index_comp]);1487}14881489/* index reg for attr/vary/images, selector for ld/st_special */1490if (OP_IS_SPECIAL(word->op) || OP_USES_ATTRIB(word->op)) {1491fprintf(fp, ", ");1492print_ldst_read_reg(fp, word->index_reg);1493fprintf(fp, ".%c", components[word->index_comp]);1494if (word->index_shift)1495fprintf(fp, " lsl %u", word->index_shift);1496midgard_print_sint(fp, UNPACK_LDST_ATTRIB_OFS(word->signed_offset));1497}14981499/* vertex reg for attrib/varying ops, coord reg for image ops */1500if (OP_USES_ATTRIB(word->op)) {1501fprintf(fp, ", ");1502print_ldst_read_reg(fp, word->arg_reg);15031504if (OP_IS_IMAGE(word->op))1505fprintf(fp, ".u%d", word->bitsize_toggle ? 64 : 32);15061507fprintf(fp, ".%c", components[word->arg_comp]);15081509if (word->bitsize_toggle && !OP_IS_IMAGE(word->op))1510midgard_print_sint(fp, UNPACK_LDST_VERTEX_OFS(word->signed_offset));1511}15121513/* TODO: properly decode format specifier for PACK/UNPACK ops */1514if (OP_IS_PACK_COLOUR(word->op) || OP_IS_UNPACK_COLOUR(word->op)) {1515fprintf(fp, ", ");1516unsigned format_specifier = (word->signed_offset << 4) | word->index_shift;1517fprintf(fp, "0x%X", format_specifier);1518}15191520fprintf(fp, "\n");15211522/* Debugging stuff */15231524if (is_op_varying(word->op)) {1525/* Do some analysis: check if direct access */15261527if (word->index_reg == 0x7 && midg_stats.varying_count >= 0)1528update_stats(&midg_stats.varying_count,1529UNPACK_LDST_ATTRIB_OFS(word->signed_offset));1530else1531midg_stats.varying_count = -16;1532} else if (is_op_attribute(word->op)) {1533if (word->index_reg == 0x7 && midg_stats.attribute_count >= 0)1534update_stats(&midg_stats.attribute_count,1535UNPACK_LDST_ATTRIB_OFS(word->signed_offset));1536else1537midg_stats.attribute_count = -16;1538}15391540if (!OP_IS_STORE(word->op))1541update_dest(word->reg);15421543if (OP_IS_UBO_READ(word->op))1544update_stats(&midg_stats.uniform_buffer_count,1545UNPACK_LDST_UBO_OFS(word->signed_offset));15461547midg_stats.instruction_count++;1548}15491550static void1551print_load_store_word(FILE *fp, uint32_t *word, bool verbose)1552{1553midgard_load_store *load_store = (midgard_load_store *) word;15541555if (load_store->word1 != 3) {1556print_load_store_instr(fp, load_store->word1, verbose);1557}15581559if (load_store->word2 != 3) {1560print_load_store_instr(fp, load_store->word2, verbose);1561}1562}15631564static void1565print_texture_reg_select(FILE *fp, uint8_t u, unsigned base)1566{1567midgard_tex_register_select sel;1568memcpy(&sel, &u, sizeof(u));15691570print_tex_reg(fp, base + sel.select, false);15711572unsigned component = sel.component;15731574/* Use the upper half in half-reg mode */1575if (sel.upper) {1576assert(!sel.full);1577component += 4;1578}15791580fprintf(fp, ".%c.%d", components[component], sel.full ? 32 : 16);15811582assert(sel.zero == 0);1583}15841585static void1586print_texture_format(FILE *fp, int format)1587{1588/* Act like a modifier */1589fprintf(fp, ".");15901591switch (format) {1592DEFINE_CASE(1, "1d");1593DEFINE_CASE(2, "2d");1594DEFINE_CASE(3, "3d");1595DEFINE_CASE(0, "cube");15961597default:1598unreachable("Bad format");1599}1600}16011602static bool1603midgard_op_has_helpers(unsigned op)1604{1605switch (op) {1606case midgard_tex_op_normal:1607case midgard_tex_op_derivative:1608return true;1609default:1610return false;1611}1612}16131614static void1615print_texture_op(FILE *fp, unsigned op)1616{1617if (tex_opcode_props[op].name)1618fprintf(fp, "%s", tex_opcode_props[op].name);1619else1620fprintf(fp, "tex_op_%02X", op);1621}16221623static bool1624texture_op_takes_bias(unsigned op)1625{1626return op == midgard_tex_op_normal;1627}16281629static char1630sampler_type_name(enum mali_sampler_type t)1631{1632switch (t) {1633case MALI_SAMPLER_FLOAT:1634return 'f';1635case MALI_SAMPLER_UNSIGNED:1636return 'u';1637case MALI_SAMPLER_SIGNED:1638return 'i';1639default:1640return '?';1641}16421643}16441645static void1646print_texture_barrier(FILE *fp, uint32_t *word)1647{1648midgard_texture_barrier_word *barrier = (midgard_texture_barrier_word *) word;16491650if (barrier->type != TAG_TEXTURE_4_BARRIER)1651fprintf(fp, "/* barrier tag %X != tex/bar */ ", barrier->type);16521653if (!barrier->cont)1654fprintf(fp, "/* cont missing? */");16551656if (!barrier->last)1657fprintf(fp, "/* last missing? */");16581659if (barrier->zero1)1660fprintf(fp, "/* zero1 = 0x%X */ ", barrier->zero1);16611662if (barrier->zero2)1663fprintf(fp, "/* zero2 = 0x%X */ ", barrier->zero2);16641665if (barrier->zero3)1666fprintf(fp, "/* zero3 = 0x%X */ ", barrier->zero3);16671668if (barrier->zero4)1669fprintf(fp, "/* zero4 = 0x%X */ ", barrier->zero4);16701671if (barrier->zero5)1672fprintf(fp, "/* zero4 = 0x%" PRIx64 " */ ", barrier->zero5);16731674if (barrier->out_of_order)1675fprintf(fp, ".ooo%u", barrier->out_of_order);16761677fprintf(fp, "\n");1678}16791680#undef DEFINE_CASE16811682static const char *1683texture_mode(enum mali_texture_mode mode)1684{1685switch (mode) {1686case TEXTURE_NORMAL: return "";1687case TEXTURE_SHADOW: return ".shadow";1688case TEXTURE_GATHER_SHADOW: return ".gather.shadow";1689case TEXTURE_GATHER_X: return ".gatherX";1690case TEXTURE_GATHER_Y: return ".gatherY";1691case TEXTURE_GATHER_Z: return ".gatherZ";1692case TEXTURE_GATHER_W: return ".gatherW";1693default: return "unk";1694}1695}16961697static const char *1698derivative_mode(enum mali_derivative_mode mode)1699{1700switch (mode) {1701case TEXTURE_DFDX: return ".x";1702case TEXTURE_DFDY: return ".y";1703default: return "unk";1704}1705}17061707static void1708print_texture_word(FILE *fp, uint32_t *word, unsigned tabs, unsigned in_reg_base, unsigned out_reg_base)1709{1710midgard_texture_word *texture = (midgard_texture_word *) word;1711midg_stats.helper_invocations |= midgard_op_has_helpers(texture->op);1712validate_sampler_type(texture->op, texture->sampler_type);17131714/* Broad category of texture operation in question */1715print_texture_op(fp, texture->op);17161717/* Barriers use a dramatically different code path */1718if (texture->op == midgard_tex_op_barrier) {1719print_texture_barrier(fp, word);1720return;1721} else if (texture->type == TAG_TEXTURE_4_BARRIER)1722fprintf (fp, "/* nonbarrier had tex/bar tag */ ");1723else if (texture->type == TAG_TEXTURE_4_VTX)1724fprintf (fp, ".vtx");17251726if (texture->op == midgard_tex_op_derivative)1727fprintf(fp, "%s", derivative_mode(texture->mode));1728else1729fprintf(fp, "%s", texture_mode(texture->mode));17301731/* Specific format in question */1732print_texture_format(fp, texture->format);17331734/* Instruction "modifiers" parallel the ALU instructions. */17351736if (texture->cont)1737fprintf(fp, ".cont");17381739if (texture->last)1740fprintf(fp, ".last");17411742if (texture->out_of_order)1743fprintf(fp, ".ooo%u", texture->out_of_order);17441745fprintf(fp, " ");1746print_tex_reg(fp, out_reg_base + texture->out_reg_select, true);1747print_tex_mask(fp, texture->mask, texture->out_upper);1748fprintf(fp, ".%c%d", texture->sampler_type == MALI_SAMPLER_FLOAT ? 'f' : 'i',1749texture->out_full ? 32 : 16);1750assert(!(texture->out_full && texture->out_upper));17511752/* Output modifiers are only valid for float texture operations */1753if (texture->sampler_type == MALI_SAMPLER_FLOAT)1754print_outmod(fp, texture->outmod, false);17551756fprintf(fp, ", ");17571758/* Depending on whether we read from textures directly or indirectly,1759* we may be able to update our analysis */17601761if (texture->texture_register) {1762fprintf(fp, "texture[");1763print_texture_reg_select(fp, texture->texture_handle, in_reg_base);1764fprintf(fp, "], ");17651766/* Indirect, tut tut */1767midg_stats.texture_count = -16;1768} else {1769fprintf(fp, "texture%u, ", texture->texture_handle);1770update_stats(&midg_stats.texture_count, texture->texture_handle);1771}17721773/* Print the type, GL style */1774fprintf(fp, "%csampler", sampler_type_name(texture->sampler_type));17751776if (texture->sampler_register) {1777fprintf(fp, "[");1778print_texture_reg_select(fp, texture->sampler_handle, in_reg_base);1779fprintf(fp, "]");17801781midg_stats.sampler_count = -16;1782} else {1783fprintf(fp, "%u", texture->sampler_handle);1784update_stats(&midg_stats.sampler_count, texture->sampler_handle);1785}17861787print_vec_swizzle(fp, texture->swizzle, midgard_src_passthrough, midgard_reg_mode_32, 0xFF);17881789fprintf(fp, ", ");17901791midgard_src_expand_mode exp =1792texture->in_reg_upper ? midgard_src_expand_high : midgard_src_passthrough;1793print_tex_reg(fp, in_reg_base + texture->in_reg_select, false);1794print_vec_swizzle(fp, texture->in_reg_swizzle, exp, midgard_reg_mode_32, 0xFF);1795fprintf(fp, ".%d", texture->in_reg_full ? 32 : 16);1796assert(!(texture->in_reg_full && texture->in_reg_upper));17971798/* There is *always* an offset attached. Of1799* course, that offset is just immediate #0 for a1800* GLES call that doesn't take an offset. If there1801* is a non-negative non-zero offset, this is1802* specified in immediate offset mode, with the1803* values in the offset_* fields as immediates. If1804* this is a negative offset, we instead switch to1805* a register offset mode, where the offset_*1806* fields become register triplets */18071808if (texture->offset_register) {1809fprintf(fp, " + ");18101811bool full = texture->offset & 1;1812bool select = texture->offset & 2;1813bool upper = texture->offset & 4;1814unsigned swizzle = texture->offset >> 3;1815midgard_src_expand_mode exp =1816upper ? midgard_src_expand_high : midgard_src_passthrough;18171818print_tex_reg(fp, in_reg_base + select, false);1819print_vec_swizzle(fp, swizzle, exp, midgard_reg_mode_32, 0xFF);1820fprintf(fp, ".%d", full ? 32 : 16);1821assert(!(texture->out_full && texture->out_upper));18221823fprintf(fp, ", ");1824} else if (texture->offset) {1825/* Only select ops allow negative immediate offsets, verify */18261827signed offset_x = (texture->offset & 0xF);1828signed offset_y = ((texture->offset >> 4) & 0xF);1829signed offset_z = ((texture->offset >> 8) & 0xF);18301831bool neg_x = offset_x < 0;1832bool neg_y = offset_y < 0;1833bool neg_z = offset_z < 0;1834bool any_neg = neg_x || neg_y || neg_z;18351836if (any_neg && texture->op != midgard_tex_op_fetch)1837fprintf(fp, "/* invalid negative */ ");18381839/* Regardless, just print the immediate offset */18401841fprintf(fp, " + <%d, %d, %d>, ", offset_x, offset_y, offset_z);1842} else {1843fprintf(fp, ", ");1844}18451846char lod_operand = texture_op_takes_bias(texture->op) ? '+' : '=';18471848if (texture->lod_register) {1849fprintf(fp, "lod %c ", lod_operand);1850print_texture_reg_select(fp, texture->bias, in_reg_base);1851fprintf(fp, ", ");18521853if (texture->bias_int)1854fprintf(fp, " /* bias_int = 0x%X */", texture->bias_int);1855} else if (texture->op == midgard_tex_op_fetch) {1856/* For texel fetch, the int LOD is in the fractional place and1857* there is no fraction. We *always* have an explicit LOD, even1858* if it's zero. */18591860if (texture->bias_int)1861fprintf(fp, " /* bias_int = 0x%X */ ", texture->bias_int);18621863fprintf(fp, "lod = %u, ", texture->bias);1864} else if (texture->bias || texture->bias_int) {1865signed bias_int = texture->bias_int;1866float bias_frac = texture->bias / 256.0f;1867float bias = bias_int + bias_frac;18681869bool is_bias = texture_op_takes_bias(texture->op);1870char sign = (bias >= 0.0) ? '+' : '-';1871char operand = is_bias ? sign : '=';18721873fprintf(fp, "lod %c %f, ", operand, fabsf(bias));1874}18751876fprintf(fp, "\n");18771878/* While not zero in general, for these simple instructions the1879* following unknowns are zero, so we don't include them */18801881if (texture->unknown4 ||1882texture->unknown8) {1883fprintf(fp, "// unknown4 = 0x%x\n", texture->unknown4);1884fprintf(fp, "// unknown8 = 0x%x\n", texture->unknown8);1885}18861887midg_stats.instruction_count++;1888}18891890struct midgard_disasm_stats1891disassemble_midgard(FILE *fp, uint8_t *code, size_t size, unsigned gpu_id, bool verbose)1892{1893uint32_t *words = (uint32_t *) code;1894unsigned num_words = size / 4;1895int tabs = 0;18961897bool branch_forward = false;18981899int last_next_tag = -1;19001901unsigned i = 0;19021903midg_tags = calloc(sizeof(midg_tags[0]), num_words);19041905/* Stats for shader-db */1906memset(&midg_stats, 0, sizeof(midg_stats));1907midg_ever_written = 0;19081909while (i < num_words) {1910unsigned tag = words[i] & 0xF;1911unsigned next_tag = (words[i] >> 4) & 0xF;1912unsigned num_quad_words = midgard_tag_props[tag].size;19131914if (midg_tags[i] && midg_tags[i] != tag) {1915fprintf(fp, "\t/* XXX: TAG ERROR branch, got %s expected %s */\n",1916midgard_tag_props[tag].name,1917midgard_tag_props[midg_tags[i]].name);1918}19191920midg_tags[i] = tag;19211922/* Check the tag. The idea is to ensure that next_tag is1923* *always* recoverable from the disassembly, such that we may1924* safely omit printing next_tag. To show this, we first1925* consider that next tags are semantically off-byone -- we end1926* up parsing tag n during step n+1. So, we ensure after we're1927* done disassembling the next tag of the final bundle is BREAK1928* and warn otherwise. We also ensure that the next tag is1929* never INVALID. Beyond that, since the last tag is checked1930* outside the loop, we can check one tag prior. If equal to1931* the current tag (which is unique), we're done. Otherwise, we1932* print if that tag was > TAG_BREAK, which implies the tag was1933* not TAG_BREAK or TAG_INVALID. But we already checked for1934* TAG_INVALID, so it's just if the last tag was TAG_BREAK that1935* we're silent. So we throw in a print for break-next on at1936* the end of the bundle (if it's not the final bundle, which1937* we already check for above), disambiguating this case as1938* well. Hence in all cases we are unambiguous, QED. */19391940if (next_tag == TAG_INVALID)1941fprintf(fp, "\t/* XXX: invalid next tag */\n");19421943if (last_next_tag > TAG_BREAK && last_next_tag != tag) {1944fprintf(fp, "\t/* XXX: TAG ERROR sequence, got %s expexted %s */\n",1945midgard_tag_props[tag].name,1946midgard_tag_props[last_next_tag].name);1947}19481949last_next_tag = next_tag;19501951/* Tags are unique in the following way:1952*1953* INVALID, BREAK, UNKNOWN_*: verbosely printed1954* TEXTURE_4_BARRIER: verified by barrier/!barrier op1955* TEXTURE_4_VTX: .vtx tag printed1956* TEXTURE_4: tetxure lack of barriers or .vtx1957* TAG_LOAD_STORE_4: only load/store1958* TAG_ALU_4/8/12/16: by number of instructions/constants1959* TAG_ALU_4_8/12/16_WRITEOUT: ^^ with .writeout tag1960*/19611962switch (tag) {1963case TAG_TEXTURE_4_VTX ... TAG_TEXTURE_4_BARRIER: {1964bool interpipe_aliasing =1965midgard_get_quirks(gpu_id) & MIDGARD_INTERPIPE_REG_ALIASING;19661967print_texture_word(fp, &words[i], tabs,1968interpipe_aliasing ? 0 : REG_TEX_BASE,1969interpipe_aliasing ? REGISTER_LDST_BASE : REG_TEX_BASE);1970break;1971}19721973case TAG_LOAD_STORE_4:1974print_load_store_word(fp, &words[i], verbose);1975break;19761977case TAG_ALU_4 ... TAG_ALU_16_WRITEOUT:1978branch_forward = print_alu_word(fp, &words[i], num_quad_words, tabs, i + 4*num_quad_words, verbose);19791980/* TODO: infer/verify me */1981if (tag >= TAG_ALU_4_WRITEOUT)1982fprintf(fp, "writeout\n");19831984break;19851986default:1987fprintf(fp, "Unknown word type %u:\n", words[i] & 0xF);1988num_quad_words = 1;1989print_quad_word(fp, &words[i], tabs);1990fprintf(fp, "\n");1991break;1992}19931994/* We are parsing per bundle anyway. Add before we start1995* breaking out so we don't miss the final bundle. */19961997midg_stats.bundle_count++;1998midg_stats.quadword_count += num_quad_words;19992000/* Include a synthetic "break" instruction at the end of the2001* bundle to signify that if, absent a branch, the shader2002* execution will stop here. Stop disassembly at such a break2003* based on a heuristic */20042005if (next_tag == TAG_BREAK) {2006if (branch_forward) {2007fprintf(fp, "break\n");2008} else {2009fprintf(fp, "\n");2010break;2011}2012}20132014fprintf(fp, "\n");20152016i += 4 * num_quad_words;2017}20182019if (last_next_tag != TAG_BREAK) {2020fprintf(fp, "/* XXX: shader ended with tag %s */\n",2021midgard_tag_props[last_next_tag].name);2022}20232024free(midg_tags);20252026/* We computed work_count as max_work_registers, so add one to get the2027* count. If no work registers are written, you still have one work2028* reported, which is exactly what the hardware expects */20292030midg_stats.work_count++;20312032return midg_stats;2033}203420352036