/* Path: blob/21.2-virgl/src/panfrost/bifrost/disassemble.c (4564 views) */
/*1* Copyright (C) 2019 Connor Abbott <[email protected]>2* Copyright (C) 2019 Lyude Paul <[email protected]>3* Copyright (C) 2019 Ryan Houdek <[email protected]>4*5* Permission is hereby granted, free of charge, to any person obtaining a6* copy of this software and associated documentation files (the "Software"),7* to deal in the Software without restriction, including without limitation8* the rights to use, copy, modify, merge, publish, distribute, sublicense,9* and/or sell copies of the Software, and to permit persons to whom the10* Software is furnished to do so, subject to the following conditions:11*12* The above copyright notice and this permission notice (including the next13* paragraph) shall be included in all copies or substantial portions of the14* Software.15*16* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR17* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,18* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL19* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER20* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,21* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE22* SOFTWARE.23*/2425#include <stdbool.h>26#include <stdio.h>27#include <stdint.h>28#include <assert.h>29#include <inttypes.h>30#include <string.h>3132#include "bifrost.h"33#include "disassemble.h"34#include "bi_print_common.h"35#include "util/compiler.h"36#include "util/macros.h"3738// return bits (high, lo]39static uint64_t bits(uint32_t word, unsigned lo, unsigned high)40{41if (high == 32)42return word >> lo;43return (word & ((1 << high) - 1)) >> lo;44}4546// each of these structs represents an instruction that's dispatched in one47// cycle. 
Note that these instructions are packed in funny ways within the48// clause, hence the need for a separate struct.49struct bifrost_alu_inst {50uint32_t fma_bits;51uint32_t add_bits;52uint64_t reg_bits;53};5455static unsigned get_reg0(struct bifrost_regs regs)56{57if (regs.ctrl == 0)58return regs.reg0 | ((regs.reg1 & 0x1) << 5);5960return regs.reg0 <= regs.reg1 ? regs.reg0 : 63 - regs.reg0;61}6263static unsigned get_reg1(struct bifrost_regs regs)64{65return regs.reg0 <= regs.reg1 ? regs.reg1 : 63 - regs.reg1;66}6768// this represents the decoded version of the ctrl register field.69struct bifrost_reg_ctrl {70bool read_reg0;71bool read_reg1;72struct bifrost_reg_ctrl_23 slot23;73bool clause_start;74};7576static void dump_header(FILE *fp, struct bifrost_header header, bool verbose)77{78fprintf(fp, "ds(%du) ", header.dependency_slot);7980if (header.staging_barrier)81fprintf(fp, "osrb ");8283fprintf(fp, "%s ", bi_flow_control_name(header.flow_control));8485if (header.suppress_inf)86fprintf(fp, "inf_suppress ");87if (header.suppress_nan)88fprintf(fp, "nan_suppress ");8990if (header.flush_to_zero == BIFROST_FTZ_DX11)91fprintf(fp, "ftz_dx11 ");92else if (header.flush_to_zero == BIFROST_FTZ_ALWAYS)93fprintf(fp, "ftz_hsa ");94if (header.flush_to_zero == BIFROST_FTZ_ABRUPT)95fprintf(fp, "ftz_au ");9697assert(!header.zero1);98assert(!header.zero2);99100if (header.float_exceptions == BIFROST_EXCEPTIONS_DISABLED)101fprintf(fp, "fpe_ts ");102else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_DIVISION)103fprintf(fp, "fpe_pd ");104else if (header.float_exceptions == BIFROST_EXCEPTIONS_PRECISE_SQRT)105fprintf(fp, "fpe_psqr ");106107if (header.message_type)108fprintf(fp, "%s ", bi_message_type_name(header.message_type));109110if (header.terminate_discarded_threads)111fprintf(fp, "td ");112113if (header.next_clause_prefetch)114fprintf(fp, "ncph ");115116if (header.next_message_type)117fprintf(fp, "next_%s ", bi_message_type_name(header.next_message_type));118if 
(header.dependency_wait != 0) {119fprintf(fp, "dwb(");120bool first = true;121for (unsigned i = 0; i < 8; i++) {122if (header.dependency_wait & (1 << i)) {123if (!first) {124fprintf(fp, ", ");125}126fprintf(fp, "%d", i);127first = false;128}129}130fprintf(fp, ") ");131}132133fprintf(fp, "\n");134}135136static struct bifrost_reg_ctrl DecodeRegCtrl(FILE *fp, struct bifrost_regs regs, bool first)137{138struct bifrost_reg_ctrl decoded = {};139unsigned ctrl;140if (regs.ctrl == 0) {141ctrl = regs.reg1 >> 2;142decoded.read_reg0 = !(regs.reg1 & 0x2);143decoded.read_reg1 = false;144} else {145ctrl = regs.ctrl;146decoded.read_reg0 = decoded.read_reg1 = true;147}148149/* Modify control based on state */150if (first)151ctrl = (ctrl & 0x7) | ((ctrl & 0x8) << 1);152else if (regs.reg2 == regs.reg3)153ctrl += 16;154155decoded.slot23 = bifrost_reg_ctrl_lut[ctrl];156ASSERTED struct bifrost_reg_ctrl_23 reserved = { 0 };157assert(memcmp(&decoded.slot23, &reserved, sizeof(reserved)));158159return decoded;160}161162static void dump_regs(FILE *fp, struct bifrost_regs srcs, bool first)163{164struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, srcs, first);165fprintf(fp, " # ");166if (ctrl.read_reg0)167fprintf(fp, "slot 0: r%d ", get_reg0(srcs));168if (ctrl.read_reg1)169fprintf(fp, "slot 1: r%d ", get_reg1(srcs));170171const char *slot3_fma = ctrl.slot23.slot3_fma ? 
"FMA" : "ADD";172173if (ctrl.slot23.slot2 == BIFROST_OP_WRITE)174fprintf(fp, "slot 2: r%d (write FMA) ", srcs.reg2);175else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_LO)176fprintf(fp, "slot 2: r%d (write lo FMA) ", srcs.reg2);177else if (ctrl.slot23.slot2 == BIFROST_OP_WRITE_HI)178fprintf(fp, "slot 2: r%d (write hi FMA) ", srcs.reg2);179else if (ctrl.slot23.slot2 == BIFROST_OP_READ)180fprintf(fp, "slot 2: r%d (read) ", srcs.reg2);181182if (ctrl.slot23.slot3 == BIFROST_OP_WRITE)183fprintf(fp, "slot 3: r%d (write %s) ", srcs.reg3, slot3_fma);184else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_LO)185fprintf(fp, "slot 3: r%d (write lo %s) ", srcs.reg3, slot3_fma);186else if (ctrl.slot23.slot3 == BIFROST_OP_WRITE_HI)187fprintf(fp, "slot 3: r%d (write hi %s) ", srcs.reg3, slot3_fma);188189if (srcs.fau_idx)190fprintf(fp, "fau %X ", srcs.fau_idx);191192fprintf(fp, "\n");193}194195static void196bi_disasm_dest_mask(FILE *fp, enum bifrost_reg_op op)197{198if (op == BIFROST_OP_WRITE_LO)199fprintf(fp, ".h0");200else if (op == BIFROST_OP_WRITE_HI)201fprintf(fp, ".h1");202}203204void205bi_disasm_dest_fma(FILE *fp, struct bifrost_regs *next_regs, bool last)206{207/* If this is the last instruction, next_regs points to the first reg entry. */208struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);209if (ctrl.slot23.slot2 >= BIFROST_OP_WRITE) {210fprintf(fp, "r%u:t0", next_regs->reg2);211bi_disasm_dest_mask(fp, ctrl.slot23.slot2);212} else if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && ctrl.slot23.slot3_fma) {213fprintf(fp, "r%u:t0", next_regs->reg3);214bi_disasm_dest_mask(fp, ctrl.slot23.slot3);215} else216fprintf(fp, "t0");217}218219void220bi_disasm_dest_add(FILE *fp, struct bifrost_regs *next_regs, bool last)221{222/* If this is the last instruction, next_regs points to the first reg entry. 
*/223struct bifrost_reg_ctrl ctrl = DecodeRegCtrl(fp, *next_regs, last);224225if (ctrl.slot23.slot3 >= BIFROST_OP_WRITE && !ctrl.slot23.slot3_fma) {226fprintf(fp, "r%u:t1", next_regs->reg3);227bi_disasm_dest_mask(fp, ctrl.slot23.slot3);228} else229fprintf(fp, "t1");230}231232static void dump_const_imm(FILE *fp, uint32_t imm)233{234union {235float f;236uint32_t i;237} fi;238fi.i = imm;239fprintf(fp, "0x%08x /* %f */", imm, fi.f);240}241242static void243dump_pc_imm(FILE *fp, uint64_t imm, enum bi_constmod mod, bool high32)244{245if (mod == BI_CONSTMOD_PC_HI && !high32) {246dump_const_imm(fp, imm);247return;248}249250/* 60-bit sign-extend */251uint64_t zx64 = (imm << 4);252int64_t sx64 = zx64;253sx64 >>= 4;254255/* 28-bit sign extend x 2 */256uint32_t imm32[2] = { (uint32_t) imm, (uint32_t) (imm >> 32) };257uint32_t zx32[2] = { imm32[0] << 4, imm32[1] << 4 };258int32_t sx32[2] = { zx32[0], zx32[1] };259sx32[0] >>= 4;260sx32[1] >>= 4;261262int64_t offs = 0;263264switch (mod) {265case BI_CONSTMOD_PC_LO:266offs = sx64;267break;268case BI_CONSTMOD_PC_HI:269offs = sx32[1];270break;271case BI_CONSTMOD_PC_LO_HI:272offs = sx32[high32];273break;274default:275unreachable("Invalid PC modifier");276}277278fprintf(fp, "(pc + %" PRId64 ")", offs);279280if (mod == BI_CONSTMOD_PC_LO && high32)281fprintf(fp, " >> 32");282283/* While technically in spec, referencing the current clause as (pc +284* 0) likely indicates an unintended infinite loop */285if (offs == 0)286fprintf(fp, " /* XXX: likely an infinite loop */");287}288289/* Convert an index to an embedded constant in FAU-RAM to the index of the290* embedded constant. No, it's not in order. Yes, really. 
*/291292static unsigned293const_fau_to_idx(unsigned fau_value)294{295unsigned map[8] = {296~0, ~0, 4, 5, 0, 1, 2, 3297};298299assert(map[fau_value] < 6);300return map[fau_value];301}302303static void dump_fau_src(FILE *fp, struct bifrost_regs srcs, struct bi_constants *consts, bool high32)304{305if (srcs.fau_idx & 0x80) {306unsigned uniform = (srcs.fau_idx & 0x7f);307fprintf(fp, "u%d.w%d", uniform, high32);308} else if (srcs.fau_idx >= 0x20) {309unsigned idx = const_fau_to_idx(srcs.fau_idx >> 4);310uint64_t imm = consts->raw[idx];311imm |= (srcs.fau_idx & 0xf);312if (consts->mods[idx] != BI_CONSTMOD_NONE)313dump_pc_imm(fp, imm, consts->mods[idx], high32);314else if (high32)315dump_const_imm(fp, imm >> 32);316else317dump_const_imm(fp, imm);318} else {319switch (srcs.fau_idx) {320case 0:321fprintf(fp, "#0");322break;323case 1:324fprintf(fp, "lane_id");325break;326case 2:327fprintf(fp, "warp_id");328break;329case 3:330fprintf(fp, "core_id");331break;332case 4:333fprintf(fp, "framebuffer_size");334break;335case 5:336fprintf(fp, "atest_datum");337break;338case 6:339fprintf(fp, "sample");340break;341case 8:342case 9:343case 10:344case 11:345case 12:346case 13:347case 14:348case 15:349fprintf(fp, "blend_descriptor_%u", (unsigned) srcs.fau_idx - 8);350break;351default:352fprintf(fp, "XXX - reserved%u", (unsigned) srcs.fau_idx);353break;354}355356if (high32)357fprintf(fp, ".y");358else359fprintf(fp, ".x");360}361}362363void364dump_src(FILE *fp, unsigned src, struct bifrost_regs srcs, struct bi_constants *consts, bool isFMA)365{366switch (src) {367case 0:368fprintf(fp, "r%d", get_reg0(srcs));369break;370case 1:371fprintf(fp, "r%d", get_reg1(srcs));372break;373case 2:374fprintf(fp, "r%d", srcs.reg2);375break;376case 3:377if (isFMA)378fprintf(fp, "#0");379else380fprintf(fp, "t"); // i.e. 
the output of FMA this cycle381break;382case 4:383dump_fau_src(fp, srcs, consts, false);384break;385case 5:386dump_fau_src(fp, srcs, consts, true);387break;388case 6:389fprintf(fp, "t0");390break;391case 7:392fprintf(fp, "t1");393break;394}395}396397/* Tables for decoding M0, or if M0 == 7, M1 respectively.398*399* XXX: It's not clear if the third entry of M1_table corresponding to (7, 2)400* should have PC_LO_HI in the EC1 slot, or it's a weird hybrid mode? I would401* say this needs testing but no code should ever actually use this mode.402*/403404static const enum bi_constmod M1_table[7][2] = {405{ BI_CONSTMOD_NONE, BI_CONSTMOD_NONE },406{ BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },407{ BI_CONSTMOD_PC_LO, BI_CONSTMOD_PC_LO },408{ ~0, ~0 },409{ BI_CONSTMOD_PC_HI, BI_CONSTMOD_NONE },410{ BI_CONSTMOD_PC_HI, BI_CONSTMOD_PC_HI },411{ BI_CONSTMOD_PC_LO, BI_CONSTMOD_NONE },412};413414static const enum bi_constmod M2_table[4][2] = {415{ BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_NONE },416{ BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },417{ BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_LO_HI },418{ BI_CONSTMOD_PC_LO_HI, BI_CONSTMOD_PC_HI },419};420421static void422decode_M(enum bi_constmod *mod, unsigned M1, unsigned M2, bool single)423{424if (M1 >= 8) {425mod[0] = BI_CONSTMOD_NONE;426427if (!single)428mod[1] = BI_CONSTMOD_NONE;429430return;431} else if (M1 == 7) {432assert(M2 < 4);433memcpy(mod, M2_table[M2], sizeof(*mod) * (single ? 1 : 2));434} else {435assert(M1 != 3);436memcpy(mod, M1_table[M1], sizeof(*mod) * (single ? 
1 : 2));437}438}439440static bool dump_clause(FILE *fp, uint32_t *words, unsigned *size, unsigned offset, bool verbose)441{442// State for a decoded clause443struct bifrost_alu_inst instrs[8] = {};444struct bi_constants consts = {};445unsigned num_instrs = 0;446unsigned num_consts = 0;447uint64_t header_bits = 0;448bool stopbit = false;449450unsigned i;451for (i = 0; ; i++, words += 4) {452if (verbose) {453fprintf(fp, "# ");454for (int j = 0; j < 4; j++)455fprintf(fp, "%08x ", words[3 - j]); // low bit on the right456fprintf(fp, "\n");457}458unsigned tag = bits(words[0], 0, 8);459460// speculatively decode some things that are common between many formats, so we can share some code461struct bifrost_alu_inst main_instr = {};462// 20 bits463main_instr.add_bits = bits(words[2], 2, 32 - 13);464// 23 bits465main_instr.fma_bits = bits(words[1], 11, 32) | bits(words[2], 0, 2) << (32 - 11);466// 35 bits467main_instr.reg_bits = ((uint64_t) bits(words[1], 0, 11)) << 24 | (uint64_t) bits(words[0], 8, 32);468469uint64_t const0 = bits(words[0], 8, 32) << 4 | (uint64_t) words[1] << 28 | bits(words[2], 0, 4) << 60;470uint64_t const1 = bits(words[2], 4, 32) << 4 | (uint64_t) words[3] << 32;471472/* Z-bit */473bool stop = tag & 0x40;474475if (verbose) {476fprintf(fp, "# tag: 0x%02x\n", tag);477}478if (tag & 0x80) {479/* Format 5 or 10 */480unsigned idx = stop ? 
5 : 2;481main_instr.add_bits |= ((tag >> 3) & 0x7) << 17;482instrs[idx + 1] = main_instr;483instrs[idx].add_bits = bits(words[3], 0, 17) | ((tag & 0x7) << 17);484instrs[idx].fma_bits |= bits(words[2], 19, 32) << 10;485consts.raw[0] = bits(words[3], 17, 32) << 4;486} else {487bool done = false;488switch ((tag >> 3) & 0x7) {489case 0x0:490switch (tag & 0x7) {491case 0x3:492/* Format 1 */493main_instr.add_bits |= bits(words[3], 29, 32) << 17;494instrs[1] = main_instr;495num_instrs = 2;496done = stop;497break;498case 0x4:499/* Format 3 */500instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;501instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;502consts.raw[0] = const0;503decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);504num_instrs = 3;505num_consts = 1;506done = stop;507break;508case 0x1:509case 0x5:510/* Format 4 */511instrs[2].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;512instrs[2].fma_bits |= bits(words[2], 19, 32) << 10;513main_instr.add_bits |= bits(words[3], 26, 29) << 17;514instrs[3] = main_instr;515if ((tag & 0x7) == 0x5) {516num_instrs = 4;517done = stop;518}519break;520case 0x6:521/* Format 8 */522instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;523instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;524consts.raw[0] = const0;525decode_M(&consts.mods[0], bits(words[2], 4, 8), bits(words[2], 8, 12), true);526num_instrs = 6;527num_consts = 1;528done = stop;529break;530case 0x7:531/* Format 9 */532instrs[5].add_bits = bits(words[3], 0, 17) | bits(words[3], 29, 32) << 17;533instrs[5].fma_bits |= bits(words[2], 19, 32) << 10;534main_instr.add_bits |= bits(words[3], 26, 29) << 17;535instrs[6] = main_instr;536num_instrs = 7;537done = stop;538break;539default:540unreachable("[INSTR_INVALID_ENC] Invalid tag bits");541}542break;543case 0x2:544case 0x3: {545/* Format 6 or 11 */546unsigned idx = ((tag >> 3) & 0x7) == 2 ? 
4 : 7;547main_instr.add_bits |= (tag & 0x7) << 17;548instrs[idx] = main_instr;549consts.raw[0] |= (bits(words[2], 19, 32) | ((uint64_t) words[3] << 13)) << 19;550num_consts = 1;551num_instrs = idx + 1;552done = stop;553break;554}555case 0x4: {556/* Format 2 */557unsigned idx = stop ? 4 : 1;558main_instr.add_bits |= (tag & 0x7) << 17;559instrs[idx] = main_instr;560instrs[idx + 1].fma_bits |= bits(words[3], 22, 32);561instrs[idx + 1].reg_bits = bits(words[2], 19, 32) | (bits(words[3], 0, 22) << (32 - 19));562break;563}564case 0x1:565/* Format 0 - followed by constants */566num_instrs = 1;567done = stop;568FALLTHROUGH;569case 0x5:570/* Format 0 - followed by instructions */571header_bits = bits(words[2], 19, 32) | ((uint64_t) words[3] << (32 - 19));572main_instr.add_bits |= (tag & 0x7) << 17;573instrs[0] = main_instr;574break;575case 0x6:576case 0x7: {577/* Format 12 */578unsigned pos = tag & 0xf;579580struct {581unsigned const_idx;582unsigned nr_tuples;583} pos_table[0x10] = {584{ 0, 1 },585{ 0, 2 },586{ 0, 4 },587{ 1, 3 },588{ 1, 5 },589{ 2, 4 },590{ 0, 7 },591{ 1, 6 },592{ 3, 5 },593{ 1, 8 },594{ 2, 7 },595{ 3, 6 },596{ 3, 8 },597{ 4, 7 },598{ 5, 6 },599{ ~0, ~0 }600};601602ASSERTED bool valid_count = pos_table[pos].nr_tuples == num_instrs;603assert(valid_count && "INSTR_INVALID_ENC");604605unsigned const_idx = pos_table[pos].const_idx;606607if (num_consts < const_idx + 2)608num_consts = const_idx + 2;609610consts.raw[const_idx] = const0;611consts.raw[const_idx + 1] = const1;612613/* Calculate M values from A, B and 4-bit614* unsigned arithmetic. 
Mathematically it615* should be (A - B) % 16 but we use this616* alternate form to avoid sign issues */617618unsigned A1 = bits(words[2], 0, 4);619unsigned B1 = bits(words[3], 28, 32);620unsigned A2 = bits(words[1], 0, 4);621unsigned B2 = bits(words[2], 28, 32);622623unsigned M1 = (16 + A1 - B1) & 0xF;624unsigned M2 = (16 + A2 - B2) & 0xF;625626decode_M(&consts.mods[const_idx], M1, M2, false);627628done = stop;629break;630}631default:632break;633}634635if (done)636break;637}638}639640*size = i + 1;641642if (verbose) {643fprintf(fp, "# header: %012" PRIx64 "\n", header_bits);644}645646struct bifrost_header header;647memcpy((char *) &header, (char *) &header_bits, sizeof(struct bifrost_header));648dump_header(fp, header, verbose);649if (header.flow_control == BIFROST_FLOW_END)650stopbit = true;651652fprintf(fp, "{\n");653for (i = 0; i < num_instrs; i++) {654struct bifrost_regs regs, next_regs;655if (i + 1 == num_instrs) {656memcpy((char *) &next_regs, (char *) &instrs[0].reg_bits,657sizeof(next_regs));658} else {659memcpy((char *) &next_regs, (char *) &instrs[i + 1].reg_bits,660sizeof(next_regs));661}662663memcpy((char *) ®s, (char *) &instrs[i].reg_bits, sizeof(regs));664665if (verbose) {666fprintf(fp, " # regs: %016" PRIx64 "\n", instrs[i].reg_bits);667dump_regs(fp, regs, i == 0);668}669670bi_disasm_fma(fp, instrs[i].fma_bits, ®s, &next_regs,671header.staging_register, offset, &consts,672i + 1 == num_instrs);673674bi_disasm_add(fp, instrs[i].add_bits, ®s, &next_regs,675header.staging_register, offset, &consts,676i + 1 == num_instrs);677}678fprintf(fp, "}\n");679680if (verbose) {681for (unsigned i = 0; i < num_consts; i++) {682fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i, consts.raw[i] & 0xffffffff);683fprintf(fp, "# const%d: %08" PRIx64 "\n", 2 * i + 1, consts.raw[i] >> 32);684}685}686687fprintf(fp, "\n");688return stopbit;689}690691void disassemble_bifrost(FILE *fp, uint8_t *code, size_t size, bool verbose)692{693uint32_t *words = (uint32_t *) code;694uint32_t 
*words_end = words + (size / 4);695// used for displaying branch targets696unsigned offset = 0;697while (words != words_end) {698fprintf(fp, "clause_%d:\n", offset);699unsigned size;700701if (dump_clause(fp, words, &size, offset, verbose))702break;703704words += size * 4;705offset += size;706}707}708709710711