Path: blob/21.2-virgl/src/amd/compiler/aco_print_asm.cpp
4550 views
/*1* Copyright © 2018 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22*/2324#include "aco_ir.h"2526#include "llvm/ac_llvm_util.h"2728#include "llvm-c/Disassembler.h"29#include <llvm/ADT/StringRef.h>30#include <llvm/MC/MCDisassembler/MCDisassembler.h>3132#include <array>33#include <iomanip>34#include <vector>3536namespace aco {37namespace {3839/* LLVM disassembler only supports GFX8+, try to disassemble with CLRXdisasm40* for GFX6-GFX7 if found on the system, this is better than nothing.41*/42bool43print_asm_gfx6_gfx7(Program* program, std::vector<uint32_t>& binary, FILE* output)44{45#ifdef _WIN3246return true;47#else48char path[] = "/tmp/fileXXXXXX";49char line[2048], command[128];50const char* gpu_type;51FILE* p;52int fd;5354/* Dump the binary into a temporary file. */55fd = mkstemp(path);56if (fd < 0)57return true;5859for (uint32_t w : binary) {60if (write(fd, &w, sizeof(w)) == -1)61goto fail;62}6364/* Determine the GPU type for CLRXdisasm. Use the family for GFX6 chips65* because it doesn't allow to use gfx600 directly.66*/67switch (program->chip_class) {68case GFX6:69switch (program->family) {70case CHIP_TAHITI: gpu_type = "tahiti"; break;71case CHIP_PITCAIRN: gpu_type = "pitcairn"; break;72case CHIP_VERDE: gpu_type = "capeverde"; break;73case CHIP_OLAND: gpu_type = "oland"; break;74case CHIP_HAINAN: gpu_type = "hainan"; break;75default: unreachable("Invalid GFX6 family!");76}77break;78case GFX7: gpu_type = "gfx700"; break;79default: unreachable("Invalid chip class!");80}8182sprintf(command, "clrxdisasm --gpuType=%s -r %s", gpu_type, path);8384p = popen(command, "r");85if (p) {86if (!fgets(line, sizeof(line), p)) {87fprintf(output, "clrxdisasm not found\n");88pclose(p);89goto fail;90}9192do {93fputs(line, output);94} while (fgets(line, sizeof(line), p));9596pclose(p);97}9899return false;100101fail:102close(fd);103unlink(path);104return true;105#endif106}107108std::pair<bool, size_t>109disasm_instr(chip_class chip, LLVMDisasmContextRef disasm, uint32_t* binary, unsigned exec_size,110size_t pos, char* outline, unsigned outline_size)111{112/* mask out src2 on v_writelane_b32 */113if (((chip == GFX8 || chip == GFX9) && (binary[pos] & 0xffff8000) == 0xd28a0000) ||114(chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7610000)) {115binary[pos + 1] = binary[pos + 1] & 0xF803FFFF;116}117118size_t l =119LLVMDisasmInstruction(disasm, (uint8_t*)&binary[pos], (exec_size - pos) * sizeof(uint32_t),120pos * 4, outline, outline_size);121122if (chip >= GFX10 && l == 8 && ((binary[pos] & 0xffff0000) == 0xd7610000) &&123((binary[pos + 1] & 0x1ff) == 0xff)) {124/* v_writelane with literal uses 3 dwords but llvm consumes only 2 */125l += 4;126}127128bool invalid = false;129size_t size;130if (!l &&131((chip >= GFX9 && (binary[pos] & 0xffff8000) == 0xd1348000) || /* v_add_u32_e64 + clamp */132(chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd7038000) || /* v_add_u16_e64 + clamp */133(chip <= GFX9 && (binary[pos] & 0xffff8000) == 0xd1268000) || /* v_add_u16_e64 + clamp */134(chip >= GFX10 && (binary[pos] & 0xffff8000) == 0xd76d8000) || /* v_add3_u32 + clamp */135(chip == GFX9 && (binary[pos] & 0xffff8000) == 0xd1ff8000)) /* v_add3_u32 + clamp */) {136strcpy(outline, "\tinteger addition + clamp");137bool has_literal = chip >= GFX10 && (((binary[pos + 1] & 0x1ff) == 0xff) ||138(((binary[pos + 1] >> 9) & 0x1ff) == 0xff));139size = 2 + has_literal;140} else if (chip >= GFX10 && l == 4 && ((binary[pos] & 0xfe0001ff) == 0x020000f9)) {141strcpy(outline, "\tv_cndmask_b32 + sdwa");142size = 2;143} else if (!l) {144strcpy(outline, "(invalid instruction)");145size = 1;146invalid = true;147} else {148assert(l % 4 == 0);149size = l / 4;150}151152return std::make_pair(invalid, size);153}154} /* end namespace */155156bool157print_asm(Program* program, std::vector<uint32_t>& binary, unsigned exec_size, FILE* output)158{159if (program->chip_class <= GFX7) {160/* Do not abort if clrxdisasm isn't found. */161print_asm_gfx6_gfx7(program, binary, output);162return false;163}164165std::vector<bool> referenced_blocks(program->blocks.size());166referenced_blocks[0] = true;167for (Block& block : program->blocks) {168for (unsigned succ : block.linear_succs)169referenced_blocks[succ] = true;170}171172std::vector<llvm::SymbolInfoTy> symbols;173std::vector<std::array<char, 16>> block_names;174block_names.reserve(program->blocks.size());175for (Block& block : program->blocks) {176if (!referenced_blocks[block.index])177continue;178std::array<char, 16> name;179sprintf(name.data(), "BB%u", block.index);180block_names.push_back(name);181symbols.emplace_back(block.offset * 4,182llvm::StringRef(block_names[block_names.size() - 1].data()), 0);183}184185const char* features = "";186if (program->chip_class >= GFX10 && program->wave_size == 64) {187features = "+wavefrontsize64";188}189190LLVMDisasmContextRef disasm =191LLVMCreateDisasmCPUFeatures("amdgcn-mesa-mesa3d", ac_get_llvm_processor_name(program->family),192features, &symbols, 0, NULL, NULL);193194size_t pos = 0;195bool invalid = false;196unsigned next_block = 0;197198unsigned prev_size = 0;199unsigned prev_pos = 0;200unsigned repeat_count = 0;201while (pos < exec_size) {202bool new_block =203next_block < program->blocks.size() && pos == program->blocks[next_block].offset;204if (pos + prev_size <= exec_size && prev_pos != pos && !new_block &&205memcmp(&binary[prev_pos], &binary[pos], prev_size * 4) == 0) {206repeat_count++;207pos += prev_size;208continue;209} else {210if (repeat_count)211fprintf(output, "\t(then repeated %u times)\n", repeat_count);212repeat_count = 0;213}214215while (next_block < program->blocks.size() && pos == program->blocks[next_block].offset) {216if (referenced_blocks[next_block])217fprintf(output, "BB%u:\n", next_block);218next_block++;219}220221char outline[1024];222std::pair<bool, size_t> res = disasm_instr(program->chip_class, disasm, binary.data(),223exec_size, pos, outline, sizeof(outline));224invalid |= res.first;225226fprintf(output, "%-60s ;", outline);227228for (unsigned i = 0; i < res.second; i++)229fprintf(output, " %.8x", binary[pos + i]);230fputc('\n', output);231232prev_size = res.second;233prev_pos = pos;234pos += res.second;235}236assert(next_block == program->blocks.size());237238LLVMDisasmDispose(disasm);239240if (program->constant_data.size()) {241fputs("\n/* constant data */\n", output);242for (unsigned i = 0; i < program->constant_data.size(); i += 32) {243fprintf(output, "[%.6u]", i);244unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);245for (unsigned j = 0; j < line_size; j += 4) {246unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);247uint32_t v = 0;248memcpy(&v, &program->constant_data[i + j], size);249fprintf(output, " %.8x", v);250}251fputc('\n', output);252}253}254255return invalid;256}257258} // namespace aco259260261