Path: blob/21.2-virgl/src/amd/compiler/aco_print_ir.cpp
4550 views
/*1* Copyright © 2018 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22*/2324#include "aco_builder.h"25#include "aco_ir.h"2627#include "common/ac_shader_util.h"28#include "common/sid.h"2930#include <array>3132namespace aco {3334const std::array<const char*, num_reduce_ops> reduce_ops = []()35{36std::array<const char*, num_reduce_ops> ret{};37ret[iadd8] = "iadd8";38ret[iadd16] = "iadd16";39ret[iadd32] = "iadd32";40ret[iadd64] = "iadd64";41ret[imul8] = "imul8";42ret[imul16] = "imul16";43ret[imul32] = "imul32";44ret[imul64] = "imul64";45ret[fadd16] = "fadd16";46ret[fadd32] = "fadd32";47ret[fadd64] = "fadd64";48ret[fmul16] = "fmul16";49ret[fmul32] = "fmul32";50ret[fmul64] = "fmul64";51ret[imin8] = "imin8";52ret[imin16] = "imin16";53ret[imin32] = "imin32";54ret[imin64] = "imin64";55ret[imax8] = "imax8";56ret[imax16] = "imax16";57ret[imax32] = "imax32";58ret[imax64] = "imax64";59ret[umin8] = "umin8";60ret[umin16] = "umin16";61ret[umin32] = "umin32";62ret[umin64] = "umin64";63ret[umax8] = "umax8";64ret[umax16] = "umax16";65ret[umax32] = "umax32";66ret[umax64] = "umax64";67ret[fmin16] = "fmin16";68ret[fmin32] = "fmin32";69ret[fmin64] = "fmin64";70ret[fmax16] = "fmax16";71ret[fmax32] = "fmax32";72ret[fmax64] = "fmax64";73ret[iand8] = "iand8";74ret[iand16] = "iand16";75ret[iand32] = "iand32";76ret[iand64] = "iand64";77ret[ior8] = "ior8";78ret[ior16] = "ior16";79ret[ior32] = "ior32";80ret[ior64] = "ior64";81ret[ixor8] = "ixor8";82ret[ixor16] = "ixor16";83ret[ixor32] = "ixor32";84ret[ixor64] = "ixor64";85return ret;86}();8788static void89print_reg_class(const RegClass rc, FILE* output)90{91switch (rc) {92case RegClass::s1: fprintf(output, " s1: "); return;93case RegClass::s2: fprintf(output, " s2: "); return;94case RegClass::s3: fprintf(output, " s3: "); return;95case RegClass::s4: fprintf(output, " s4: "); return;96case RegClass::s6: fprintf(output, " s6: "); return;97case RegClass::s8: fprintf(output, " s8: "); return;98case RegClass::s16: fprintf(output, "s16: "); return;99case RegClass::v1: fprintf(output, " v1: "); return;100case RegClass::v2: fprintf(output, " v2: "); return;101case RegClass::v3: fprintf(output, " v3: "); return;102case RegClass::v4: fprintf(output, " v4: "); return;103case RegClass::v5: fprintf(output, " v5: "); return;104case RegClass::v6: fprintf(output, " v6: "); return;105case RegClass::v7: fprintf(output, " v7: "); return;106case RegClass::v8: fprintf(output, " v8: "); return;107case RegClass::v1b: fprintf(output, " v1b: "); return;108case RegClass::v2b: fprintf(output, " v2b: "); return;109case RegClass::v3b: fprintf(output, " v3b: "); return;110case RegClass::v4b: fprintf(output, " v4b: "); return;111case RegClass::v6b: fprintf(output, " v6b: "); return;112case RegClass::v8b: fprintf(output, " v8b: "); return;113case RegClass::v1_linear: fprintf(output, " v1: "); return;114case RegClass::v2_linear: fprintf(output, " v2: "); return;115}116}117118void119print_physReg(PhysReg reg, unsigned bytes, FILE* output, unsigned flags)120{121if (reg == 124) {122fprintf(output, "m0");123} else if (reg == 106) {124fprintf(output, "vcc");125} else if (reg == 253) {126fprintf(output, "scc");127} else if (reg == 126) {128fprintf(output, "exec");129} else {130bool is_vgpr = reg / 256;131unsigned r = reg % 256;132unsigned size = DIV_ROUND_UP(bytes, 4);133if (size == 1 && (flags & print_no_ssa)) {134fprintf(output, "%c%d", is_vgpr ? 'v' : 's', r);135} else {136fprintf(output, "%c[%d", is_vgpr ? 'v' : 's', r);137if (size > 1)138fprintf(output, "-%d]", r + size - 1);139else140fprintf(output, "]");141}142if (reg.byte() || bytes % 4)143fprintf(output, "[%d:%d]", reg.byte() * 8, (reg.byte() + bytes) * 8);144}145}146147static void148print_constant(uint8_t reg, FILE* output)149{150if (reg >= 128 && reg <= 192) {151fprintf(output, "%d", reg - 128);152return;153} else if (reg >= 192 && reg <= 208) {154fprintf(output, "%d", 192 - reg);155return;156}157158switch (reg) {159case 240: fprintf(output, "0.5"); break;160case 241: fprintf(output, "-0.5"); break;161case 242: fprintf(output, "1.0"); break;162case 243: fprintf(output, "-1.0"); break;163case 244: fprintf(output, "2.0"); break;164case 245: fprintf(output, "-2.0"); break;165case 246: fprintf(output, "4.0"); break;166case 247: fprintf(output, "-4.0"); break;167case 248: fprintf(output, "1/(2*PI)"); break;168}169}170171void172aco_print_operand(const Operand* operand, FILE* output, unsigned flags)173{174if (operand->isLiteral() || (operand->isConstant() && operand->bytes() == 1)) {175if (operand->bytes() == 1)176fprintf(output, "0x%.2x", operand->constantValue());177else if (operand->bytes() == 2)178fprintf(output, "0x%.4x", operand->constantValue());179else180fprintf(output, "0x%x", operand->constantValue());181} else if (operand->isConstant()) {182print_constant(operand->physReg().reg(), output);183} else if (operand->isUndefined()) {184print_reg_class(operand->regClass(), output);185fprintf(output, "undef");186} else {187if (operand->isLateKill())188fprintf(output, "(latekill)");189if (operand->is16bit())190fprintf(output, "(is16bit)");191if (operand->is24bit())192fprintf(output, "(is24bit)");193if ((flags & print_kill) && operand->isKill())194fprintf(output, "(kill)");195196if (!(flags & print_no_ssa))197fprintf(output, "%%%d%s", operand->tempId(), operand->isFixed() ? ":" : "");198199if (operand->isFixed())200print_physReg(operand->physReg(), operand->bytes(), output, flags);201}202}203204static void205print_definition(const Definition* definition, FILE* output, unsigned flags)206{207if (!(flags & print_no_ssa))208print_reg_class(definition->regClass(), output);209if (definition->isPrecise())210fprintf(output, "(precise)");211if (definition->isNUW())212fprintf(output, "(nuw)");213if (definition->isNoCSE())214fprintf(output, "(noCSE)");215if ((flags & print_kill) && definition->isKill())216fprintf(output, "(kill)");217if (!(flags & print_no_ssa))218fprintf(output, "%%%d%s", definition->tempId(), definition->isFixed() ? ":" : "");219220if (definition->isFixed())221print_physReg(definition->physReg(), definition->bytes(), output, flags);222}223224static void225print_storage(storage_class storage, FILE* output)226{227fprintf(output, " storage:");228int printed = 0;229if (storage & storage_buffer)230printed += fprintf(output, "%sbuffer", printed ? "," : "");231if (storage & storage_atomic_counter)232printed += fprintf(output, "%satomic_counter", printed ? "," : "");233if (storage & storage_image)234printed += fprintf(output, "%simage", printed ? "," : "");235if (storage & storage_shared)236printed += fprintf(output, "%sshared", printed ? "," : "");237if (storage & storage_vmem_output)238printed += fprintf(output, "%svmem_output", printed ? "," : "");239if (storage & storage_scratch)240printed += fprintf(output, "%sscratch", printed ? "," : "");241if (storage & storage_vgpr_spill)242printed += fprintf(output, "%svgpr_spill", printed ? "," : "");243}244245static void246print_semantics(memory_semantics sem, FILE* output)247{248fprintf(output, " semantics:");249int printed = 0;250if (sem & semantic_acquire)251printed += fprintf(output, "%sacquire", printed ? "," : "");252if (sem & semantic_release)253printed += fprintf(output, "%srelease", printed ? "," : "");254if (sem & semantic_volatile)255printed += fprintf(output, "%svolatile", printed ? "," : "");256if (sem & semantic_private)257printed += fprintf(output, "%sprivate", printed ? "," : "");258if (sem & semantic_can_reorder)259printed += fprintf(output, "%sreorder", printed ? "," : "");260if (sem & semantic_atomic)261printed += fprintf(output, "%satomic", printed ? "," : "");262if (sem & semantic_rmw)263printed += fprintf(output, "%srmw", printed ? "," : "");264}265266static void267print_scope(sync_scope scope, FILE* output, const char* prefix = "scope")268{269fprintf(output, " %s:", prefix);270switch (scope) {271case scope_invocation: fprintf(output, "invocation"); break;272case scope_subgroup: fprintf(output, "subgroup"); break;273case scope_workgroup: fprintf(output, "workgroup"); break;274case scope_queuefamily: fprintf(output, "queuefamily"); break;275case scope_device: fprintf(output, "device"); break;276}277}278279static void280print_sync(memory_sync_info sync, FILE* output)281{282print_storage(sync.storage, output);283print_semantics(sync.semantics, output);284print_scope(sync.scope, output);285}286287static void288print_instr_format_specific(const Instruction* instr, FILE* output)289{290switch (instr->format) {291case Format::SOPK: {292const SOPK_instruction& sopk = instr->sopk();293fprintf(output, " imm:%d", sopk.imm & 0x8000 ? (sopk.imm - 65536) : sopk.imm);294break;295}296case Format::SOPP: {297uint16_t imm = instr->sopp().imm;298switch (instr->opcode) {299case aco_opcode::s_waitcnt: {300/* we usually should check the chip class for vmcnt/lgkm, but301* insert_waitcnt() should fill it in regardless. */302unsigned vmcnt = (imm & 0xF) | ((imm & (0x3 << 14)) >> 10);303if (vmcnt != 63)304fprintf(output, " vmcnt(%d)", vmcnt);305if (((imm >> 4) & 0x7) < 0x7)306fprintf(output, " expcnt(%d)", (imm >> 4) & 0x7);307if (((imm >> 8) & 0x3F) < 0x3F)308fprintf(output, " lgkmcnt(%d)", (imm >> 8) & 0x3F);309break;310}311case aco_opcode::s_endpgm:312case aco_opcode::s_endpgm_saved:313case aco_opcode::s_endpgm_ordered_ps_done:314case aco_opcode::s_wakeup:315case aco_opcode::s_barrier:316case aco_opcode::s_icache_inv:317case aco_opcode::s_ttracedata:318case aco_opcode::s_set_gpr_idx_off: {319break;320}321case aco_opcode::s_sendmsg: {322unsigned id = imm & sendmsg_id_mask;323switch (id) {324case sendmsg_none: fprintf(output, " sendmsg(MSG_NONE)"); break;325case _sendmsg_gs:326fprintf(output, " sendmsg(gs%s%s, %u)", imm & 0x10 ? ", cut" : "",327imm & 0x20 ? ", emit" : "", imm >> 8);328break;329case _sendmsg_gs_done:330fprintf(output, " sendmsg(gs_done%s%s, %u)", imm & 0x10 ? ", cut" : "",331imm & 0x20 ? ", emit" : "", imm >> 8);332break;333case sendmsg_save_wave: fprintf(output, " sendmsg(save_wave)"); break;334case sendmsg_stall_wave_gen: fprintf(output, " sendmsg(stall_wave_gen)"); break;335case sendmsg_halt_waves: fprintf(output, " sendmsg(halt_waves)"); break;336case sendmsg_ordered_ps_done: fprintf(output, " sendmsg(ordered_ps_done)"); break;337case sendmsg_early_prim_dealloc: fprintf(output, " sendmsg(early_prim_dealloc)"); break;338case sendmsg_gs_alloc_req: fprintf(output, " sendmsg(gs_alloc_req)"); break;339}340break;341}342default: {343if (imm)344fprintf(output, " imm:%u", imm);345break;346}347}348if (instr->sopp().block != -1)349fprintf(output, " block:BB%d", instr->sopp().block);350break;351}352case Format::SMEM: {353const SMEM_instruction& smem = instr->smem();354if (smem.glc)355fprintf(output, " glc");356if (smem.dlc)357fprintf(output, " dlc");358if (smem.nv)359fprintf(output, " nv");360print_sync(smem.sync, output);361break;362}363case Format::VINTRP: {364const Interp_instruction& vintrp = instr->vintrp();365fprintf(output, " attr%d.%c", vintrp.attribute, "xyzw"[vintrp.component]);366break;367}368case Format::DS: {369const DS_instruction& ds = instr->ds();370if (ds.offset0)371fprintf(output, " offset0:%u", ds.offset0);372if (ds.offset1)373fprintf(output, " offset1:%u", ds.offset1);374if (ds.gds)375fprintf(output, " gds");376print_sync(ds.sync, output);377break;378}379case Format::MUBUF: {380const MUBUF_instruction& mubuf = instr->mubuf();381if (mubuf.offset)382fprintf(output, " offset:%u", mubuf.offset);383if (mubuf.offen)384fprintf(output, " offen");385if (mubuf.idxen)386fprintf(output, " idxen");387if (mubuf.addr64)388fprintf(output, " addr64");389if (mubuf.glc)390fprintf(output, " glc");391if (mubuf.dlc)392fprintf(output, " dlc");393if (mubuf.slc)394fprintf(output, " slc");395if (mubuf.tfe)396fprintf(output, " tfe");397if (mubuf.lds)398fprintf(output, " lds");399if (mubuf.disable_wqm)400fprintf(output, " disable_wqm");401print_sync(mubuf.sync, output);402break;403}404case Format::MIMG: {405const MIMG_instruction& mimg = instr->mimg();406unsigned identity_dmask =407!instr->definitions.empty() ? (1 << instr->definitions[0].size()) - 1 : 0xf;408if ((mimg.dmask & identity_dmask) != identity_dmask)409fprintf(output, " dmask:%s%s%s%s", mimg.dmask & 0x1 ? "x" : "",410mimg.dmask & 0x2 ? "y" : "", mimg.dmask & 0x4 ? "z" : "",411mimg.dmask & 0x8 ? "w" : "");412switch (mimg.dim) {413case ac_image_1d: fprintf(output, " 1d"); break;414case ac_image_2d: fprintf(output, " 2d"); break;415case ac_image_3d: fprintf(output, " 3d"); break;416case ac_image_cube: fprintf(output, " cube"); break;417case ac_image_1darray: fprintf(output, " 1darray"); break;418case ac_image_2darray: fprintf(output, " 2darray"); break;419case ac_image_2dmsaa: fprintf(output, " 2dmsaa"); break;420case ac_image_2darraymsaa: fprintf(output, " 2darraymsaa"); break;421}422if (mimg.unrm)423fprintf(output, " unrm");424if (mimg.glc)425fprintf(output, " glc");426if (mimg.dlc)427fprintf(output, " dlc");428if (mimg.slc)429fprintf(output, " slc");430if (mimg.tfe)431fprintf(output, " tfe");432if (mimg.da)433fprintf(output, " da");434if (mimg.lwe)435fprintf(output, " lwe");436if (mimg.r128 || mimg.a16)437fprintf(output, " r128/a16");438if (mimg.d16)439fprintf(output, " d16");440if (mimg.disable_wqm)441fprintf(output, " disable_wqm");442print_sync(mimg.sync, output);443break;444}445case Format::EXP: {446const Export_instruction& exp = instr->exp();447unsigned identity_mask = exp.compressed ? 0x5 : 0xf;448if ((exp.enabled_mask & identity_mask) != identity_mask)449fprintf(output, " en:%c%c%c%c", exp.enabled_mask & 0x1 ? 'r' : '*',450exp.enabled_mask & 0x2 ? 'g' : '*', exp.enabled_mask & 0x4 ? 'b' : '*',451exp.enabled_mask & 0x8 ? 'a' : '*');452if (exp.compressed)453fprintf(output, " compr");454if (exp.done)455fprintf(output, " done");456if (exp.valid_mask)457fprintf(output, " vm");458459if (exp.dest <= V_008DFC_SQ_EXP_MRT + 7)460fprintf(output, " mrt%d", exp.dest - V_008DFC_SQ_EXP_MRT);461else if (exp.dest == V_008DFC_SQ_EXP_MRTZ)462fprintf(output, " mrtz");463else if (exp.dest == V_008DFC_SQ_EXP_NULL)464fprintf(output, " null");465else if (exp.dest >= V_008DFC_SQ_EXP_POS && exp.dest <= V_008DFC_SQ_EXP_POS + 3)466fprintf(output, " pos%d", exp.dest - V_008DFC_SQ_EXP_POS);467else if (exp.dest >= V_008DFC_SQ_EXP_PARAM && exp.dest <= V_008DFC_SQ_EXP_PARAM + 31)468fprintf(output, " param%d", exp.dest - V_008DFC_SQ_EXP_PARAM);469break;470}471case Format::PSEUDO_BRANCH: {472const Pseudo_branch_instruction& branch = instr->branch();473/* Note: BB0 cannot be a branch target */474if (branch.target[0] != 0)475fprintf(output, " BB%d", branch.target[0]);476if (branch.target[1] != 0)477fprintf(output, ", BB%d", branch.target[1]);478break;479}480case Format::PSEUDO_REDUCTION: {481const Pseudo_reduction_instruction& reduce = instr->reduction();482fprintf(output, " op:%s", reduce_ops[reduce.reduce_op]);483if (reduce.cluster_size)484fprintf(output, " cluster_size:%u", reduce.cluster_size);485break;486}487case Format::PSEUDO_BARRIER: {488const Pseudo_barrier_instruction& barrier = instr->barrier();489print_sync(barrier.sync, output);490print_scope(barrier.exec_scope, output, "exec_scope");491break;492}493case Format::FLAT:494case Format::GLOBAL:495case Format::SCRATCH: {496const FLAT_instruction& flat = instr->flatlike();497if (flat.offset)498fprintf(output, " offset:%u", flat.offset);499if (flat.glc)500fprintf(output, " glc");501if (flat.dlc)502fprintf(output, " dlc");503if (flat.slc)504fprintf(output, " slc");505if (flat.lds)506fprintf(output, " lds");507if (flat.nv)508fprintf(output, " nv");509if (flat.disable_wqm)510fprintf(output, " disable_wqm");511print_sync(flat.sync, output);512break;513}514case Format::MTBUF: {515const MTBUF_instruction& mtbuf = instr->mtbuf();516fprintf(output, " dfmt:");517switch (mtbuf.dfmt) {518case V_008F0C_BUF_DATA_FORMAT_8: fprintf(output, "8"); break;519case V_008F0C_BUF_DATA_FORMAT_16: fprintf(output, "16"); break;520case V_008F0C_BUF_DATA_FORMAT_8_8: fprintf(output, "8_8"); break;521case V_008F0C_BUF_DATA_FORMAT_32: fprintf(output, "32"); break;522case V_008F0C_BUF_DATA_FORMAT_16_16: fprintf(output, "16_16"); break;523case V_008F0C_BUF_DATA_FORMAT_10_11_11: fprintf(output, "10_11_11"); break;524case V_008F0C_BUF_DATA_FORMAT_11_11_10: fprintf(output, "11_11_10"); break;525case V_008F0C_BUF_DATA_FORMAT_10_10_10_2: fprintf(output, "10_10_10_2"); break;526case V_008F0C_BUF_DATA_FORMAT_2_10_10_10: fprintf(output, "2_10_10_10"); break;527case V_008F0C_BUF_DATA_FORMAT_8_8_8_8: fprintf(output, "8_8_8_8"); break;528case V_008F0C_BUF_DATA_FORMAT_32_32: fprintf(output, "32_32"); break;529case V_008F0C_BUF_DATA_FORMAT_16_16_16_16: fprintf(output, "16_16_16_16"); break;530case V_008F0C_BUF_DATA_FORMAT_32_32_32: fprintf(output, "32_32_32"); break;531case V_008F0C_BUF_DATA_FORMAT_32_32_32_32: fprintf(output, "32_32_32_32"); break;532case V_008F0C_BUF_DATA_FORMAT_RESERVED_15: fprintf(output, "reserved15"); break;533}534fprintf(output, " nfmt:");535switch (mtbuf.nfmt) {536case V_008F0C_BUF_NUM_FORMAT_UNORM: fprintf(output, "unorm"); break;537case V_008F0C_BUF_NUM_FORMAT_SNORM: fprintf(output, "snorm"); break;538case V_008F0C_BUF_NUM_FORMAT_USCALED: fprintf(output, "uscaled"); break;539case V_008F0C_BUF_NUM_FORMAT_SSCALED: fprintf(output, "sscaled"); break;540case V_008F0C_BUF_NUM_FORMAT_UINT: fprintf(output, "uint"); break;541case V_008F0C_BUF_NUM_FORMAT_SINT: fprintf(output, "sint"); break;542case V_008F0C_BUF_NUM_FORMAT_SNORM_OGL: fprintf(output, "snorm"); break;543case V_008F0C_BUF_NUM_FORMAT_FLOAT: fprintf(output, "float"); break;544}545if (mtbuf.offset)546fprintf(output, " offset:%u", mtbuf.offset);547if (mtbuf.offen)548fprintf(output, " offen");549if (mtbuf.idxen)550fprintf(output, " idxen");551if (mtbuf.glc)552fprintf(output, " glc");553if (mtbuf.dlc)554fprintf(output, " dlc");555if (mtbuf.slc)556fprintf(output, " slc");557if (mtbuf.tfe)558fprintf(output, " tfe");559if (mtbuf.disable_wqm)560fprintf(output, " disable_wqm");561print_sync(mtbuf.sync, output);562break;563}564case Format::VOP3P: {565if (instr->vop3p().clamp)566fprintf(output, " clamp");567break;568}569default: {570break;571}572}573if (instr->isVOP3()) {574const VOP3_instruction& vop3 = instr->vop3();575switch (vop3.omod) {576case 1: fprintf(output, " *2"); break;577case 2: fprintf(output, " *4"); break;578case 3: fprintf(output, " *0.5"); break;579}580if (vop3.clamp)581fprintf(output, " clamp");582if (vop3.opsel & (1 << 3))583fprintf(output, " opsel_hi");584} else if (instr->isDPP()) {585const DPP_instruction& dpp = instr->dpp();586if (dpp.dpp_ctrl <= 0xff) {587fprintf(output, " quad_perm:[%d,%d,%d,%d]", dpp.dpp_ctrl & 0x3, (dpp.dpp_ctrl >> 2) & 0x3,588(dpp.dpp_ctrl >> 4) & 0x3, (dpp.dpp_ctrl >> 6) & 0x3);589} else if (dpp.dpp_ctrl >= 0x101 && dpp.dpp_ctrl <= 0x10f) {590fprintf(output, " row_shl:%d", dpp.dpp_ctrl & 0xf);591} else if (dpp.dpp_ctrl >= 0x111 && dpp.dpp_ctrl <= 0x11f) {592fprintf(output, " row_shr:%d", dpp.dpp_ctrl & 0xf);593} else if (dpp.dpp_ctrl >= 0x121 && dpp.dpp_ctrl <= 0x12f) {594fprintf(output, " row_ror:%d", dpp.dpp_ctrl & 0xf);595} else if (dpp.dpp_ctrl == dpp_wf_sl1) {596fprintf(output, " wave_shl:1");597} else if (dpp.dpp_ctrl == dpp_wf_rl1) {598fprintf(output, " wave_rol:1");599} else if (dpp.dpp_ctrl == dpp_wf_sr1) {600fprintf(output, " wave_shr:1");601} else if (dpp.dpp_ctrl == dpp_wf_rr1) {602fprintf(output, " wave_ror:1");603} else if (dpp.dpp_ctrl == dpp_row_mirror) {604fprintf(output, " row_mirror");605} else if (dpp.dpp_ctrl == dpp_row_half_mirror) {606fprintf(output, " row_half_mirror");607} else if (dpp.dpp_ctrl == dpp_row_bcast15) {608fprintf(output, " row_bcast:15");609} else if (dpp.dpp_ctrl == dpp_row_bcast31) {610fprintf(output, " row_bcast:31");611} else {612fprintf(output, " dpp_ctrl:0x%.3x", dpp.dpp_ctrl);613}614if (dpp.row_mask != 0xf)615fprintf(output, " row_mask:0x%.1x", dpp.row_mask);616if (dpp.bank_mask != 0xf)617fprintf(output, " bank_mask:0x%.1x", dpp.bank_mask);618if (dpp.bound_ctrl)619fprintf(output, " bound_ctrl:1");620} else if (instr->isSDWA()) {621const SDWA_instruction& sdwa = instr->sdwa();622switch (sdwa.omod) {623case 1: fprintf(output, " *2"); break;624case 2: fprintf(output, " *4"); break;625case 3: fprintf(output, " *0.5"); break;626}627if (sdwa.clamp)628fprintf(output, " clamp");629switch (sdwa.dst_sel & sdwa_asuint) {630case sdwa_udword: break;631case sdwa_ubyte0:632case sdwa_ubyte1:633case sdwa_ubyte2:634case sdwa_ubyte3:635fprintf(output, " dst_sel:%sbyte%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",636sdwa.dst_sel & sdwa_bytenum);637break;638case sdwa_uword0:639case sdwa_uword1:640fprintf(output, " dst_sel:%sword%u", sdwa.dst_sel & sdwa_sext ? "s" : "u",641sdwa.dst_sel & sdwa_wordnum);642break;643}644if (sdwa.dst_preserve)645fprintf(output, " dst_preserve");646}647}648649void650aco_print_instr(const Instruction* instr, FILE* output, unsigned flags)651{652if (!instr->definitions.empty()) {653for (unsigned i = 0; i < instr->definitions.size(); ++i) {654print_definition(&instr->definitions[i], output, flags);655if (i + 1 != instr->definitions.size())656fprintf(output, ", ");657}658fprintf(output, " = ");659}660fprintf(output, "%s", instr_info.name[(int)instr->opcode]);661if (instr->operands.size()) {662bool* const abs = (bool*)alloca(instr->operands.size() * sizeof(bool));663bool* const neg = (bool*)alloca(instr->operands.size() * sizeof(bool));664bool* const opsel = (bool*)alloca(instr->operands.size() * sizeof(bool));665uint8_t* const sel = (uint8_t*)alloca(instr->operands.size() * sizeof(uint8_t));666for (unsigned i = 0; i < instr->operands.size(); ++i) {667abs[i] = false;668neg[i] = false;669opsel[i] = false;670sel[i] = sdwa_udword;671}672if (instr->isVOP3()) {673const VOP3_instruction& vop3 = instr->vop3();674for (unsigned i = 0; i < 3; ++i) {675abs[i] = vop3.abs[i];676neg[i] = vop3.neg[i];677opsel[i] = vop3.opsel & (1 << i);678sel[i] = sdwa_udword;679}680} else if (instr->isDPP()) {681const DPP_instruction& dpp = instr->dpp();682for (unsigned i = 0; i < 2; ++i) {683abs[i] = dpp.abs[i];684neg[i] = dpp.neg[i];685opsel[i] = false;686sel[i] = sdwa_udword;687}688} else if (instr->isSDWA()) {689const SDWA_instruction& sdwa = instr->sdwa();690for (unsigned i = 0; i < 2; ++i) {691abs[i] = sdwa.abs[i];692neg[i] = sdwa.neg[i];693opsel[i] = false;694sel[i] = sdwa.sel[i];695}696}697for (unsigned i = 0; i < instr->operands.size(); ++i) {698if (i)699fprintf(output, ", ");700else701fprintf(output, " ");702703if (neg[i])704fprintf(output, "-");705if (abs[i])706fprintf(output, "|");707if (opsel[i])708fprintf(output, "hi(");709else if (sel[i] & sdwa_sext)710fprintf(output, "sext(");711aco_print_operand(&instr->operands[i], output, flags);712if (opsel[i] || (sel[i] & sdwa_sext))713fprintf(output, ")");714if (!(sel[i] & sdwa_isra)) {715if (sel[i] == sdwa_udword || sel[i] == sdwa_sdword) {716/* print nothing */717} else if (sel[i] & sdwa_isword) {718unsigned index = sel[i] & sdwa_wordnum;719fprintf(output, "[%u:%u]", index * 16, index * 16 + 15);720} else {721unsigned index = sel[i] & sdwa_bytenum;722fprintf(output, "[%u:%u]", index * 8, index * 8 + 7);723}724}725if (abs[i])726fprintf(output, "|");727728if (instr->isVOP3P()) {729const VOP3P_instruction& vop3 = instr->vop3p();730if ((vop3.opsel_lo & (1 << i)) || !(vop3.opsel_hi & (1 << i))) {731fprintf(output, ".%c%c", vop3.opsel_lo & (1 << i) ? 'y' : 'x',732vop3.opsel_hi & (1 << i) ? 'y' : 'x');733}734if (vop3.neg_lo[i] && vop3.neg_hi[i])735fprintf(output, "*[-1,-1]");736else if (vop3.neg_lo[i])737fprintf(output, "*[-1,1]");738else if (vop3.neg_hi[i])739fprintf(output, "*[1,-1]");740}741}742}743print_instr_format_specific(instr, output);744}745746static void747print_block_kind(uint16_t kind, FILE* output)748{749if (kind & block_kind_uniform)750fprintf(output, "uniform, ");751if (kind & block_kind_top_level)752fprintf(output, "top-level, ");753if (kind & block_kind_loop_preheader)754fprintf(output, "loop-preheader, ");755if (kind & block_kind_loop_header)756fprintf(output, "loop-header, ");757if (kind & block_kind_loop_exit)758fprintf(output, "loop-exit, ");759if (kind & block_kind_continue)760fprintf(output, "continue, ");761if (kind & block_kind_break)762fprintf(output, "break, ");763if (kind & block_kind_continue_or_break)764fprintf(output, "continue_or_break, ");765if (kind & block_kind_discard)766fprintf(output, "discard, ");767if (kind & block_kind_branch)768fprintf(output, "branch, ");769if (kind & block_kind_merge)770fprintf(output, "merge, ");771if (kind & block_kind_invert)772fprintf(output, "invert, ");773if (kind & block_kind_uses_discard_if)774fprintf(output, "discard_if, ");775if (kind & block_kind_needs_lowering)776fprintf(output, "needs_lowering, ");777if (kind & block_kind_uses_demote)778fprintf(output, "uses_demote, ");779if (kind & block_kind_export_end)780fprintf(output, "export_end, ");781}782783static void784print_stage(Stage stage, FILE* output)785{786fprintf(output, "ACO shader stage: ");787788if (stage == compute_cs)789fprintf(output, "compute_cs");790else if (stage == fragment_fs)791fprintf(output, "fragment_fs");792else if (stage == gs_copy_vs)793fprintf(output, "gs_copy_vs");794else if (stage == vertex_ls)795fprintf(output, "vertex_ls");796else if (stage == vertex_es)797fprintf(output, "vertex_es");798else if (stage == vertex_vs)799fprintf(output, "vertex_vs");800else if (stage == tess_control_hs)801fprintf(output, "tess_control_hs");802else if (stage == vertex_tess_control_hs)803fprintf(output, "vertex_tess_control_hs");804else if (stage == tess_eval_es)805fprintf(output, "tess_eval_es");806else if (stage == tess_eval_vs)807fprintf(output, "tess_eval_vs");808else if (stage == geometry_gs)809fprintf(output, "geometry_gs");810else if (stage == vertex_geometry_gs)811fprintf(output, "vertex_geometry_gs");812else if (stage == tess_eval_geometry_gs)813fprintf(output, "tess_eval_geometry_gs");814else if (stage == vertex_ngg)815fprintf(output, "vertex_ngg");816else if (stage == tess_eval_ngg)817fprintf(output, "tess_eval_ngg");818else if (stage == vertex_geometry_ngg)819fprintf(output, "vertex_geometry_ngg");820else if (stage == tess_eval_geometry_ngg)821fprintf(output, "tess_eval_geometry_ngg");822else823fprintf(output, "unknown");824825fprintf(output, "\n");826}827828void829aco_print_block(const Block* block, FILE* output, unsigned flags, const live& live_vars)830{831fprintf(output, "BB%d\n", block->index);832fprintf(output, "/* logical preds: ");833for (unsigned pred : block->logical_preds)834fprintf(output, "BB%d, ", pred);835fprintf(output, "/ linear preds: ");836for (unsigned pred : block->linear_preds)837fprintf(output, "BB%d, ", pred);838fprintf(output, "/ kind: ");839print_block_kind(block->kind, output);840fprintf(output, "*/\n");841842if (flags & print_live_vars) {843fprintf(output, "\tlive out:");844for (unsigned id : live_vars.live_out[block->index])845fprintf(output, " %%%d", id);846fprintf(output, "\n");847848RegisterDemand demand = block->register_demand;849fprintf(output, "\tdemand: %u vgpr, %u sgpr\n", demand.vgpr, demand.sgpr);850}851852unsigned index = 0;853for (auto const& instr : block->instructions) {854fprintf(output, "\t");855if (flags & print_live_vars) {856RegisterDemand demand = live_vars.register_demand[block->index][index];857fprintf(output, "(%3u vgpr, %3u sgpr) ", demand.vgpr, demand.sgpr);858}859if (flags & print_perf_info)860fprintf(output, "(%3u clk) ", instr->pass_flags);861862aco_print_instr(instr.get(), output, flags);863fprintf(output, "\n");864index++;865}866}867868void869aco_print_program(const Program* program, FILE* output, const live& live_vars, unsigned flags)870{871switch (program->progress) {872case CompilationProgress::after_isel: fprintf(output, "After Instruction Selection:\n"); break;873case CompilationProgress::after_spilling:874fprintf(output, "After Spilling:\n");875flags |= print_kill;876break;877case CompilationProgress::after_ra: fprintf(output, "After RA:\n"); break;878}879880print_stage(program->stage, output);881882for (Block const& block : program->blocks)883aco_print_block(&block, output, flags, live_vars);884885if (program->constant_data.size()) {886fprintf(output, "\n/* constant data */\n");887for (unsigned i = 0; i < program->constant_data.size(); i += 32) {888fprintf(output, "[%06d] ", i);889unsigned line_size = std::min<size_t>(program->constant_data.size() - i, 32);890for (unsigned j = 0; j < line_size; j += 4) {891unsigned size = std::min<size_t>(program->constant_data.size() - (i + j), 4);892uint32_t v = 0;893memcpy(&v, &program->constant_data[i + j], size);894fprintf(output, " %08x", v);895}896fprintf(output, "\n");897}898}899900fprintf(output, "\n");901}902903void904aco_print_program(const Program* program, FILE* output, unsigned flags)905{906aco_print_program(program, output, live(), flags);907}908909} // namespace aco910911912