Path: blob/21.2-virgl/src/amd/compiler/aco_validate.cpp
4550 views
/*1* Copyright © 2018 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22*/2324#include "aco_ir.h"2526#include "util/memstream.h"2728#include <array>29#include <map>30#include <set>31#include <vector>3233namespace aco {3435static void36aco_log(Program* program, enum radv_compiler_debug_level level, const char* prefix,37const char* file, unsigned line, const char* fmt, va_list args)38{39char* msg;4041if (program->debug.shorten_messages) {42msg = ralloc_vasprintf(NULL, fmt, args);43} else {44msg = ralloc_strdup(NULL, prefix);45ralloc_asprintf_append(&msg, " In file %s:%u\n", file, line);46ralloc_asprintf_append(&msg, " ");47ralloc_vasprintf_append(&msg, fmt, args);48}4950if (program->debug.func)51program->debug.func(program->debug.private_data, level, msg);5253fprintf(program->debug.output, "%s\n", msg);5455ralloc_free(msg);56}5758void59_aco_perfwarn(Program* program, const char* file, unsigned line, const char* fmt, ...)60{61va_list args;6263va_start(args, fmt);64aco_log(program, RADV_COMPILER_DEBUG_LEVEL_PERFWARN, "ACO PERFWARN:\n", file, line, fmt, args);65va_end(args);66}6768void69_aco_err(Program* program, const char* file, unsigned line, const char* fmt, ...)70{71va_list args;7273va_start(args, fmt);74aco_log(program, RADV_COMPILER_DEBUG_LEVEL_ERROR, "ACO ERROR:\n", file, line, fmt, args);75va_end(args);76}7778bool79validate_ir(Program* program)80{81bool is_valid = true;82auto check = [&program, &is_valid](bool success, const char* msg,83aco::Instruction* instr) -> void84{85if (!success) {86char* out;87size_t outsize;88struct u_memstream mem;89u_memstream_open(&mem, &out, &outsize);90FILE* const memf = u_memstream_get(&mem);9192fprintf(memf, "%s: ", msg);93aco_print_instr(instr, memf);94u_memstream_close(&mem);9596aco_err(program, "%s", out);97free(out);9899is_valid = false;100}101};102103auto check_block = [&program, &is_valid](bool success, const char* msg,104aco::Block* block) -> void105{106if (!success) {107aco_err(program, "%s: BB%u", msg, block->index);108is_valid = false;109}110};111112for (Block& block : program->blocks) {113for (aco_ptr<Instruction>& instr : block.instructions) {114115/* check base format */116Format base_format = instr->format;117base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::SDWA);118base_format = (Format)((uint32_t)base_format & ~(uint32_t)Format::DPP);119if ((uint32_t)base_format & (uint32_t)Format::VOP1)120base_format = Format::VOP1;121else if ((uint32_t)base_format & (uint32_t)Format::VOP2)122base_format = Format::VOP2;123else if ((uint32_t)base_format & (uint32_t)Format::VOPC)124base_format = Format::VOPC;125else if ((uint32_t)base_format & (uint32_t)Format::VINTRP) {126if (instr->opcode == aco_opcode::v_interp_p1ll_f16 ||127instr->opcode == aco_opcode::v_interp_p1lv_f16 ||128instr->opcode == aco_opcode::v_interp_p2_legacy_f16 ||129instr->opcode == aco_opcode::v_interp_p2_f16) {130/* v_interp_*_fp16 are considered VINTRP by the compiler but131* they are emitted as VOP3.132*/133base_format = Format::VOP3;134} else {135base_format = Format::VINTRP;136}137}138check(base_format == instr_info.format[(int)instr->opcode],139"Wrong base format for instruction", instr.get());140141/* check VOP3 modifiers */142if (instr->isVOP3() && instr->format != Format::VOP3) {143check(base_format == Format::VOP2 || base_format == Format::VOP1 ||144base_format == Format::VOPC || base_format == Format::VINTRP,145"Format cannot have VOP3/VOP3B applied", instr.get());146}147148/* check SDWA */149if (instr->isSDWA()) {150check(base_format == Format::VOP2 || base_format == Format::VOP1 ||151base_format == Format::VOPC,152"Format cannot have SDWA applied", instr.get());153154check(program->chip_class >= GFX8, "SDWA is GFX8+ only", instr.get());155156SDWA_instruction& sdwa = instr->sdwa();157check(sdwa.omod == 0 || program->chip_class >= GFX9,158"SDWA omod only supported on GFX9+", instr.get());159if (base_format == Format::VOPC) {160check(sdwa.clamp == false || program->chip_class == GFX8,161"SDWA VOPC clamp only supported on GFX8", instr.get());162check((instr->definitions[0].isFixed() && instr->definitions[0].physReg() == vcc) ||163program->chip_class >= GFX9,164"SDWA+VOPC definition must be fixed to vcc on GFX8", instr.get());165}166167if (instr->operands.size() >= 3) {168check(instr->operands[2].isFixed() && instr->operands[2].physReg() == vcc,169"3rd operand must be fixed to vcc with SDWA", instr.get());170}171if (instr->definitions.size() >= 2) {172check(instr->definitions[1].isFixed() && instr->definitions[1].physReg() == vcc,173"2nd definition must be fixed to vcc with SDWA", instr.get());174}175176const bool sdwa_opcodes =177instr->opcode != aco_opcode::v_fmac_f32 && instr->opcode != aco_opcode::v_fmac_f16 &&178instr->opcode != aco_opcode::v_fmamk_f32 &&179instr->opcode != aco_opcode::v_fmaak_f32 &&180instr->opcode != aco_opcode::v_fmamk_f16 &&181instr->opcode != aco_opcode::v_fmaak_f16 &&182instr->opcode != aco_opcode::v_madmk_f32 &&183instr->opcode != aco_opcode::v_madak_f32 &&184instr->opcode != aco_opcode::v_madmk_f16 &&185instr->opcode != aco_opcode::v_madak_f16 &&186instr->opcode != aco_opcode::v_readfirstlane_b32 &&187instr->opcode != aco_opcode::v_clrexcp && instr->opcode != aco_opcode::v_swap_b32;188189const bool feature_mac =190program->chip_class == GFX8 &&191(instr->opcode == aco_opcode::v_mac_f32 && instr->opcode == aco_opcode::v_mac_f16);192193check(sdwa_opcodes || feature_mac, "SDWA can't be used with this opcode", instr.get());194195if (instr->definitions[0].regClass().is_subdword())196check((sdwa.dst_sel & sdwa_asuint) == (sdwa_isra | instr->definitions[0].bytes()),197"Unexpected SDWA sel for sub-dword definition", instr.get());198}199200/* check opsel */201if (instr->isVOP3()) {202VOP3_instruction& vop3 = instr->vop3();203check(vop3.opsel == 0 || program->chip_class >= GFX9,204"Opsel is only supported on GFX9+", instr.get());205206for (unsigned i = 0; i < 3; i++) {207if (i >= instr->operands.size() ||208(instr->operands[i].hasRegClass() &&209instr->operands[i].regClass().is_subdword() && !instr->operands[i].isFixed()))210check((vop3.opsel & (1 << i)) == 0, "Unexpected opsel for operand", instr.get());211}212if (instr->definitions[0].regClass().is_subdword() && !instr->definitions[0].isFixed())213check((vop3.opsel & (1 << 3)) == 0, "Unexpected opsel for sub-dword definition",214instr.get());215}216217/* check for undefs */218for (unsigned i = 0; i < instr->operands.size(); i++) {219if (instr->operands[i].isUndefined()) {220bool flat = instr->isFlatLike();221bool can_be_undef = is_phi(instr) || instr->isEXP() || instr->isReduction() ||222instr->opcode == aco_opcode::p_create_vector ||223(flat && i == 1) || (instr->isMIMG() && (i == 1 || i == 2)) ||224((instr->isMUBUF() || instr->isMTBUF()) && i == 1);225check(can_be_undef, "Undefs can only be used in certain operands", instr.get());226} else {227check(instr->operands[i].isFixed() || instr->operands[i].isTemp() ||228instr->operands[i].isConstant(),229"Uninitialized Operand", instr.get());230}231}232233/* check subdword definitions */234for (unsigned i = 0; i < instr->definitions.size(); i++) {235if (instr->definitions[i].regClass().is_subdword())236check(instr->isPseudo() || instr->definitions[i].bytes() <= 4,237"Only Pseudo instructions can write subdword registers larger than 4 bytes",238instr.get());239}240241if (instr->isSALU() || instr->isVALU()) {242/* check literals */243Operand literal(s1);244for (unsigned i = 0; i < instr->operands.size(); i++) {245Operand op = instr->operands[i];246if (!op.isLiteral())247continue;248249check(!instr->isDPP() && !instr->isSDWA() &&250(!instr->isVOP3() || program->chip_class >= GFX10) &&251(!instr->isVOP3P() || program->chip_class >= GFX10),252"Literal applied on wrong instruction format", instr.get());253254check(literal.isUndefined() || (literal.size() == op.size() &&255literal.constantValue() == op.constantValue()),256"Only 1 Literal allowed", instr.get());257literal = op;258check(instr->isSALU() || instr->isVOP3() || instr->isVOP3P() || i == 0 || i == 2,259"Wrong source position for Literal argument", instr.get());260}261262/* check num sgprs for VALU */263if (instr->isVALU()) {264bool is_shift64 = instr->opcode == aco_opcode::v_lshlrev_b64 ||265instr->opcode == aco_opcode::v_lshrrev_b64 ||266instr->opcode == aco_opcode::v_ashrrev_i64;267unsigned const_bus_limit = 1;268if (program->chip_class >= GFX10 && !is_shift64)269const_bus_limit = 2;270271uint32_t scalar_mask = instr->isVOP3() || instr->isVOP3P() ? 0x7 : 0x5;272if (instr->isSDWA())273scalar_mask = program->chip_class >= GFX9 ? 0x7 : 0x4;274else if (instr->isDPP())275scalar_mask = 0x0;276277if (instr->isVOPC() || instr->opcode == aco_opcode::v_readfirstlane_b32 ||278instr->opcode == aco_opcode::v_readlane_b32 ||279instr->opcode == aco_opcode::v_readlane_b32_e64) {280check(instr->definitions[0].getTemp().type() == RegType::sgpr,281"Wrong Definition type for VALU instruction", instr.get());282} else {283check(instr->definitions[0].getTemp().type() == RegType::vgpr,284"Wrong Definition type for VALU instruction", instr.get());285}286287unsigned num_sgprs = 0;288unsigned sgpr[] = {0, 0};289for (unsigned i = 0; i < instr->operands.size(); i++) {290Operand op = instr->operands[i];291if (instr->opcode == aco_opcode::v_readfirstlane_b32 ||292instr->opcode == aco_opcode::v_readlane_b32 ||293instr->opcode == aco_opcode::v_readlane_b32_e64) {294check(i != 1 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||295op.isConstant(),296"Must be a SGPR or a constant", instr.get());297check(i == 1 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&298op.bytes() <= 4),299"Wrong Operand type for VALU instruction", instr.get());300continue;301}302if (instr->opcode == aco_opcode::v_permlane16_b32 ||303instr->opcode == aco_opcode::v_permlanex16_b32) {304check(i != 0 || (op.isTemp() && op.regClass().type() == RegType::vgpr),305"Operand 0 of v_permlane must be VGPR", instr.get());306check(i == 0 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||307op.isConstant(),308"Lane select operands of v_permlane must be SGPR or constant",309instr.get());310}311312if (instr->opcode == aco_opcode::v_writelane_b32 ||313instr->opcode == aco_opcode::v_writelane_b32_e64) {314check(i != 2 || (op.isTemp() && op.regClass().type() == RegType::vgpr &&315op.bytes() <= 4),316"Wrong Operand type for VALU instruction", instr.get());317check(i == 2 || (op.isTemp() && op.regClass().type() == RegType::sgpr) ||318op.isConstant(),319"Must be a SGPR or a constant", instr.get());320continue;321}322if (op.isTemp() && instr->operands[i].regClass().type() == RegType::sgpr) {323check(scalar_mask & (1 << i), "Wrong source position for SGPR argument",324instr.get());325326if (op.tempId() != sgpr[0] && op.tempId() != sgpr[1]) {327if (num_sgprs < 2)328sgpr[num_sgprs++] = op.tempId();329}330}331332if (op.isConstant() && !op.isLiteral())333check(scalar_mask & (1 << i), "Wrong source position for constant argument",334instr.get());335}336check(num_sgprs + (literal.isUndefined() ? 0 : 1) <= const_bus_limit,337"Too many SGPRs/literals", instr.get());338}339340if (instr->isSOP1() || instr->isSOP2()) {341check(instr->definitions[0].getTemp().type() == RegType::sgpr,342"Wrong Definition type for SALU instruction", instr.get());343for (const Operand& op : instr->operands) {344check(op.isConstant() || op.regClass().type() <= RegType::sgpr,345"Wrong Operand type for SALU instruction", instr.get());346}347}348}349350switch (instr->format) {351case Format::PSEUDO: {352if (instr->opcode == aco_opcode::p_create_vector) {353unsigned size = 0;354for (const Operand& op : instr->operands) {355check(op.bytes() < 4 || size % 4 == 0, "Operand is not aligned", instr.get());356size += op.bytes();357}358check(size == instr->definitions[0].bytes(),359"Definition size does not match operand sizes", instr.get());360if (instr->definitions[0].getTemp().type() == RegType::sgpr) {361for (const Operand& op : instr->operands) {362check(op.isConstant() || op.regClass().type() == RegType::sgpr,363"Wrong Operand type for scalar vector", instr.get());364}365}366} else if (instr->opcode == aco_opcode::p_extract_vector) {367check((instr->operands[0].isTemp()) && instr->operands[1].isConstant(),368"Wrong Operand types", instr.get());369check((instr->operands[1].constantValue() + 1) * instr->definitions[0].bytes() <=370instr->operands[0].bytes(),371"Index out of range", instr.get());372check(instr->definitions[0].getTemp().type() == RegType::vgpr ||373instr->operands[0].regClass().type() == RegType::sgpr,374"Cannot extract SGPR value from VGPR vector", instr.get());375check(program->chip_class >= GFX9 ||376!instr->definitions[0].regClass().is_subdword() ||377instr->operands[0].regClass().type() == RegType::vgpr,378"Cannot extract subdword from SGPR before GFX9+", instr.get());379} else if (instr->opcode == aco_opcode::p_split_vector) {380check(instr->operands[0].isTemp(), "Operand must be a temporary", instr.get());381unsigned size = 0;382for (const Definition& def : instr->definitions) {383size += def.bytes();384}385check(size == instr->operands[0].bytes(),386"Operand size does not match definition sizes", instr.get());387if (instr->operands[0].getTemp().type() == RegType::vgpr) {388for (const Definition& def : instr->definitions)389check(def.regClass().type() == RegType::vgpr,390"Wrong Definition type for VGPR split_vector", instr.get());391} else {392for (const Definition& def : instr->definitions)393check(program->chip_class >= GFX9 || !def.regClass().is_subdword(),394"Cannot split SGPR into subdword VGPRs before GFX9+", instr.get());395}396} else if (instr->opcode == aco_opcode::p_parallelcopy) {397check(instr->definitions.size() == instr->operands.size(),398"Number of Operands does not match number of Definitions", instr.get());399for (unsigned i = 0; i < instr->operands.size(); i++) {400check(instr->definitions[i].bytes() == instr->operands[i].bytes(),401"Operand and Definition size must match", instr.get());402if (instr->operands[i].isTemp())403check((instr->definitions[i].getTemp().type() ==404instr->operands[i].regClass().type()) ||405(instr->definitions[i].getTemp().type() == RegType::vgpr &&406instr->operands[i].regClass().type() == RegType::sgpr),407"Operand and Definition types do not match", instr.get());408}409} else if (instr->opcode == aco_opcode::p_phi) {410check(instr->operands.size() == block.logical_preds.size(),411"Number of Operands does not match number of predecessors", instr.get());412check(instr->definitions[0].getTemp().type() == RegType::vgpr,413"Logical Phi Definition must be vgpr", instr.get());414for (const Operand& op : instr->operands)415check(instr->definitions[0].size() == op.size(),416"Operand sizes must match Definition size", instr.get());417} else if (instr->opcode == aco_opcode::p_linear_phi) {418for (const Operand& op : instr->operands) {419check(!op.isTemp() || op.getTemp().is_linear(), "Wrong Operand type",420instr.get());421check(instr->definitions[0].size() == op.size(),422"Operand sizes must match Definition size", instr.get());423}424check(instr->operands.size() == block.linear_preds.size(),425"Number of Operands does not match number of predecessors", instr.get());426} else if (instr->opcode == aco_opcode::p_extract ||427instr->opcode == aco_opcode::p_insert) {428check(instr->operands[0].isTemp(), "Data operand must be temporary", instr.get());429check(instr->operands[1].isConstant(), "Index must be constant", instr.get());430if (instr->opcode == aco_opcode::p_extract)431check(instr->operands[3].isConstant(), "Sign-extend flag must be constant",432instr.get());433434check(instr->definitions[0].getTemp().type() != RegType::sgpr ||435instr->operands[0].getTemp().type() == RegType::sgpr,436"Can't extract/insert VGPR to SGPR", instr.get());437438if (instr->operands[0].getTemp().type() == RegType::vgpr)439check(instr->operands[0].bytes() == instr->definitions[0].bytes(),440"Sizes of operand and definition must match", instr.get());441442if (instr->definitions[0].getTemp().type() == RegType::sgpr)443check(instr->definitions.size() >= 2 && instr->definitions[1].isFixed() &&444instr->definitions[1].physReg() == scc,445"SGPR extract/insert needs a SCC definition", instr.get());446447check(instr->operands[2].constantEquals(8) || instr->operands[2].constantEquals(16),448"Size must be 8 or 16", instr.get());449check(instr->operands[2].constantValue() < instr->operands[0].getTemp().bytes() * 8u,450"Size must be smaller than source", instr.get());451452unsigned comp =453instr->operands[0].bytes() * 8u / MAX2(instr->operands[2].constantValue(), 1);454check(instr->operands[1].constantValue() < comp, "Index must be in-bounds",455instr.get());456}457break;458}459case Format::PSEUDO_REDUCTION: {460for (const Operand& op : instr->operands)461check(op.regClass().type() == RegType::vgpr,462"All operands of PSEUDO_REDUCTION instructions must be in VGPRs.",463instr.get());464465if (instr->opcode == aco_opcode::p_reduce &&466instr->reduction().cluster_size == program->wave_size)467check(instr->definitions[0].regClass().type() == RegType::sgpr ||468program->wave_size == 32,469"The result of unclustered reductions must go into an SGPR.", instr.get());470else471check(instr->definitions[0].regClass().type() == RegType::vgpr,472"The result of scans and clustered reductions must go into a VGPR.",473instr.get());474475break;476}477case Format::SMEM: {478if (instr->operands.size() >= 1)479check((instr->operands[0].isFixed() && !instr->operands[0].isConstant()) ||480(instr->operands[0].isTemp() &&481instr->operands[0].regClass().type() == RegType::sgpr),482"SMEM operands must be sgpr", instr.get());483if (instr->operands.size() >= 2)484check(instr->operands[1].isConstant() ||485(instr->operands[1].isTemp() &&486instr->operands[1].regClass().type() == RegType::sgpr),487"SMEM offset must be constant or sgpr", instr.get());488if (!instr->definitions.empty())489check(instr->definitions[0].getTemp().type() == RegType::sgpr,490"SMEM result must be sgpr", instr.get());491break;492}493case Format::MTBUF:494case Format::MUBUF: {495check(instr->operands.size() > 1, "VMEM instructions must have at least one operand",496instr.get());497check(instr->operands[1].hasRegClass() &&498instr->operands[1].regClass().type() == RegType::vgpr,499"VADDR must be in vgpr for VMEM instructions", instr.get());500check(501instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::sgpr,502"VMEM resource constant must be sgpr", instr.get());503check(instr->operands.size() < 4 ||504(instr->operands[3].isTemp() &&505instr->operands[3].regClass().type() == RegType::vgpr),506"VMEM write data must be vgpr", instr.get());507break;508}509case Format::MIMG: {510check(instr->operands.size() >= 4, "MIMG instructions must have at least 4 operands",511instr.get());512check(instr->operands[0].hasRegClass() &&513(instr->operands[0].regClass() == s4 || instr->operands[0].regClass() == s8),514"MIMG operands[0] (resource constant) must be in 4 or 8 SGPRs", instr.get());515if (instr->operands[1].hasRegClass())516check(instr->operands[1].regClass() == s4,517"MIMG operands[1] (sampler constant) must be 4 SGPRs", instr.get());518if (!instr->operands[2].isUndefined()) {519bool is_cmpswap = instr->opcode == aco_opcode::image_atomic_cmpswap ||520instr->opcode == aco_opcode::image_atomic_fcmpswap;521check(instr->definitions.empty() ||522(instr->definitions[0].regClass() == instr->operands[2].regClass() ||523is_cmpswap),524"MIMG operands[2] (VDATA) must be the same as definitions[0] for atomics and "525"TFE/LWE loads",526instr.get());527}528check(instr->operands.size() == 4 || program->chip_class >= GFX10,529"NSA is only supported on GFX10+", instr.get());530for (unsigned i = 3; i < instr->operands.size(); i++) {531if (instr->operands.size() == 4) {532check(instr->operands[i].hasRegClass() &&533instr->operands[i].regClass().type() == RegType::vgpr,534"MIMG operands[3] (VADDR) must be VGPR", instr.get());535} else {536check(instr->operands[i].regClass() == v1, "MIMG VADDR must be v1 if NSA is used",537instr.get());538}539}540check(instr->definitions.empty() ||541(instr->definitions[0].isTemp() &&542instr->definitions[0].regClass().type() == RegType::vgpr),543"MIMG definitions[0] (VDATA) must be VGPR", instr.get());544break;545}546case Format::DS: {547for (const Operand& op : instr->operands) {548check((op.isTemp() && op.regClass().type() == RegType::vgpr) || op.physReg() == m0,549"Only VGPRs are valid DS instruction operands", instr.get());550}551if (!instr->definitions.empty())552check(instr->definitions[0].getTemp().type() == RegType::vgpr,553"DS instruction must return VGPR", instr.get());554break;555}556case Format::EXP: {557for (unsigned i = 0; i < 4; i++)558check(instr->operands[i].hasRegClass() &&559instr->operands[i].regClass().type() == RegType::vgpr,560"Only VGPRs are valid Export arguments", instr.get());561break;562}563case Format::FLAT:564check(instr->operands[1].isUndefined(), "Flat instructions don't support SADDR",565instr.get());566FALLTHROUGH;567case Format::GLOBAL:568case Format::SCRATCH: {569check(570instr->operands[0].isTemp() && instr->operands[0].regClass().type() == RegType::vgpr,571"FLAT/GLOBAL/SCRATCH address must be vgpr", instr.get());572check(instr->operands[1].hasRegClass() &&573instr->operands[1].regClass().type() == RegType::sgpr,574"FLAT/GLOBAL/SCRATCH sgpr address must be undefined or sgpr", instr.get());575if (!instr->definitions.empty())576check(instr->definitions[0].getTemp().type() == RegType::vgpr,577"FLAT/GLOBAL/SCRATCH result must be vgpr", instr.get());578else579check(instr->operands[2].regClass().type() == RegType::vgpr,580"FLAT/GLOBAL/SCRATCH data must be vgpr", instr.get());581break;582}583default: break;584}585}586}587588/* validate CFG */589for (unsigned i = 0; i < program->blocks.size(); i++) {590Block& block = program->blocks[i];591check_block(block.index == i, "block.index must match actual index", &block);592593/* predecessors/successors should be sorted */594for (unsigned j = 0; j + 1 < block.linear_preds.size(); j++)595check_block(block.linear_preds[j] < block.linear_preds[j + 1],596"linear predecessors must be sorted", &block);597for (unsigned j = 0; j + 1 < block.logical_preds.size(); j++)598check_block(block.logical_preds[j] < block.logical_preds[j + 1],599"logical predecessors must be sorted", &block);600for (unsigned j = 0; j + 1 < block.linear_succs.size(); j++)601check_block(block.linear_succs[j] < block.linear_succs[j + 1],602"linear successors must be sorted", &block);603for (unsigned j = 0; j + 1 < block.logical_succs.size(); j++)604check_block(block.logical_succs[j] < block.logical_succs[j + 1],605"logical successors must be sorted", &block);606607/* critical edges are not allowed */608if (block.linear_preds.size() > 1) {609for (unsigned pred : block.linear_preds)610check_block(program->blocks[pred].linear_succs.size() == 1,611"linear critical edges are not allowed", &program->blocks[pred]);612for (unsigned pred : block.logical_preds)613check_block(program->blocks[pred].logical_succs.size() == 1,614"logical critical edges are not allowed", &program->blocks[pred]);615}616}617618return is_valid;619}620621/* RA validation */622namespace {623624struct Location {625Location() : block(NULL), instr(NULL) {}626627Block* block;628Instruction* instr; // NULL if it's the block's live-in629};630631struct Assignment {632Location defloc;633Location firstloc;634PhysReg reg;635};636637bool638ra_fail(Program* program, Location loc, Location loc2, const char* fmt, ...)639{640va_list args;641va_start(args, fmt);642char msg[1024];643vsprintf(msg, fmt, args);644va_end(args);645646char* out;647size_t outsize;648struct u_memstream mem;649u_memstream_open(&mem, &out, &outsize);650FILE* const memf = u_memstream_get(&mem);651652fprintf(memf, "RA error found at instruction in BB%d:\n", loc.block->index);653if (loc.instr) {654aco_print_instr(loc.instr, memf);655fprintf(memf, "\n%s", msg);656} else {657fprintf(memf, "%s", msg);658}659if (loc2.block) {660fprintf(memf, " in BB%d:\n", loc2.block->index);661aco_print_instr(loc2.instr, memf);662}663fprintf(memf, "\n\n");664u_memstream_close(&mem);665666aco_err(program, "%s", out);667free(out);668669return true;670}671672bool673validate_subdword_operand(chip_class chip, const aco_ptr<Instruction>& instr, unsigned index)674{675Operand op = instr->operands[index];676unsigned byte = op.physReg().byte();677678if (instr->opcode == aco_opcode::p_as_uniform)679return byte == 0;680if (instr->isPseudo() && chip >= GFX8)681return true;682if (instr->isSDWA()) {683unsigned sel = instr->sdwa().sel[index] & sdwa_asuint;684return (sel & sdwa_isra) && (sel & sdwa_rasize) <= op.bytes();685}686if (byte == 2 && can_use_opsel(chip, instr->opcode, index, 1))687return true;688689switch (instr->opcode) {690case aco_opcode::v_cvt_f32_ubyte1:691if (byte == 1)692return true;693break;694case aco_opcode::v_cvt_f32_ubyte2:695if (byte == 2)696return true;697break;698case aco_opcode::v_cvt_f32_ubyte3:699if (byte == 3)700return true;701break;702case aco_opcode::ds_write_b8_d16_hi:703case aco_opcode::ds_write_b16_d16_hi:704if (byte == 2 && index == 1)705return true;706break;707case aco_opcode::buffer_store_byte_d16_hi:708case aco_opcode::buffer_store_short_d16_hi:709if (byte == 2 && index == 3)710return true;711break;712case aco_opcode::flat_store_byte_d16_hi:713case aco_opcode::flat_store_short_d16_hi:714case aco_opcode::scratch_store_byte_d16_hi:715case aco_opcode::scratch_store_short_d16_hi:716case aco_opcode::global_store_byte_d16_hi:717case aco_opcode::global_store_short_d16_hi:718if (byte == 2 && index == 2)719return true;720break;721default: break;722}723724return byte == 0;725}726727bool728validate_subdword_definition(chip_class chip, const aco_ptr<Instruction>& instr)729{730Definition def = instr->definitions[0];731unsigned byte = def.physReg().byte();732733if (instr->isPseudo() && chip >= GFX8)734return true;735if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))736return true;737if (byte == 2 && can_use_opsel(chip, instr->opcode, -1, 1))738return true;739740switch (instr->opcode) {741case aco_opcode::buffer_load_ubyte_d16_hi:742case aco_opcode::buffer_load_short_d16_hi:743case aco_opcode::flat_load_ubyte_d16_hi:744case aco_opcode::flat_load_short_d16_hi:745case aco_opcode::scratch_load_ubyte_d16_hi:746case aco_opcode::scratch_load_short_d16_hi:747case aco_opcode::global_load_ubyte_d16_hi:748case aco_opcode::global_load_short_d16_hi:749case aco_opcode::ds_read_u8_d16_hi:750case aco_opcode::ds_read_u16_d16_hi: return byte == 2;751default: break;752}753754return byte == 0;755}756757unsigned758get_subdword_bytes_written(Program* program, const aco_ptr<Instruction>& instr, unsigned index)759{760chip_class chip = program->chip_class;761Definition def = instr->definitions[index];762763if (instr->isPseudo())764return chip >= GFX8 ? def.bytes() : def.size() * 4u;765if (instr->isSDWA() && instr->sdwa().dst_sel == (sdwa_isra | def.bytes()))766return def.bytes();767768switch (instr->opcode) {769case aco_opcode::buffer_load_ubyte_d16:770case aco_opcode::buffer_load_short_d16:771case aco_opcode::flat_load_ubyte_d16:772case aco_opcode::flat_load_short_d16:773case aco_opcode::scratch_load_ubyte_d16:774case aco_opcode::scratch_load_short_d16:775case aco_opcode::global_load_ubyte_d16:776case aco_opcode::global_load_short_d16:777case aco_opcode::ds_read_u8_d16:778case aco_opcode::ds_read_u16_d16:779case aco_opcode::buffer_load_ubyte_d16_hi:780case aco_opcode::buffer_load_short_d16_hi:781case aco_opcode::flat_load_ubyte_d16_hi:782case aco_opcode::flat_load_short_d16_hi:783case aco_opcode::scratch_load_ubyte_d16_hi:784case aco_opcode::scratch_load_short_d16_hi:785case aco_opcode::global_load_ubyte_d16_hi:786case aco_opcode::global_load_short_d16_hi:787case aco_opcode::ds_read_u8_d16_hi:788case aco_opcode::ds_read_u16_d16_hi: return program->dev.sram_ecc_enabled ? 4 : 2;789case aco_opcode::v_mad_f16:790case aco_opcode::v_mad_u16:791case aco_opcode::v_mad_i16:792case aco_opcode::v_fma_f16:793case aco_opcode::v_div_fixup_f16:794case aco_opcode::v_interp_p2_f16:795if (chip >= GFX9)796return 2;797break;798default: break;799}800801return MAX2(chip >= GFX10 ? def.bytes() : 4,802instr_info.definition_size[(int)instr->opcode] / 8u);803}804805} /* end namespace */806807bool808validate_ra(Program* program)809{810if (!(debug_flags & DEBUG_VALIDATE_RA))811return false;812813bool err = false;814aco::live live_vars = aco::live_var_analysis(program);815std::vector<std::vector<Temp>> phi_sgpr_ops(program->blocks.size());816uint16_t sgpr_limit = get_addr_sgpr_from_waves(program, program->num_waves);817818std::map<unsigned, Assignment> assignments;819for (Block& block : program->blocks) {820Location loc;821loc.block = █822for (aco_ptr<Instruction>& instr : block.instructions) {823if (instr->opcode == aco_opcode::p_phi) {824for (unsigned i = 0; i < instr->operands.size(); i++) {825if (instr->operands[i].isTemp() &&826instr->operands[i].getTemp().type() == RegType::sgpr &&827instr->operands[i].isFirstKill())828phi_sgpr_ops[block.logical_preds[i]].emplace_back(instr->operands[i].getTemp());829}830}831832loc.instr = instr.get();833for (unsigned i = 0; i < instr->operands.size(); i++) {834Operand& op = instr->operands[i];835if (!op.isTemp())836continue;837if (!op.isFixed())838err |= ra_fail(program, loc, Location(), "Operand %d is not assigned a register", i);839if (assignments.count(op.tempId()) && assignments[op.tempId()].reg != op.physReg())840err |=841ra_fail(program, loc, assignments.at(op.tempId()).firstloc,842"Operand %d has an inconsistent register assignment with instruction", i);843if ((op.getTemp().type() == RegType::vgpr &&844op.physReg().reg_b + op.bytes() > (256 + program->config->num_vgprs) * 4) ||845(op.getTemp().type() == RegType::sgpr &&846op.physReg() + op.size() > program->config->num_sgprs &&847op.physReg() < sgpr_limit))848err |= ra_fail(program, loc, assignments.at(op.tempId()).firstloc,849"Operand %d has an out-of-bounds register assignment", i);850if (op.physReg() == vcc && !program->needs_vcc)851err |= ra_fail(program, loc, Location(),852"Operand %d fixed to vcc but needs_vcc=false", i);853if (op.regClass().is_subdword() &&854!validate_subdword_operand(program->chip_class, instr, i))855err |= ra_fail(program, loc, Location(), "Operand %d not aligned correctly", i);856if (!assignments[op.tempId()].firstloc.block)857assignments[op.tempId()].firstloc = loc;858if (!assignments[op.tempId()].defloc.block)859assignments[op.tempId()].reg = op.physReg();860}861862for (unsigned i = 0; i < instr->definitions.size(); i++) {863Definition& def = instr->definitions[i];864if (!def.isTemp())865continue;866if (!def.isFixed())867err |=868ra_fail(program, loc, Location(), "Definition %d is not assigned a register", i);869if (assignments[def.tempId()].defloc.block)870err |= ra_fail(program, loc, assignments.at(def.tempId()).defloc,871"Temporary %%%d also defined by instruction", def.tempId());872if ((def.getTemp().type() == RegType::vgpr &&873def.physReg().reg_b + def.bytes() > (256 + program->config->num_vgprs) * 4) ||874(def.getTemp().type() == RegType::sgpr &&875def.physReg() + def.size() > program->config->num_sgprs &&876def.physReg() < sgpr_limit))877err |= ra_fail(program, loc, assignments.at(def.tempId()).firstloc,878"Definition %d has an out-of-bounds register assignment", i);879if (def.physReg() == vcc && !program->needs_vcc)880err |= ra_fail(program, loc, Location(),881"Definition %d fixed to vcc but needs_vcc=false", i);882if (def.regClass().is_subdword() &&883!validate_subdword_definition(program->chip_class, instr))884err |= ra_fail(program, loc, Location(), "Definition %d not aligned correctly", i);885if (!assignments[def.tempId()].firstloc.block)886assignments[def.tempId()].firstloc = loc;887assignments[def.tempId()].defloc = loc;888assignments[def.tempId()].reg = def.physReg();889}890}891}892893for (Block& block : program->blocks) {894Location loc;895loc.block = █896897std::array<unsigned, 2048> regs; /* register file in bytes */898regs.fill(0);899900std::set<Temp> live;901for (unsigned id : live_vars.live_out[block.index])902live.insert(Temp(id, program->temp_rc[id]));903/* remove killed p_phi sgpr operands */904for (Temp tmp : phi_sgpr_ops[block.index])905live.erase(tmp);906907/* check live out */908for (Temp tmp : live) {909PhysReg reg = assignments.at(tmp.id()).reg;910for (unsigned i = 0; i < tmp.bytes(); i++) {911if (regs[reg.reg_b + i]) {912err |= ra_fail(program, loc, Location(),913"Assignment of element %d of %%%d already taken by %%%d in live-out",914i, tmp.id(), regs[reg.reg_b + i]);915}916regs[reg.reg_b + i] = tmp.id();917}918}919regs.fill(0);920921for (auto it = block.instructions.rbegin(); it != block.instructions.rend(); ++it) {922aco_ptr<Instruction>& instr = *it;923924/* check killed p_phi sgpr operands */925if (instr->opcode == aco_opcode::p_logical_end) {926for (Temp tmp : phi_sgpr_ops[block.index]) {927PhysReg reg = assignments.at(tmp.id()).reg;928for (unsigned i = 0; i < tmp.bytes(); i++) {929if (regs[reg.reg_b + i])930err |= ra_fail(931program, loc, Location(),932"Assignment of element %d of %%%d already taken by %%%d in live-out", i,933tmp.id(), regs[reg.reg_b + i]);934}935live.emplace(tmp);936}937}938939for (const Definition& def : instr->definitions) {940if (!def.isTemp())941continue;942live.erase(def.getTemp());943}944945/* don't count phi operands as live-in, since they are actually946* killed when they are copied at the predecessor */947if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {948for (const Operand& op : instr->operands) {949if (!op.isTemp())950continue;951live.insert(op.getTemp());952}953}954}955956for (Temp tmp : live) {957PhysReg reg = assignments.at(tmp.id()).reg;958for (unsigned i = 0; i < tmp.bytes(); i++)959regs[reg.reg_b + i] = tmp.id();960}961962for (aco_ptr<Instruction>& instr : block.instructions) {963loc.instr = instr.get();964965/* remove killed p_phi operands from regs */966if (instr->opcode == aco_opcode::p_logical_end) {967for (Temp tmp : phi_sgpr_ops[block.index]) {968PhysReg reg = assignments.at(tmp.id()).reg;969for (unsigned i = 0; i < tmp.bytes(); i++)970regs[reg.reg_b + i] = 0;971}972}973974if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {975for (const Operand& op : instr->operands) {976if (!op.isTemp())977continue;978if (op.isFirstKillBeforeDef()) {979for (unsigned j = 0; j < op.getTemp().bytes(); j++)980regs[op.physReg().reg_b + j] = 0;981}982}983}984985for (unsigned i = 0; i < instr->definitions.size(); i++) {986Definition& def = instr->definitions[i];987if (!def.isTemp())988continue;989Temp tmp = def.getTemp();990PhysReg reg = assignments.at(tmp.id()).reg;991for (unsigned j = 0; j < tmp.bytes(); j++) {992if (regs[reg.reg_b + j])993err |= ra_fail(994program, loc, assignments.at(regs[reg.reg_b + j]).defloc,995"Assignment of element %d of %%%d already taken by %%%d from instruction", i,996tmp.id(), regs[reg.reg_b + j]);997regs[reg.reg_b + j] = tmp.id();998}999if (def.regClass().is_subdword() && def.bytes() < 4) {1000unsigned written = get_subdword_bytes_written(program, instr, i);1001/* If written=4, the instruction still might write the upper half. In that case, it's1002* the lower half that isn't preserved */1003for (unsigned j = reg.byte() & ~(written - 1); j < written; j++) {1004unsigned written_reg = reg.reg() * 4u + j;1005if (regs[written_reg] && regs[written_reg] != def.tempId())1006err |= ra_fail(program, loc, assignments.at(regs[written_reg]).defloc,1007"Assignment of element %d of %%%d overwrites the full register "1008"taken by %%%d from instruction",1009i, tmp.id(), regs[written_reg]);1010}1011}1012}10131014for (const Definition& def : instr->definitions) {1015if (!def.isTemp())1016continue;1017if (def.isKill()) {1018for (unsigned j = 0; j < def.getTemp().bytes(); j++)1019regs[def.physReg().reg_b + j] = 0;1020}1021}10221023if (instr->opcode != aco_opcode::p_phi && instr->opcode != aco_opcode::p_linear_phi) {1024for (const Operand& op : instr->operands) {1025if (!op.isTemp())1026continue;1027if (op.isLateKill() && op.isFirstKill()) {1028for (unsigned j = 0; j < op.getTemp().bytes(); j++)1029regs[op.physReg().reg_b + j] = 0;1030}1031}1032}1033}1034}10351036return err;1037}1038} // namespace aco103910401041