/* Source: blob/21.2-virgl/src/freedreno/ir3/ir3_ra_validate.c */
/*1* Copyright (C) 2021 Valve Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,19* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE20* SOFTWARE.21*/2223#include "util/ralloc.h"24#include "ir3_ra.h"25#include "ir3_shader.h"2627/* This file implements a validation pass for register allocation. We check28* that the assignment of SSA values to registers is "valid", in the sense29* that each original definition reaches all of its uses without being30* clobbered by something else.31*32* The validation is a forward dataflow analysis. The state at each point33* consists of, for each physical register, the SSA value occupying it, or a34* few special values:35*36* - "unknown" is set initially, before the dataflow analysis assigns it a37* value. 
This is the lattice bottom.38* - Values at the start get "undef", which acts like a special SSA value that39* indicates it is never written.40* - "overdefined" registers are set to more than one value, depending on41* which path you take to get to the spot. This is the lattice top.42*43* Overdefined is necessary to distinguish because in some programs, like this44* simple example, it's perfectly normal and allowed:45*46* if (...) {47* mov.u32u32 ssa_1(r1.x), ...48* ...49* } else {50* mov.u32u32 ssa_2(r1.x), ...51* ...52* }53* // r1.x is overdefined here!54*55* However, if an ssa value after the if is accidentally assigned to r1.x, we56* need to remember that it's invalid to catch the mistake. Overdef has to be57* distinguished from undef so that the state forms a valid lattice to58* guarantee that the analysis always terminates. We could avoid relying on59* overdef by using liveness analysis, but not relying on liveness has the60* benefit that we can catch bugs in liveness analysis too.61*62* One tricky thing we have to handle is the coalescing of splits/collects,63* which means that multiple SSA values can occupy a register at the same64* time. While we could use the same merge set indices that RA uses, again65* that would rely on the merge set calculation being correct which we don't66* want to. Instead we treat splits/collects as transfer instructions, similar67* to the parallelcopy instructions inserted by RA, and have them copy their68* sources to their destinations. 
This means that each physreg must carry the69* SSA def assigned to it plus an offset into that definition, and when70* validating sources we must look through splits/collects to find the71* "original" source for each subregister.72*/7374#define UNKNOWN ((struct ir3_register *)NULL)75#define UNDEF ((struct ir3_register *)(uintptr_t)1)76#define OVERDEF ((struct ir3_register *)(uintptr_t)2)7778struct reg_state {79struct ir3_register *def;80unsigned offset;81};8283struct file_state {84struct reg_state regs[RA_MAX_FILE_SIZE];85};8687struct reaching_state {88struct file_state half, full, shared;89};9091struct ra_val_ctx {92struct ir3_instruction *current_instr;9394struct reaching_state reaching;95struct reaching_state *block_reaching;96unsigned block_count;9798unsigned full_size, half_size;99100bool merged_regs;101102bool failed;103};104105static void106validate_error(struct ra_val_ctx *ctx, const char *condstr)107{108fprintf(stderr, "ra validation fail: %s\n", condstr);109fprintf(stderr, " -> for instruction: ");110ir3_print_instr(ctx->current_instr);111abort();112}113114#define validate_assert(ctx, cond) \115do { \116if (!(cond)) { \117validate_error(ctx, #cond); \118} \119} while (0)120121static unsigned122get_file_size(struct ra_val_ctx *ctx, struct ir3_register *reg)123{124if (reg->flags & IR3_REG_SHARED)125return RA_SHARED_SIZE;126else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))127return ctx->full_size;128else129return ctx->half_size;130}131132/* Validate simple things, like the registers being in-bounds. 
This way we133* don't have to worry about out-of-bounds accesses later.134*/135136static void137validate_simple(struct ra_val_ctx *ctx, struct ir3_instruction *instr)138{139ctx->current_instr = instr;140ra_foreach_dst (dst, instr) {141unsigned dst_max = ra_reg_get_physreg(dst) + reg_size(dst);142validate_assert(ctx, dst_max <= get_file_size(ctx, dst));143if (dst->tied)144validate_assert(ctx, ra_reg_get_num(dst) == ra_reg_get_num(dst->tied));145}146147ra_foreach_src (src, instr) {148unsigned src_max = ra_reg_get_physreg(src) + reg_size(src);149validate_assert(ctx, src_max <= get_file_size(ctx, src));150}151}152153/* This is the lattice operator. */154static bool155merge_reg(struct reg_state *dst, const struct reg_state *src)156{157if (dst->def == UNKNOWN) {158*dst = *src;159return src->def != UNKNOWN;160} else if (dst->def == OVERDEF) {161return false;162} else {163if (src->def == UNKNOWN)164return false;165else if (src->def == OVERDEF) {166*dst = *src;167return true;168} else {169if (dst->def != src->def || dst->offset != src->offset) {170dst->def = OVERDEF;171dst->offset = 0;172return true;173} else {174return false;175}176}177}178}179180static bool181merge_file(struct file_state *dst, const struct file_state *src, unsigned size)182{183bool progress = false;184for (unsigned i = 0; i < size; i++)185progress |= merge_reg(&dst->regs[i], &src->regs[i]);186return progress;187}188189static bool190merge_state(struct ra_val_ctx *ctx, struct reaching_state *dst,191const struct reaching_state *src)192{193bool progress = false;194progress |= merge_file(&dst->full, &src->full, ctx->full_size);195progress |= merge_file(&dst->half, &src->half, ctx->half_size);196return progress;197}198199static bool200merge_state_physical(struct ra_val_ctx *ctx, struct reaching_state *dst,201const struct reaching_state *src)202{203return merge_file(&dst->shared, &src->shared, RA_SHARED_SIZE);204}205206static struct file_state *207ra_val_get_file(struct ra_val_ctx *ctx, struct ir3_register 
*reg)208{209if (reg->flags & IR3_REG_SHARED)210return &ctx->reaching.shared;211else if (ctx->merged_regs || !(reg->flags & IR3_REG_HALF))212return &ctx->reaching.full;213else214return &ctx->reaching.half;215}216217static void218propagate_normal_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)219{220ra_foreach_dst (dst, instr) {221struct file_state *file = ra_val_get_file(ctx, dst);222physreg_t physreg = ra_reg_get_physreg(dst);223for (unsigned i = 0; i < reg_size(dst); i++) {224file->regs[physreg + i] = (struct reg_state){225.def = dst,226.offset = i,227};228}229}230}231232static void233propagate_split(struct ra_val_ctx *ctx, struct ir3_instruction *split)234{235struct ir3_register *dst = split->dsts[0];236struct ir3_register *src = split->srcs[0];237physreg_t dst_physreg = ra_reg_get_physreg(dst);238physreg_t src_physreg = ra_reg_get_physreg(src);239struct file_state *file = ra_val_get_file(ctx, dst);240241unsigned offset = split->split.off * reg_elem_size(src);242for (unsigned i = 0; i < reg_elem_size(src); i++) {243file->regs[dst_physreg + i] = file->regs[src_physreg + offset + i];244}245}246247static void248propagate_collect(struct ra_val_ctx *ctx, struct ir3_instruction *collect)249{250struct ir3_register *dst = collect->dsts[0];251physreg_t dst_physreg = ra_reg_get_physreg(dst);252struct file_state *file = ra_val_get_file(ctx, dst);253254unsigned size = reg_size(dst);255struct reg_state srcs[size];256257for (unsigned i = 0; i < collect->srcs_count; i++) {258struct ir3_register *src = collect->srcs[i];259unsigned dst_offset = i * reg_elem_size(dst);260for (unsigned j = 0; j < reg_elem_size(dst); j++) {261if (!ra_reg_is_src(src)) {262srcs[dst_offset + j] = (struct reg_state){263.def = dst,264.offset = dst_offset + j,265};266} else {267physreg_t src_physreg = ra_reg_get_physreg(src);268srcs[dst_offset + j] = file->regs[src_physreg + j];269}270}271}272273for (unsigned i = 0; i < size; i++)274file->regs[dst_physreg + i] = srcs[i];275}276277static 
void278propagate_parallelcopy(struct ra_val_ctx *ctx, struct ir3_instruction *pcopy)279{280unsigned size = 0;281for (unsigned i = 0; i < pcopy->dsts_count; i++) {282size += reg_size(pcopy->srcs[i]);283}284285struct reg_state srcs[size];286287unsigned offset = 0;288for (unsigned i = 0; i < pcopy->srcs_count; i++) {289struct ir3_register *dst = pcopy->dsts[i];290struct ir3_register *src = pcopy->srcs[i];291struct file_state *file = ra_val_get_file(ctx, dst);292293for (unsigned j = 0; j < reg_size(dst); j++) {294if (src->flags & (IR3_REG_IMMED | IR3_REG_CONST)) {295srcs[offset + j] = (struct reg_state){296.def = dst,297.offset = j,298};299} else {300physreg_t src_physreg = ra_reg_get_physreg(src);301srcs[offset + j] = file->regs[src_physreg + j];302}303}304305offset += reg_size(dst);306}307assert(offset == size);308309offset = 0;310for (unsigned i = 0; i < pcopy->dsts_count; i++) {311struct ir3_register *dst = pcopy->dsts[i];312physreg_t dst_physreg = ra_reg_get_physreg(dst);313struct file_state *file = ra_val_get_file(ctx, dst);314315for (unsigned j = 0; j < reg_size(dst); j++)316file->regs[dst_physreg + j] = srcs[offset + j];317318offset += reg_size(dst);319}320assert(offset == size);321}322323static void324propagate_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)325{326if (instr->opc == OPC_META_SPLIT)327propagate_split(ctx, instr);328else if (instr->opc == OPC_META_COLLECT)329propagate_collect(ctx, instr);330else if (instr->opc == OPC_META_PARALLEL_COPY)331propagate_parallelcopy(ctx, instr);332else333propagate_normal_instr(ctx, instr);334}335336static bool337propagate_block(struct ra_val_ctx *ctx, struct ir3_block *block)338{339ctx->reaching = ctx->block_reaching[block->index];340341foreach_instr (instr, &block->instr_list) {342propagate_instr(ctx, instr);343}344345bool progress = false;346for (unsigned i = 0; i < 2; i++) {347struct ir3_block *succ = block->successors[i];348if (!succ)349continue;350progress |=351merge_state(ctx, 
&ctx->block_reaching[succ->index], &ctx->reaching);352}353for (unsigned i = 0; i < 2; i++) {354struct ir3_block *succ = block->physical_successors[i];355if (!succ)356continue;357progress |= merge_state_physical(ctx, &ctx->block_reaching[succ->index],358&ctx->reaching);359}360return progress;361}362363static void364chase_definition(struct reg_state *state)365{366while (true) {367struct ir3_instruction *instr = state->def->instr;368switch (instr->opc) {369case OPC_META_SPLIT: {370struct ir3_register *new_def = instr->srcs[0]->def;371unsigned offset = instr->split.off * reg_elem_size(new_def);372*state = (struct reg_state){373.def = new_def,374.offset = state->offset + offset,375};376break;377}378case OPC_META_COLLECT: {379unsigned src_idx = state->offset / reg_elem_size(state->def);380unsigned src_offset = state->offset % reg_elem_size(state->def);381struct ir3_register *new_def = instr->srcs[src_idx]->def;382if (new_def) {383*state = (struct reg_state){384.def = new_def,385.offset = src_offset,386};387} else {388/* Bail on immed/const */389return;390}391break;392}393case OPC_META_PARALLEL_COPY: {394unsigned dst_idx = ~0;395for (unsigned i = 0; i < instr->dsts_count; i++) {396if (instr->dsts[i] == state->def) {397dst_idx = i;398break;399}400}401assert(dst_idx != ~0);402403struct ir3_register *new_def = instr->srcs[dst_idx]->def;404if (new_def) {405state->def = new_def;406} else {407/* Bail on immed/const */408return;409}410break;411}412default:413return;414}415}416}417418static void419dump_reg_state(struct reg_state *state)420{421if (state->def == UNDEF) {422fprintf(stderr, "no reaching definition");423} else if (state->def == OVERDEF) {424fprintf(stderr,425"more than one reaching definition or partial definition");426} else {427/* The analysis should always remove UNKNOWN eventually. */428assert(state->def != UNKNOWN);429430fprintf(stderr, "ssa_%u:%u(%sr%u.%c) + %u", state->def->instr->serialno,431state->def->name, (state->def->flags & IR3_REG_HALF) ? 
"h" : "",432state->def->num / 4, "xyzw"[state->def->num % 4],433state -> offset);434}435}436437static void438check_reaching_src(struct ra_val_ctx *ctx, struct ir3_instruction *instr,439struct ir3_register *src)440{441struct file_state *file = ra_val_get_file(ctx, src);442physreg_t physreg = ra_reg_get_physreg(src);443for (unsigned i = 0; i < reg_size(src); i++) {444struct reg_state expected = (struct reg_state){445.def = src->def,446.offset = i,447};448chase_definition(&expected);449450struct reg_state actual = file->regs[physreg + i];451452if (expected.def != actual.def || expected.offset != actual.offset) {453fprintf(454stderr,455"ra validation fail: wrong definition reaches source ssa_%u:%u + %u\n",456src->def->instr->serialno, src->def->name, i);457fprintf(stderr, "expected: ");458dump_reg_state(&expected);459fprintf(stderr, "\n");460fprintf(stderr, "actual: ");461dump_reg_state(&actual);462fprintf(stderr, "\n");463fprintf(stderr, "-> for instruction: ");464ir3_print_instr(instr);465ctx->failed = true;466}467}468}469470static void471check_reaching_instr(struct ra_val_ctx *ctx, struct ir3_instruction *instr)472{473if (instr->opc == OPC_META_SPLIT || instr->opc == OPC_META_COLLECT ||474instr->opc == OPC_META_PARALLEL_COPY || instr->opc == OPC_META_PHI) {475return;476}477478ra_foreach_src (src, instr) {479check_reaching_src(ctx, instr, src);480}481}482483static void484check_reaching_block(struct ra_val_ctx *ctx, struct ir3_block *block)485{486ctx->reaching = ctx->block_reaching[block->index];487488foreach_instr (instr, &block->instr_list) {489check_reaching_instr(ctx, instr);490propagate_instr(ctx, instr);491}492493for (unsigned i = 0; i < 2; i++) {494struct ir3_block *succ = block->successors[i];495if (!succ)496continue;497498unsigned pred_idx = ir3_block_get_pred_index(succ, block);499foreach_instr (instr, &succ->instr_list) {500if (instr->opc != OPC_META_PHI)501break;502if (instr->srcs[pred_idx]->def)503check_reaching_src(ctx, instr, 
instr->srcs[pred_idx]);504}505}506}507508static void509check_reaching_defs(struct ra_val_ctx *ctx, struct ir3 *ir)510{511ctx->block_reaching =512rzalloc_array(ctx, struct reaching_state, ctx->block_count);513514struct reaching_state *start = &ctx->block_reaching[0];515for (unsigned i = 0; i < ctx->full_size; i++)516start->full.regs[i].def = UNDEF;517for (unsigned i = 0; i < ctx->half_size; i++)518start->half.regs[i].def = UNDEF;519for (unsigned i = 0; i < RA_SHARED_SIZE; i++)520start->shared.regs[i].def = UNDEF;521522bool progress;523do {524progress = false;525foreach_block (block, &ir->block_list) {526progress |= propagate_block(ctx, block);527}528} while (progress);529530foreach_block (block, &ir->block_list) {531check_reaching_block(ctx, block);532}533534if (ctx->failed) {535fprintf(stderr, "failing shader:\n");536ir3_print(ir);537abort();538}539}540541void542ir3_ra_validate(struct ir3_shader_variant *v, unsigned full_size,543unsigned half_size, unsigned block_count)544{545#ifdef NDEBUG546#define VALIDATE 0547#else548#define VALIDATE 1549#endif550551if (!VALIDATE)552return;553554struct ra_val_ctx *ctx = rzalloc(NULL, struct ra_val_ctx);555ctx->merged_regs = v->mergedregs;556ctx->full_size = full_size;557ctx->half_size = half_size;558ctx->block_count = block_count;559560foreach_block (block, &v->ir->block_list) {561foreach_instr (instr, &block->instr_list) {562validate_simple(ctx, instr);563}564}565566check_reaching_defs(ctx, v->ir);567568ralloc_free(ctx);569}570571572