Path: blob/21.2-virgl/src/freedreno/ir3/ir3_array_to_ssa.c
4565 views
/*
 * Copyright (C) 2021 Valve Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */

/* This pass lowers array accesses to SSA.
 *
 * After this pass, instructions writing arrays implicitly read the contents of
 * the array defined in instr->dsts[0]->def (possibly a phi node), perform the
 * operation, and store to instr->dsts[0].
 *
 * This makes arrays appear like "normal" SSA values, even if the false
 * dependencies mean that they always stay in CSSA form (i.e. able to be
 * removed out-of-SSA with no copies.) While hopefully they shouldn't induce
 * copies in most cases, we can't make that guarantee while also splitting
 * spilling from RA and guaranteeing a certain number of registers are used, so
 * we have to insert the phi nodes to be able to know when copying should
 * happen.
 *
 * The implementation is based on the idea in "Simple and Efficient
 * Construction of Static Single Assignment Form" of scanning backwards to find
 * the definition. However, since we're not doing this on-the-fly we can
 * simplify things a little by doing a pre-pass to get the last definition of
 * each array in each block. Then we optimize trivial phis in a separate pass,
 * "on the fly" so that we don't have to rewrite (and keep track of) users.
 */

#include <stdlib.h>
#include "ir3.h"

/* Per-(block, array) SSA-construction state.  One instance exists for every
 * (basic block, array id) pair; see get_state() for the indexing scheme.
 */
struct array_state {
   /* Definition of the array live at the top of the block (set once
    * 'constructed' is true; may be a phi destination or NULL for undef).
    */
   struct ir3_register *live_in_definition;
   /* Last definition of the array in this block, i.e. the value visible to
    * successor blocks.  Seeded by the pre-pass in ir3_array_to_ssa() and
    * filled in lazily by read_value_end() otherwise.
    */
   struct ir3_register *live_out_definition;
   /* True once live_in_definition has been computed (it may legitimately be
    * NULL, so a separate flag is needed).
    */
   bool constructed;
   /* NOTE(review): 'optimized' is never read or written anywhere in this
    * file — possibly vestigial or used by a later revision; confirm before
    * relying on it.
    */
   bool optimized;
};

struct array_ctx {
   /* 2D table of array_state, indexed by (block->index, array id). */
   struct array_state *states;
   struct ir3 *ir;
   /* Number of array ids (max id + 1); the row stride of 'states'. */
   unsigned array_count;
};

/* Fetch the state cell for array 'id' in 'block'.  Relies on block->index
 * having been assigned sequentially by ir3_array_to_ssa() before any call.
 */
static struct array_state *
get_state(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
   return &ctx->states[ctx->array_count * block->index + id];
}

static struct ir3_register *read_value_beginning(struct array_ctx *ctx,
                                                 struct ir3_block *block,
                                                 struct ir3_array *arr);

/* Return the definition of 'arr' that is live at the *end* of 'block',
 * memoized in live_out_definition.  If the block contains no write to the
 * array (nothing seeded by the pre-pass), the live-out value is whatever was
 * live at the beginning of the block.
 */
static struct ir3_register *
read_value_end(struct array_ctx *ctx, struct ir3_block *block,
               struct ir3_array *arr)
{
   struct array_state *state = get_state(ctx, block, arr->id);
   if (state->live_out_definition)
      return state->live_out_definition;

   state->live_out_definition = read_value_beginning(ctx, block, arr);
   return state->live_out_definition;
}

/* Roughly equivalent to readValueRecursive from the paper: returns the
 * definition of 'arr' live at the *beginning* of 'block', creating phi nodes
 * as needed.  May return NULL, meaning the array is undefined on this path.
 */
static struct ir3_register *
read_value_beginning(struct array_ctx *ctx, struct ir3_block *block,
                     struct ir3_array *arr)
{
   struct array_state *state = get_state(ctx, block, arr->id);

   if (state->constructed)
      return state->live_in_definition;

   /* Entry block (no predecessors): the array has no prior definition. */
   if (block->predecessors_count == 0) {
      state->constructed = true;
      return NULL;
   }

   /* Single predecessor: no phi needed, just forward its live-out value. */
   if (block->predecessors_count == 1) {
      state->live_in_definition =
         read_value_end(ctx, block->predecessors[0], arr);
      state->constructed = true;
      return state->live_in_definition;
   }

   /* Multiple predecessors: create a phi.  ir3_instr_create() appends, so
    * move the phi to the front of the block's instruction list where phis
    * belong.
    */
   unsigned flags = IR3_REG_ARRAY | (arr->half ? IR3_REG_HALF : 0);
   struct ir3_instruction *phi =
      ir3_instr_create(block, OPC_META_PHI, 1, block->predecessors_count);
   list_del(&phi->node);
   list_add(&phi->node, &block->instr_list);

   struct ir3_register *dst = __ssa_dst(phi);
   dst->flags |= flags;
   dst->array.id = arr->id;
   dst->size = arr->length;

   /* Record the phi as the live-in value *before* filling in its sources:
    * for loops, reading a predecessor's live-out can recurse back into this
    * block, and must see the (incomplete) phi rather than recursing forever.
    */
   state->live_in_definition = phi->dsts[0];
   state->constructed = true;

   for (unsigned i = 0; i < block->predecessors_count; i++) {
      struct ir3_register *src =
         read_value_end(ctx, block->predecessors[i], arr);
      struct ir3_register *src_reg;
      if (src) {
         src_reg = __ssa_src(phi, src->instr, flags);
      } else {
         /* Undefined along this edge: source with no SSA def. */
         src_reg = ir3_src_create(phi, INVALID_REG, flags | IR3_REG_SSA);
      }
      src_reg->array.id = arr->id;
      src_reg->size = arr->length;
   }
   return phi->dsts[0];
}

/* tryRemoveTrivialPhi from the paper, adapted: returns the unique value the
 * phi forwards if it is trivial (all non-self sources identical), otherwise
 * the phi's own destination.  phi->data caches the result and doubles as the
 * cycle-breaking marker for mutually-referencing phis.
 */
static struct ir3_register *
remove_trivial_phi(struct ir3_instruction *phi)
{
   /* Break cycles */
   if (phi->data)
      return phi->data;

   phi->data = phi->dsts[0];

   struct ir3_register *unique_def = NULL;
   bool unique = true;
   for (unsigned i = 0; i < phi->block->predecessors_count; i++) {
      struct ir3_register *src = phi->srcs[i];

      /* If there are any undef sources, then the remaining sources may not
       * dominate the phi node, even if they are all equal. So we need to
       * bail out in this case.
       *
       * This seems to be a bug in the original paper.
       */
      if (!src->def) {
         unique = false;
         break;
      }

      struct ir3_instruction *src_instr = src->def->instr;

      /* phi sources which point to the phi itself don't count for
       * figuring out if the phi is trivial
       */
      if (src_instr == phi)
         continue;

      if (src_instr->opc == OPC_META_PHI) {
         /* Recursively resolve phi-of-phi sources to their forwarded value. */
         src->def = remove_trivial_phi(src->def->instr);
      }

      if (unique_def) {
         if (unique_def != src->def) {
            unique = false;
            break;
         }
      } else {
         unique_def = src->def;
      }
   }

   if (unique) {
      phi->data = unique_def;
      return unique_def;
   } else {
      return phi->dsts[0];
   }
}

/* Map a definition through trivial-phi elimination: if 'reg' is the result of
 * a phi, return the value the phi was resolved to (phi->data, set by
 * remove_trivial_phi()); otherwise the register itself.
 */
static struct ir3_register *
lookup_value(struct ir3_register *reg)
{
   if (reg->instr->opc == OPC_META_PHI)
      return reg->instr->data;
   return reg;
}

/* Live-in definition of array 'id' at 'block', after trivial-phi
 * resolution; NULL if the array is undefined at the top of the block.
 */
static struct ir3_register *
lookup_live_in(struct array_ctx *ctx, struct ir3_block *block, unsigned id)
{
   struct array_state *state = get_state(ctx, block, id);
   if (state->live_in_definition)
      return lookup_value(state->live_in_definition);

   return NULL;
}

/* Entry point: lower array accesses in 'ir' to SSA form.  Returns false
 * (no change) when the shader has no arrays.
 */
bool
ir3_array_to_ssa(struct ir3 *ir)
{
   struct array_ctx ctx = {};

   foreach_array (array, &ir->array_list) {
      ctx.array_count = MAX2(ctx.array_count, array->id + 1);
   }

   if (ctx.array_count == 0)
      return false;

   /* Number the blocks so get_state() can index the 2D state table. */
   unsigned i = 0;
   foreach_block (block, &ir->block_list) {
      block->index = i++;
   }

   ctx.ir = ir;
   /* NOTE(review): calloc() result is not checked; matches surrounding Mesa
    * style but will crash on OOM.
    */
   ctx.states = calloc(ctx.array_count * i, sizeof(struct array_state));

   /* Pre-pass: record the last write of each array in each block as that
    * block's live-out definition.
    */
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         foreach_dst (dst, instr) {
            if (dst->flags & IR3_REG_ARRAY) {
               struct array_state *state =
                  get_state(&ctx, block, dst->array.id);
               state->live_out_definition = dst;
            }
         }
      }
   }

   /* Phase 1: construct phis for every array read, and for every array write
    * whose implicit read of the previous contents (dst not tied, src with no
    * in-block def) crosses a block boundary.
    */
   foreach_block (block, &ir->block_list) {
      foreach_instr (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI)
            continue;

         foreach_dst (reg, instr) {
            if ((reg->flags & IR3_REG_ARRAY) && !reg->tied) {
               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);

               /* Construct any phi nodes necessary to read this value */
               read_value_beginning(&ctx, block, arr);
            }
         }
         foreach_src (reg, instr) {
            if ((reg->flags & IR3_REG_ARRAY) && !reg->def) {
               struct ir3_array *arr = ir3_lookup_array(ir, reg->array.id);

               /* Construct any phi nodes necessary to read this value */
               read_value_beginning(&ctx, block, arr);
            }
         }
      }
   }

   /* Phase 2: resolve trivial phis.  Phis sit at the front of each block's
    * instruction list, so stop at the first non-phi.
    */
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI)
            remove_trivial_phi(instr);
         else
            break;
      }
   }

   /* Phase 3: delete trivial phis, rewrite surviving phi sources through the
    * resolved values, and hook every array access up to its (possibly
    * cross-block) definition.
    */
   foreach_block (block, &ir->block_list) {
      foreach_instr_safe (instr, &block->instr_list) {
         if (instr->opc == OPC_META_PHI) {
            /* NOTE(review): this tests the *register* flag IR3_REG_ARRAY
             * against instr->flags, which holds IR3_INSTR_* bits — the intent
             * appears to be "skip non-array phis", i.e.
             * instr->dsts[0]->flags; verify against upstream Mesa.
             */
            if (!(instr->flags & IR3_REG_ARRAY))
               continue;
            /* A phi whose data no longer points at its own dst was resolved
             * to another value by remove_trivial_phi(); drop it.
             */
            if (instr->data != instr->dsts[0]) {
               list_del(&instr->node);
               continue;
            }
            for (unsigned i = 0; i < instr->srcs_count; i++) {
               /* NOTE(review): lookup_value() inspects reg->instr->opc, but
                * srcs[i] is a *source* register (SSA link is srcs[i]->def,
                * which may be NULL for undef edges).  This looks like a
                * transcription drop of "->def" on both sides
                * (srcs[i]->def = lookup_value(srcs[i]->def)); confirm against
                * upstream before touching.
                */
               instr->srcs[i] = lookup_value(instr->srcs[i]);
            }
         } else {
            foreach_dst (reg, instr) {
               if ((reg->flags & IR3_REG_ARRAY)) {
                  /* An untied array dst implicitly reads the previous array
                   * contents; record that def so liveness/RA can see it.
                   */
                  if (!reg->tied) {
                     struct ir3_register *def =
                        lookup_live_in(&ctx, block, reg->array.id);
                     if (def)
                        ir3_reg_set_last_array(instr, reg, def);
                  }
                  reg->flags |= IR3_REG_SSA;
               }
            }
            foreach_src (reg, instr) {
               if ((reg->flags & IR3_REG_ARRAY)) {
                  /* It is assumed that before calling
                   * ir3_array_to_ssa(), reg->def was set to the
                   * previous writer of the array within the current
                   * block or NULL if none.
                   */
                  if (!reg->def) {
                     reg->def = lookup_live_in(&ctx, block, reg->array.id);
                  }
                  reg->flags |= IR3_REG_SSA;
               }
            }
         }
      }
   }

   free(ctx.states);
   return true;
}