Path: blob/21.2-virgl/src/intel/compiler/brw_fs_live_variables.cpp
4550 views
/*1* Copyright © 2012 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22* Authors:23* Eric Anholt <[email protected]>24*25*/2627#include "brw_fs.h"28#include "brw_fs_live_variables.h"2930using namespace brw;3132#define MAX_INSTRUCTION (1 << 30)3334/** @file brw_fs_live_variables.cpp35*36* Support for calculating liveness information about virtual GRFs.37*38* This produces a live interval for each whole virtual GRF. We could39* choose to expose per-component live intervals for VGRFs of size > 1,40* but we currently do not. It is easier for the consumers of this41* information to work with whole VGRFs.42*43* However, we internally track use/def information at the per-GRF level for44* greater accuracy. Large VGRFs may be accessed piecemeal over many45* (possibly non-adjacent) instructions. In this case, examining a single46* instruction is insufficient to decide whether a whole VGRF is ultimately47* used or defined. Tracking individual components allows us to easily48* assemble this information.49*50* See Muchnick's Advanced Compiler Design and Implementation, section51* 14.1 (p444).52*/5354void55fs_live_variables::setup_one_read(struct block_data *bd,56int ip, const fs_reg ®)57{58int var = var_from_reg(reg);59assert(var < num_vars);6061start[var] = MIN2(start[var], ip);62end[var] = MAX2(end[var], ip);6364/* The use[] bitset marks when the block makes use of a variable (VGRF65* channel) without having completely defined that variable within the66* block.67*/68if (!BITSET_TEST(bd->def, var))69BITSET_SET(bd->use, var);70}7172void73fs_live_variables::setup_one_write(struct block_data *bd, fs_inst *inst,74int ip, const fs_reg ®)75{76int var = var_from_reg(reg);77assert(var < num_vars);7879start[var] = MIN2(start[var], ip);80end[var] = MAX2(end[var], ip);8182/* The def[] bitset marks when an initialization in a block completely83* screens off previous updates of that variable (VGRF channel).84*/85if (inst->dst.file == VGRF) {86if (!inst->is_partial_write() && !BITSET_TEST(bd->use, var))87BITSET_SET(bd->def, var);8889BITSET_SET(bd->defout, var);90}91}9293/**94* Sets up the use[] and def[] bitsets.95*96* The basic-block-level live variable analysis needs to know which97* variables get used before they're completely defined, and which98* variables are completely defined before they're used.99*100* These are tracked at the per-component level, rather than whole VGRFs.101*/102void103fs_live_variables::setup_def_use()104{105int ip = 0;106107foreach_block (block, cfg) {108assert(ip == block->start_ip);109if (block->num > 0)110assert(cfg->blocks[block->num - 1]->end_ip == ip - 1);111112struct block_data *bd = &block_data[block->num];113114foreach_inst_in_block(fs_inst, inst, block) {115/* Set use[] for this instruction */116for (unsigned int i = 0; i < inst->sources; i++) {117fs_reg reg = inst->src[i];118119if (reg.file != VGRF)120continue;121122for (unsigned j = 0; j < regs_read(inst, i); j++) {123setup_one_read(bd, ip, reg);124reg.offset += REG_SIZE;125}126}127128bd->flag_use[0] |= inst->flags_read(devinfo) & ~bd->flag_def[0];129130/* Set def[] for this instruction */131if (inst->dst.file == VGRF) {132fs_reg reg = inst->dst;133for (unsigned j = 0; j < regs_written(inst); j++) {134setup_one_write(bd, inst, ip, reg);135reg.offset += REG_SIZE;136}137}138139if (!inst->predicate && inst->exec_size >= 8)140bd->flag_def[0] |= inst->flags_written(devinfo) & ~bd->flag_use[0];141142ip++;143}144}145}146147/**148* The algorithm incrementally sets bits in liveout and livein,149* propagating it through control flow. It will eventually terminate150* because it only ever adds bits, and stops when no bits are added in151* a pass.152*/153void154fs_live_variables::compute_live_variables()155{156bool cont = true;157158while (cont) {159cont = false;160161foreach_block_reverse (block, cfg) {162struct block_data *bd = &block_data[block->num];163164/* Update liveout */165foreach_list_typed(bblock_link, child_link, link, &block->children) {166struct block_data *child_bd = &block_data[child_link->block->num];167168for (int i = 0; i < bitset_words; i++) {169BITSET_WORD new_liveout = (child_bd->livein[i] &170~bd->liveout[i]);171if (new_liveout) {172bd->liveout[i] |= new_liveout;173cont = true;174}175}176BITSET_WORD new_liveout = (child_bd->flag_livein[0] &177~bd->flag_liveout[0]);178if (new_liveout) {179bd->flag_liveout[0] |= new_liveout;180cont = true;181}182}183184/* Update livein */185for (int i = 0; i < bitset_words; i++) {186BITSET_WORD new_livein = (bd->use[i] |187(bd->liveout[i] &188~bd->def[i]));189if (new_livein & ~bd->livein[i]) {190bd->livein[i] |= new_livein;191cont = true;192}193}194BITSET_WORD new_livein = (bd->flag_use[0] |195(bd->flag_liveout[0] &196~bd->flag_def[0]));197if (new_livein & ~bd->flag_livein[0]) {198bd->flag_livein[0] |= new_livein;199cont = true;200}201}202}203204/* Propagate defin and defout down the CFG to calculate the union of live205* variables potentially defined along any possible control flow path.206*/207do {208cont = false;209210foreach_block (block, cfg) {211const struct block_data *bd = &block_data[block->num];212213foreach_list_typed(bblock_link, child_link, link, &block->children) {214struct block_data *child_bd = &block_data[child_link->block->num];215216for (int i = 0; i < bitset_words; i++) {217const BITSET_WORD new_def = bd->defout[i] & ~child_bd->defin[i];218child_bd->defin[i] |= new_def;219child_bd->defout[i] |= new_def;220cont |= new_def;221}222}223}224} while (cont);225}226227/**228* Extend the start/end ranges for each variable to account for the229* new information calculated from control flow.230*/231void232fs_live_variables::compute_start_end()233{234foreach_block (block, cfg) {235struct block_data *bd = &block_data[block->num];236237for (int w = 0; w < bitset_words; w++) {238BITSET_WORD livedefin = bd->livein[w] & bd->defin[w];239BITSET_WORD livedefout = bd->liveout[w] & bd->defout[w];240BITSET_WORD livedefinout = livedefin | livedefout;241while (livedefinout) {242unsigned b = u_bit_scan(&livedefinout);243unsigned i = w * BITSET_WORDBITS + b;244if (livedefin & (1u << b)) {245start[i] = MIN2(start[i], block->start_ip);246end[i] = MAX2(end[i], block->start_ip);247}248if (livedefout & (1u << b)) {249start[i] = MIN2(start[i], block->end_ip);250end[i] = MAX2(end[i], block->end_ip);251}252}253}254}255}256257fs_live_variables::fs_live_variables(const backend_shader *s)258: devinfo(s->devinfo), cfg(s->cfg)259{260mem_ctx = ralloc_context(NULL);261262num_vgrfs = s->alloc.count;263num_vars = 0;264var_from_vgrf = rzalloc_array(mem_ctx, int, num_vgrfs);265for (int i = 0; i < num_vgrfs; i++) {266var_from_vgrf[i] = num_vars;267num_vars += s->alloc.sizes[i];268}269270vgrf_from_var = rzalloc_array(mem_ctx, int, num_vars);271for (int i = 0; i < num_vgrfs; i++) {272for (unsigned j = 0; j < s->alloc.sizes[i]; j++) {273vgrf_from_var[var_from_vgrf[i] + j] = i;274}275}276277start = ralloc_array(mem_ctx, int, num_vars);278end = rzalloc_array(mem_ctx, int, num_vars);279for (int i = 0; i < num_vars; i++) {280start[i] = MAX_INSTRUCTION;281end[i] = -1;282}283284vgrf_start = ralloc_array(mem_ctx, int, num_vgrfs);285vgrf_end = ralloc_array(mem_ctx, int, num_vgrfs);286for (int i = 0; i < num_vgrfs; i++) {287vgrf_start[i] = MAX_INSTRUCTION;288vgrf_end[i] = -1;289}290291block_data = rzalloc_array(mem_ctx, struct block_data, cfg->num_blocks);292293bitset_words = BITSET_WORDS(num_vars);294for (int i = 0; i < cfg->num_blocks; i++) {295block_data[i].def = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);296block_data[i].use = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);297block_data[i].livein = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);298block_data[i].liveout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);299block_data[i].defin = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);300block_data[i].defout = rzalloc_array(mem_ctx, BITSET_WORD, bitset_words);301302block_data[i].flag_def[0] = 0;303block_data[i].flag_use[0] = 0;304block_data[i].flag_livein[0] = 0;305block_data[i].flag_liveout[0] = 0;306}307308setup_def_use();309compute_live_variables();310compute_start_end();311312/* Merge the per-component live ranges to whole VGRF live ranges. */313for (int i = 0; i < num_vars; i++) {314const unsigned vgrf = vgrf_from_var[i];315vgrf_start[vgrf] = MIN2(vgrf_start[vgrf], start[i]);316vgrf_end[vgrf] = MAX2(vgrf_end[vgrf], end[i]);317}318}319320fs_live_variables::~fs_live_variables()321{322ralloc_free(mem_ctx);323}324325static bool326check_register_live_range(const fs_live_variables *live, int ip,327const fs_reg ®, unsigned n)328{329const unsigned var = live->var_from_reg(reg);330331if (var + n > unsigned(live->num_vars) ||332live->vgrf_start[reg.nr] > ip || live->vgrf_end[reg.nr] < ip)333return false;334335for (unsigned j = 0; j < n; j++) {336if (live->start[var + j] > ip || live->end[var + j] < ip)337return false;338}339340return true;341}342343bool344fs_live_variables::validate(const backend_shader *s) const345{346int ip = 0;347348foreach_block_and_inst(block, fs_inst, inst, s->cfg) {349for (unsigned i = 0; i < inst->sources; i++) {350if (inst->src[i].file == VGRF &&351!check_register_live_range(this, ip,352inst->src[i], regs_read(inst, i)))353return false;354}355356if (inst->dst.file == VGRF &&357!check_register_live_range(this, ip, inst->dst, regs_written(inst)))358return false;359360ip++;361}362363return true;364}365366bool367fs_live_variables::vars_interfere(int a, int b) const368{369return !(end[b] <= start[a] ||370end[a] <= start[b]);371}372373bool374fs_live_variables::vgrfs_interfere(int a, int b) const375{376return !(vgrf_end[a] <= vgrf_start[b] ||377vgrf_end[b] <= vgrf_start[a]);378}379380381