Path: blob/21.2-virgl/src/intel/compiler/brw_cfg.cpp
4550 views
/*1* Copyright © 2012 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*22* Authors:23* Eric Anholt <[email protected]>24*25*/2627#include "brw_cfg.h"28#include "brw_shader.h"2930/** @file brw_cfg.cpp31*32* Walks the shader instructions generated and creates a set of basic33* blocks with successor/predecessor edges connecting them.34*/3536using namespace brw;3738static bblock_t *39pop_stack(exec_list *list)40{41bblock_link *link = (bblock_link *)list->get_tail();42bblock_t *block = link->block;43link->link.remove();4445return block;46}4748static exec_node *49link(void *mem_ctx, bblock_t *block, enum bblock_link_kind kind)50{51bblock_link *l = new(mem_ctx) bblock_link(block, kind);52return &l->link;53}5455void56push_stack(exec_list *list, void *mem_ctx, bblock_t *block)57{58/* The kind of the link is immaterial, but we need to provide one since59* this is (ab)using the edge data structure in order to implement a stack.60*/61list->push_tail(link(mem_ctx, block, bblock_link_logical));62}6364bblock_t::bblock_t(cfg_t *cfg) :65cfg(cfg), start_ip(0), end_ip(0), end_ip_delta(0), num(0)66{67instructions.make_empty();68parents.make_empty();69children.make_empty();70}7172void73bblock_t::add_successor(void *mem_ctx, bblock_t *successor,74enum bblock_link_kind kind)75{76successor->parents.push_tail(::link(mem_ctx, this, kind));77children.push_tail(::link(mem_ctx, successor, kind));78}7980bool81bblock_t::is_predecessor_of(const bblock_t *block,82enum bblock_link_kind kind) const83{84foreach_list_typed_safe (bblock_link, parent, link, &block->parents) {85if (parent->block == this && parent->kind <= kind) {86return true;87}88}8990return false;91}9293bool94bblock_t::is_successor_of(const bblock_t *block,95enum bblock_link_kind kind) const96{97foreach_list_typed_safe (bblock_link, child, link, &block->children) {98if (child->block == this && child->kind <= kind) {99return true;100}101}102103return false;104}105106static bool107ends_block(const backend_instruction *inst)108{109enum opcode op = inst->opcode;110111return op == BRW_OPCODE_IF ||112op == BRW_OPCODE_ELSE ||113op == BRW_OPCODE_CONTINUE ||114op == BRW_OPCODE_BREAK ||115op == BRW_OPCODE_DO ||116op == BRW_OPCODE_WHILE;117}118119static bool120starts_block(const backend_instruction *inst)121{122enum opcode op = inst->opcode;123124return op == BRW_OPCODE_DO ||125op == BRW_OPCODE_ENDIF;126}127128bool129bblock_t::can_combine_with(const bblock_t *that) const130{131if ((const bblock_t *)this->link.next != that)132return false;133134if (ends_block(this->end()) ||135starts_block(that->start()))136return false;137138return true;139}140141void142bblock_t::combine_with(bblock_t *that)143{144assert(this->can_combine_with(that));145foreach_list_typed (bblock_link, link, link, &that->parents) {146assert(link->block == this);147}148149this->end_ip = that->end_ip;150this->instructions.append_list(&that->instructions);151152this->cfg->remove_block(that);153}154155void156bblock_t::dump() const157{158const backend_shader *s = this->cfg->s;159160int ip = this->start_ip;161foreach_inst_in_block(backend_instruction, inst, this) {162fprintf(stderr, "%5d: ", ip);163s->dump_instruction(inst);164ip++;165}166}167168cfg_t::cfg_t(const backend_shader *s, exec_list *instructions) :169s(s)170{171mem_ctx = ralloc_context(NULL);172block_list.make_empty();173blocks = NULL;174num_blocks = 0;175176bblock_t *cur = NULL;177int ip = 0;178179bblock_t *entry = new_block();180bblock_t *cur_if = NULL; /**< BB ending with IF. */181bblock_t *cur_else = NULL; /**< BB ending with ELSE. */182bblock_t *cur_endif = NULL; /**< BB starting with ENDIF. */183bblock_t *cur_do = NULL; /**< BB starting with DO. */184bblock_t *cur_while = NULL; /**< BB immediately following WHILE. */185exec_list if_stack, else_stack, do_stack, while_stack;186bblock_t *next;187188set_next_block(&cur, entry, ip);189190foreach_in_list_safe(backend_instruction, inst, instructions) {191/* set_next_block wants the post-incremented ip */192ip++;193194inst->exec_node::remove();195196switch (inst->opcode) {197case BRW_OPCODE_IF:198cur->instructions.push_tail(inst);199200/* Push our information onto a stack so we can recover from201* nested ifs.202*/203push_stack(&if_stack, mem_ctx, cur_if);204push_stack(&else_stack, mem_ctx, cur_else);205206cur_if = cur;207cur_else = NULL;208cur_endif = NULL;209210/* Set up our immediately following block, full of "then"211* instructions.212*/213next = new_block();214cur_if->add_successor(mem_ctx, next, bblock_link_logical);215216set_next_block(&cur, next, ip);217break;218219case BRW_OPCODE_ELSE:220cur->instructions.push_tail(inst);221222cur_else = cur;223224next = new_block();225assert(cur_if != NULL);226cur_if->add_successor(mem_ctx, next, bblock_link_logical);227cur_else->add_successor(mem_ctx, next, bblock_link_physical);228229set_next_block(&cur, next, ip);230break;231232case BRW_OPCODE_ENDIF: {233if (cur->instructions.is_empty()) {234/* New block was just created; use it. */235cur_endif = cur;236} else {237cur_endif = new_block();238239cur->add_successor(mem_ctx, cur_endif, bblock_link_logical);240241set_next_block(&cur, cur_endif, ip - 1);242}243244cur->instructions.push_tail(inst);245246if (cur_else) {247cur_else->add_successor(mem_ctx, cur_endif, bblock_link_logical);248} else {249assert(cur_if != NULL);250cur_if->add_successor(mem_ctx, cur_endif, bblock_link_logical);251}252253assert(cur_if->end()->opcode == BRW_OPCODE_IF);254assert(!cur_else || cur_else->end()->opcode == BRW_OPCODE_ELSE);255256/* Pop the stack so we're in the previous if/else/endif */257cur_if = pop_stack(&if_stack);258cur_else = pop_stack(&else_stack);259break;260}261case BRW_OPCODE_DO:262/* Push our information onto a stack so we can recover from263* nested loops.264*/265push_stack(&do_stack, mem_ctx, cur_do);266push_stack(&while_stack, mem_ctx, cur_while);267268/* Set up the block just after the while. Don't know when exactly269* it will start, yet.270*/271cur_while = new_block();272273if (cur->instructions.is_empty()) {274/* New block was just created; use it. */275cur_do = cur;276} else {277cur_do = new_block();278279cur->add_successor(mem_ctx, cur_do, bblock_link_logical);280281set_next_block(&cur, cur_do, ip - 1);282}283284cur->instructions.push_tail(inst);285286/* Represent divergent execution of the loop as a pair of alternative287* edges coming out of the DO instruction: For any physical iteration288* of the loop a given logical thread can either start off enabled289* (which is represented as the "next" successor), or disabled (if it290* has reached a non-uniform exit of the loop during a previous291* iteration, which is represented as the "cur_while" successor).292*293* The disabled edge will be taken by the logical thread anytime we294* arrive at the DO instruction through a back-edge coming from a295* conditional exit of the loop where divergent control flow started.296*297* This guarantees that there is a control-flow path from any298* divergence point of the loop into the convergence point299* (immediately past the WHILE instruction) such that it overlaps the300* whole IP region of divergent control flow (potentially the whole301* loop) *and* doesn't imply the execution of any instructions part302* of the loop (since the corresponding execution mask bit will be303* disabled for a diverging thread).304*305* This way we make sure that any variables that are live throughout306* the region of divergence for an inactive logical thread are also307* considered to interfere with any other variables assigned by308* active logical threads within the same physical region of the309* program, since otherwise we would risk cross-channel data310* corruption.311*/312next = new_block();313cur->add_successor(mem_ctx, next, bblock_link_logical);314cur->add_successor(mem_ctx, cur_while, bblock_link_physical);315set_next_block(&cur, next, ip);316break;317318case BRW_OPCODE_CONTINUE:319cur->instructions.push_tail(inst);320321/* A conditional CONTINUE may start a region of divergent control322* flow until the start of the next loop iteration (*not* until the323* end of the loop which is why the successor is not the top-level324* divergence point at cur_do). The live interval of any variable325* extending through a CONTINUE edge is guaranteed to overlap the326* whole region of divergent execution, because any variable live-out327* at the CONTINUE instruction will also be live-in at the top of the328* loop, and therefore also live-out at the bottom-most point of the329* loop which is reachable from the top (since a control flow path330* exists from a definition of the variable through this CONTINUE331* instruction, the top of the loop, the (reachable) bottom of the332* loop, the top of the loop again, into a use of the variable).333*/334assert(cur_do != NULL);335cur->add_successor(mem_ctx, cur_do->next(), bblock_link_logical);336337next = new_block();338if (inst->predicate)339cur->add_successor(mem_ctx, next, bblock_link_logical);340else341cur->add_successor(mem_ctx, next, bblock_link_physical);342343set_next_block(&cur, next, ip);344break;345346case BRW_OPCODE_BREAK:347cur->instructions.push_tail(inst);348349/* A conditional BREAK instruction may start a region of divergent350* control flow until the end of the loop if the condition is351* non-uniform, in which case the loop will execute additional352* iterations with the present channel disabled. We model this as a353* control flow path from the divergence point to the convergence354* point that overlaps the whole IP range of the loop and skips over355* the execution of any other instructions part of the loop.356*357* See the DO case for additional explanation.358*/359assert(cur_do != NULL);360cur->add_successor(mem_ctx, cur_do, bblock_link_physical);361cur->add_successor(mem_ctx, cur_while, bblock_link_logical);362363next = new_block();364if (inst->predicate)365cur->add_successor(mem_ctx, next, bblock_link_logical);366367set_next_block(&cur, next, ip);368break;369370case BRW_OPCODE_WHILE:371cur->instructions.push_tail(inst);372373assert(cur_do != NULL && cur_while != NULL);374375/* A conditional WHILE instruction may start a region of divergent376* control flow until the end of the loop, just like the BREAK377* instruction. See the BREAK case for more details. OTOH an378* unconditional WHILE instruction is non-divergent (just like an379* unconditional CONTINUE), and will necessarily lead to the380* execution of an additional iteration of the loop for all enabled381* channels, so we may skip over the divergence point at the top of382* the loop to keep the CFG as unambiguous as possible.383*/384if (inst->predicate) {385cur->add_successor(mem_ctx, cur_do, bblock_link_logical);386} else {387cur->add_successor(mem_ctx, cur_do->next(), bblock_link_logical);388}389390set_next_block(&cur, cur_while, ip);391392/* Pop the stack so we're in the previous loop */393cur_do = pop_stack(&do_stack);394cur_while = pop_stack(&while_stack);395break;396397default:398cur->instructions.push_tail(inst);399break;400}401}402403cur->end_ip = ip - 1;404405make_block_array();406}407408cfg_t::~cfg_t()409{410ralloc_free(mem_ctx);411}412413void414cfg_t::remove_block(bblock_t *block)415{416foreach_list_typed_safe (bblock_link, predecessor, link, &block->parents) {417/* Remove block from all of its predecessors' successor lists. */418foreach_list_typed_safe (bblock_link, successor, link,419&predecessor->block->children) {420if (block == successor->block) {421successor->link.remove();422ralloc_free(successor);423}424}425426/* Add removed-block's successors to its predecessors' successor lists. */427foreach_list_typed (bblock_link, successor, link, &block->children) {428if (!successor->block->is_successor_of(predecessor->block,429successor->kind)) {430predecessor->block->children.push_tail(link(mem_ctx,431successor->block,432successor->kind));433}434}435}436437foreach_list_typed_safe (bblock_link, successor, link, &block->children) {438/* Remove block from all of its childrens' parents lists. */439foreach_list_typed_safe (bblock_link, predecessor, link,440&successor->block->parents) {441if (block == predecessor->block) {442predecessor->link.remove();443ralloc_free(predecessor);444}445}446447/* Add removed-block's predecessors to its successors' predecessor lists. */448foreach_list_typed (bblock_link, predecessor, link, &block->parents) {449if (!predecessor->block->is_predecessor_of(successor->block,450predecessor->kind)) {451successor->block->parents.push_tail(link(mem_ctx,452predecessor->block,453predecessor->kind));454}455}456}457458block->link.remove();459460for (int b = block->num; b < this->num_blocks - 1; b++) {461this->blocks[b] = this->blocks[b + 1];462this->blocks[b]->num = b;463}464465this->blocks[this->num_blocks - 1]->num = this->num_blocks - 2;466this->num_blocks--;467}468469bblock_t *470cfg_t::new_block()471{472bblock_t *block = new(mem_ctx) bblock_t(this);473474return block;475}476477void478cfg_t::set_next_block(bblock_t **cur, bblock_t *block, int ip)479{480if (*cur) {481(*cur)->end_ip = ip - 1;482}483484block->start_ip = ip;485block->num = num_blocks++;486block_list.push_tail(&block->link);487*cur = block;488}489490void491cfg_t::make_block_array()492{493blocks = ralloc_array(mem_ctx, bblock_t *, num_blocks);494495int i = 0;496foreach_block (block, this) {497blocks[i++] = block;498}499assert(i == num_blocks);500}501502void503cfg_t::dump()504{505const idom_tree *idom = (s ? &s->idom_analysis.require() : NULL);506507foreach_block (block, this) {508if (idom && idom->parent(block))509fprintf(stderr, "START B%d IDOM(B%d)", block->num,510idom->parent(block)->num);511else512fprintf(stderr, "START B%d IDOM(none)", block->num);513514foreach_list_typed(bblock_link, link, link, &block->parents) {515fprintf(stderr, " <%cB%d",516link->kind == bblock_link_logical ? '-' : '~',517link->block->num);518}519fprintf(stderr, "\n");520if (s != NULL)521block->dump();522fprintf(stderr, "END B%d", block->num);523foreach_list_typed(bblock_link, link, link, &block->children) {524fprintf(stderr, " %c>B%d",525link->kind == bblock_link_logical ? '-' : '~',526link->block->num);527}528fprintf(stderr, "\n");529}530}531532/* Calculates the immediate dominator of each block, according to "A Simple,533* Fast Dominance Algorithm" by Keith D. Cooper, Timothy J. Harvey, and Ken534* Kennedy.535*536* The authors claim that for control flow graphs of sizes normally encountered537* (less than 1000 nodes) that this algorithm is significantly faster than538* others like Lengauer-Tarjan.539*/540idom_tree::idom_tree(const backend_shader *s) :541num_parents(s->cfg->num_blocks),542parents(new bblock_t *[num_parents]())543{544bool changed;545546parents[0] = s->cfg->blocks[0];547548do {549changed = false;550551foreach_block(block, s->cfg) {552if (block->num == 0)553continue;554555bblock_t *new_idom = NULL;556foreach_list_typed(bblock_link, parent_link, link, &block->parents) {557if (parent(parent_link->block)) {558new_idom = (new_idom ? intersect(new_idom, parent_link->block) :559parent_link->block);560}561}562563if (parent(block) != new_idom) {564parents[block->num] = new_idom;565changed = true;566}567}568} while (changed);569}570571idom_tree::~idom_tree()572{573delete[] parents;574}575576bblock_t *577idom_tree::intersect(bblock_t *b1, bblock_t *b2) const578{579/* Note, the comparisons here are the opposite of what the paper says580* because we index blocks from beginning -> end (i.e. reverse post-order)581* instead of post-order like they assume.582*/583while (b1->num != b2->num) {584while (b1->num > b2->num)585b1 = parent(b1);586while (b2->num > b1->num)587b2 = parent(b2);588}589assert(b1);590return b1;591}592593void594idom_tree::dump() const595{596printf("digraph DominanceTree {\n");597for (unsigned i = 0; i < num_parents; i++)598printf("\t%d -> %d\n", parents[i]->num, i);599printf("}\n");600}601602void603cfg_t::dump_cfg()604{605printf("digraph CFG {\n");606for (int b = 0; b < num_blocks; b++) {607bblock_t *block = this->blocks[b];608609foreach_list_typed_safe (bblock_link, child, link, &block->children) {610printf("\t%d -> %d\n", b, child->block->num);611}612}613printf("}\n");614}615616617