/* Path: src/gallium/drivers/lima/ir/gp/optimize.c */
/*
 * Copyright (c) 2019 Connor Abbott
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sub license,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 *
 */

#include "gpir.h"

/* Here we perform a few optimizations that can't currently be done in NIR:
 *
 * - Optimize the result of a conditional break/continue. In NIR something
 *   like:
 *
 *   loop {
 *      ...
 *      if (cond)
 *         continue;
 *
 *   would get lowered to:
 *
 *   block_0:
 *   ...
 *   block_1:
 *   branch_cond !cond block_3
 *   block_2:
 *   branch_uncond block_0
 *   block_3:
 *   ...
 *
 *   We recognize the conditional branch skipping over the unconditional
 *   branch, and turn it into:
 *
 *   block_0:
 *   ...
 *   block_1:
 *   branch_cond cond block_0
 *   block_2:
 *   block_3:
 *   ...
 *
 * - Optimize away nots of comparisons as produced by lowering ifs to
 *   branches, and nots of nots produced by the above optimization.
 *
 * - DCE
 */

/* Implements the branch optimization described above: a block consisting of
 * nothing but one unconditional branch, immediately preceded by a block
 * ending in a conditional branch that jumps just past it, is folded into a
 * single inverted conditional branch straight to the unconditional branch's
 * target. The inserted gpir_op_not is cleaned up later by optimize_not().
 */
static void
optimize_branches(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      /* Look for a block with a single unconditional branch. */
      if (!list_is_singular(&block->node_list))
         continue;

      gpir_node *node = list_first_entry(&block->node_list, gpir_node, list);
      if (node->op != gpir_op_branch_uncond)
         continue;

      gpir_block *target = gpir_node_to_branch(node)->dest;

      /* Find the previous block; bail if this is the first block (its
       * list.prev is the list head itself).
       */
      if (block->list.prev == &comp->block_list)
         continue;

      gpir_block *prev_block = LIST_ENTRY(gpir_block, block->list.prev, list);
      if (list_is_empty(&prev_block->node_list))
         continue;

      /* Previous block must end with a conditional branch */
      gpir_node *prev_block_last =
         list_last_entry(&prev_block->node_list, gpir_node, list);
      if (prev_block_last->op != gpir_op_branch_cond)
         continue;

      /* That branch must branch to the block after this */
      gpir_branch_node *prev_branch = gpir_node_to_branch(prev_block_last);
      gpir_block *prev_target = prev_branch->dest;

      if (&prev_target->list != block->list.next)
         continue;

      /* Hooray! Invert the conditional branch and change the target.
       * The not is linked into prev_block's node list just ahead of the
       * branch and spliced in as the branch's condition child.
       */
      gpir_alu_node *cond = gpir_node_create(prev_block, gpir_op_not);
      cond->children[0] = prev_branch->cond;
      cond->num_child = 1;
      gpir_node_add_dep(&cond->node, cond->children[0], GPIR_DEP_INPUT);
      list_addtail(&cond->node.list, &prev_block_last->list);
      gpir_node_insert_child(prev_block_last, prev_branch->cond, &cond->node);
      prev_branch->dest = target;
      prev_block->successors[1] = target;

      /* Delete the branch; the now-empty block simply falls through, so its
       * first successor becomes the following block.
       */
      list_del(&node->list);
      block->successors[0] = LIST_ENTRY(gpir_block, block->list.next, list);
   }
}

/* Folds gpir_op_not nodes: (not (not a)) -> a, and a not of a comparison is
 * replaced by the opposite comparison. The now-unused inner nodes are left
 * for dead_code_eliminate() to collect. The node list is walked in reverse
 * so that the outermost not in a chain is rewritten first.
 */
static void
optimize_not(gpir_compiler *comp)
{
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_rev(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_not)
            continue;

         gpir_alu_node *alu = gpir_node_to_alu(node);

         gpir_node *replace = NULL;
         if (alu->children[0]->op == gpir_op_not) {
            /* (not (not a)) -> a */
            gpir_alu_node *child = gpir_node_to_alu(alu->children[0]);
            replace = child->children[0];
         } else if (alu->children[0]->op == gpir_op_ge ||
                    alu->children[0]->op == gpir_op_lt) {
            /* (not (ge a, b)) -> (lt a, b) and
             * (not (lt a, b)) -> (ge a, b)
             *
             * Build the inverted comparison next to the original one,
             * reusing the original's operands.
             */
            gpir_alu_node *child = gpir_node_to_alu(alu->children[0]);
            gpir_op op = alu->children[0]->op == gpir_op_ge ?
               gpir_op_lt : gpir_op_ge;
            gpir_alu_node *new = gpir_node_create(block, op);
            new->children[0] = child->children[0];
            new->children[1] = child->children[1];
            new->num_child = 2;
            gpir_node_add_dep(&new->node, new->children[0], GPIR_DEP_INPUT);
            gpir_node_add_dep(&new->node, new->children[1], GPIR_DEP_INPUT);
            list_addtail(&new->node.list, &alu->children[0]->list);
            replace = &new->node;
         }

         /* Re-point every user of the not at the replacement node. */
         if (replace) {
            gpir_node_replace_succ(replace, node);
         }
      }
   }
}

/* Does what it says. In addition to removing unused nodes from the not
 * optimization above, we need this to remove unused load_const nodes which
 * were created from store_output intrinsics in NIR, since we ignore the
 * offset.
 */
static void
dead_code_eliminate(gpir_compiler *comp)
{
   /* First sweep: drop any non-store, non-branch node with no successors,
    * walking backwards so a node's children become dead in the same pass.
    */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe_rev(gpir_node, node, &block->node_list, list) {
         if (node->type != gpir_node_type_store &&
             node->type != gpir_node_type_branch &&
             list_is_empty(&node->succ_list)) {
            gpir_node_delete(node);
         }
      }
   }

   /* Kill all the writes to regs that are never read. All the known
    * instances of these are coming from the cycle-breaking register
    * created in out-of-SSA. See resolve_parallel_copy() in nir_from_ssa.c
    * Since we kill redundant movs when we translate nir into gpir, it
    * results in this reg being written, but never read.
    *
    * NOTE(review): this allocates comp->cur_reg BITSET_WORDs where
    * BITSET_WORDS(comp->cur_reg) words would suffice — harmless
    * over-allocation, but worth confirming it is intentional.
    */
   BITSET_WORD *regs = rzalloc_array(comp, BITSET_WORD, comp->cur_reg);

   /* Mark every register that has at least one load. */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_load_reg)
            continue;
         gpir_load_node *load = gpir_node_to_load(node);
         BITSET_SET(regs, load->reg->index);
      }
   }

   /* Delete stores to registers that were never marked. */
   list_for_each_entry(gpir_block, block, &comp->block_list, list) {
      list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {
         if (node->op != gpir_op_store_reg)
            continue;
         gpir_store_node *store = gpir_node_to_store(node);
         if (!BITSET_TEST(regs, store->reg->index))
            gpir_node_delete(node);
      }
   }

   ralloc_free(regs);
}

/* Entry point: run the three passes in order (branch folding creates nots,
 * not folding creates dead nodes, DCE cleans up), dump the program for
 * debugging, and report success. Always returns true.
 */
bool
gpir_optimize(gpir_compiler *comp)
{
   optimize_branches(comp);
   optimize_not(comp);
   dead_code_eliminate(comp);

   gpir_debug("after optimization\n");
   gpir_node_print_prog_seq(comp);

   return true;
}