Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/gp/lower.c
4574 views
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include "util/ralloc.h"2526#include "gpir.h"27#include "lima_context.h"2829static bool gpir_lower_const(gpir_compiler *comp)30{31int num_constant = 0;32list_for_each_entry(gpir_block, block, &comp->block_list, list) {33list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {34if (node->op == gpir_op_const) {35if (gpir_node_is_root(node))36gpir_node_delete(node);37else38num_constant++;39}40}41}4243if (num_constant) {44union fi *constant = ralloc_array(comp->prog, union fi, num_constant);45if (!constant)46return false;4748comp->prog->constant = constant;49comp->prog->state.constant_size = num_constant * sizeof(union fi);5051int index = 0;52list_for_each_entry(gpir_block, block, &comp->block_list, list) {53list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {54if (node->op == gpir_op_const) {55gpir_const_node *c = gpir_node_to_const(node);5657if (!gpir_node_is_root(node)) {58gpir_load_node *load = gpir_node_create(block, gpir_op_load_uniform);59if (unlikely(!load))60return false;6162load->index = comp->constant_base + (index >> 2);63load->component = index % 4;64constant[index++] = c->value;6566gpir_node_replace_succ(&load->node, node);6768list_addtail(&load->node.list, &node->list);6970gpir_debug("lower const create uniform %d for const %d\n",71load->node.index, node->index);72}7374gpir_node_delete(node);75}76}77}78}7980return true;81}8283/* duplicate load to all its successors */84static bool gpir_lower_load(gpir_compiler *comp)85{86list_for_each_entry(gpir_block, block, &comp->block_list, list) {87list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {88if (node->type == gpir_node_type_load) {89gpir_load_node *load = gpir_node_to_load(node);9091bool first = true;92gpir_node_foreach_succ_safe(node, dep) {93gpir_node *succ = dep->succ;9495if (first) {96first = false;97continue;98}99100gpir_node *new = gpir_node_create(succ->block, node->op);101if (unlikely(!new))102return false;103list_addtail(&new->list, &succ->list);104105gpir_debug("lower load create %d from %d for succ %d\n",106new->index, node->index, succ->index);107108gpir_load_node *nload = gpir_node_to_load(new);109nload->index = load->index;110nload->component = load->component;111nload->reg = load->reg;112113gpir_node_replace_pred(dep, new);114gpir_node_replace_child(succ, node, new);115}116}117}118}119120return true;121}122123static bool gpir_lower_neg(gpir_block *block, gpir_node *node)124{125gpir_alu_node *neg = gpir_node_to_alu(node);126gpir_node *child = neg->children[0];127128/* check if child can dest negate */129if (child->type == gpir_node_type_alu) {130/* negate must be its only successor */131if (list_is_singular(&child->succ_list) &&132gpir_op_infos[child->op].dest_neg) {133gpir_alu_node *alu = gpir_node_to_alu(child);134alu->dest_negate = !alu->dest_negate;135136gpir_node_replace_succ(child, node);137gpir_node_delete(node);138return true;139}140}141142/* check if child can src negate */143gpir_node_foreach_succ_safe(node, dep) {144gpir_node *succ = dep->succ;145if (succ->type != gpir_node_type_alu)146continue;147148bool success = true;149gpir_alu_node *alu = gpir_node_to_alu(dep->succ);150for (int i = 0; i < alu->num_child; i++) {151if (alu->children[i] == node) {152if (gpir_op_infos[succ->op].src_neg[i]) {153alu->children_negate[i] = !alu->children_negate[i];154alu->children[i] = child;155}156else157success = false;158}159}160161if (success)162gpir_node_replace_pred(dep, child);163}164165if (gpir_node_is_root(node))166gpir_node_delete(node);167168return true;169}170171static bool gpir_lower_complex(gpir_block *block, gpir_node *node)172{173gpir_alu_node *alu = gpir_node_to_alu(node);174gpir_node *child = alu->children[0];175176if (node->op == gpir_op_exp2) {177gpir_alu_node *preexp2 = gpir_node_create(block, gpir_op_preexp2);178if (unlikely(!preexp2))179return false;180181preexp2->children[0] = child;182preexp2->num_child = 1;183gpir_node_add_dep(&preexp2->node, child, GPIR_DEP_INPUT);184list_addtail(&preexp2->node.list, &node->list);185186child = &preexp2->node;187}188189gpir_alu_node *complex2 = gpir_node_create(block, gpir_op_complex2);190if (unlikely(!complex2))191return false;192193complex2->children[0] = child;194complex2->num_child = 1;195gpir_node_add_dep(&complex2->node, child, GPIR_DEP_INPUT);196list_addtail(&complex2->node.list, &node->list);197198int impl_op = 0;199switch (node->op) {200case gpir_op_rcp:201impl_op = gpir_op_rcp_impl;202break;203case gpir_op_rsqrt:204impl_op = gpir_op_rsqrt_impl;205break;206case gpir_op_exp2:207impl_op = gpir_op_exp2_impl;208break;209case gpir_op_log2:210impl_op = gpir_op_log2_impl;211break;212default:213assert(0);214}215216gpir_alu_node *impl = gpir_node_create(block, impl_op);217if (unlikely(!impl))218return false;219220impl->children[0] = child;221impl->num_child = 1;222gpir_node_add_dep(&impl->node, child, GPIR_DEP_INPUT);223list_addtail(&impl->node.list, &node->list);224225gpir_alu_node *complex1 = gpir_node_create(block, gpir_op_complex1);226complex1->children[0] = &impl->node;227complex1->children[1] = &complex2->node;228complex1->children[2] = child;229complex1->num_child = 3;230gpir_node_add_dep(&complex1->node, child, GPIR_DEP_INPUT);231gpir_node_add_dep(&complex1->node, &impl->node, GPIR_DEP_INPUT);232gpir_node_add_dep(&complex1->node, &complex2->node, GPIR_DEP_INPUT);233list_addtail(&complex1->node.list, &node->list);234235gpir_node *result = &complex1->node;236237if (node->op == gpir_op_log2) {238gpir_alu_node *postlog2 = gpir_node_create(block, gpir_op_postlog2);239if (unlikely(!postlog2))240return false;241242postlog2->children[0] = result;243postlog2->num_child = 1;244gpir_node_add_dep(&postlog2->node, result, GPIR_DEP_INPUT);245list_addtail(&postlog2->node.list, &node->list);246247result = &postlog2->node;248}249250gpir_node_replace_succ(result, node);251gpir_node_delete(node);252253return true;254}255256static bool gpir_lower_node_may_consume_two_slots(gpir_compiler *comp)257{258list_for_each_entry(gpir_block, block, &comp->block_list, list) {259list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {260if (gpir_op_infos[node->op].may_consume_two_slots) {261/* dummy_f/m are auxiliary nodes for value reg alloc:262* 1. before reg alloc, create fake nodes dummy_f, dummy_m,263* so the tree become: (dummy_m (node dummy_f))264* dummy_m can be spilled, but other nodes in the tree can't265* be spilled.266* 2. After reg allocation and fake dep add, merge all deps of267* dummy_m and dummy_f to node and remove dummy_m & dummy_f268*269* We may also not use dummy_f/m, but alloc two value reg for270* node. But that means we need to make sure there're 2 free271* slot after the node successors, but we just need one slot272* after to be able to schedule it because we can use one move for273* the two slot node. It's also not easy to handle the spill case274* for the alloc 2 value method.275*276* With the dummy_f/m method, there's no such requirement, the277* node can be scheduled only when there's two slots for it,278* otherwise a move. And the node can be spilled with one reg.279*/280gpir_node *dummy_m = gpir_node_create(block, gpir_op_dummy_m);281if (unlikely(!dummy_m))282return false;283list_add(&dummy_m->list, &node->list);284285gpir_node *dummy_f = gpir_node_create(block, gpir_op_dummy_f);286if (unlikely(!dummy_f))287return false;288list_add(&dummy_f->list, &node->list);289290gpir_alu_node *alu = gpir_node_to_alu(dummy_m);291alu->children[0] = node;292alu->children[1] = dummy_f;293alu->num_child = 2;294295gpir_node_replace_succ(dummy_m, node);296gpir_node_add_dep(dummy_m, node, GPIR_DEP_INPUT);297gpir_node_add_dep(dummy_m, dummy_f, GPIR_DEP_INPUT);298299}300}301}302303return true;304}305306/*307* There are no 'equal' or 'not-equal' opcodes.308* eq (a == b) is lowered to and(a >= b, b >= a)309* ne (a != b) is lowered to or(a < b, b < a)310*/311static bool gpir_lower_eq_ne(gpir_block *block, gpir_node *node)312{313gpir_op cmp_node_op;314gpir_op node_new_op;315switch (node->op) {316case gpir_op_eq:317cmp_node_op = gpir_op_ge;318node_new_op = gpir_op_min; /* and */319break;320case gpir_op_ne:321cmp_node_op = gpir_op_lt;322node_new_op = gpir_op_max; /* or */323break;324default:325unreachable("bad node op");326}327328gpir_alu_node *e = gpir_node_to_alu(node);329330gpir_alu_node *cmp1 = gpir_node_create(block, cmp_node_op);331list_addtail(&cmp1->node.list, &node->list);332gpir_alu_node *cmp2 = gpir_node_create(block, cmp_node_op);333list_addtail(&cmp2->node.list, &node->list);334335cmp1->children[0] = e->children[0];336cmp1->children[1] = e->children[1];337cmp1->num_child = 2;338339cmp2->children[0] = e->children[1];340cmp2->children[1] = e->children[0];341cmp2->num_child = 2;342343gpir_node_add_dep(&cmp1->node, e->children[0], GPIR_DEP_INPUT);344gpir_node_add_dep(&cmp1->node, e->children[1], GPIR_DEP_INPUT);345346gpir_node_add_dep(&cmp2->node, e->children[0], GPIR_DEP_INPUT);347gpir_node_add_dep(&cmp2->node, e->children[1], GPIR_DEP_INPUT);348349gpir_node_foreach_pred_safe(node, dep) {350gpir_node_remove_dep(node, dep->pred);351}352353gpir_node_add_dep(node, &cmp1->node, GPIR_DEP_INPUT);354gpir_node_add_dep(node, &cmp2->node, GPIR_DEP_INPUT);355356node->op = node_new_op;357e->children[0] = &cmp1->node;358e->children[1] = &cmp2->node;359e->num_child = 2;360361return true;362}363364/*365* There is no 'abs' opcode.366* abs(a) is lowered to max(a, -a)367*/368static bool gpir_lower_abs(gpir_block *block, gpir_node *node)369{370gpir_alu_node *alu = gpir_node_to_alu(node);371372assert(node->op == gpir_op_abs);373374node->op = gpir_op_max;375376alu->children[1] = alu->children[0];377alu->children_negate[1] = true;378alu->num_child = 2;379380return true;381}382383/*384* There is no 'not' opcode.385* not(a) is lowered to add(1, -a)386*/387static bool gpir_lower_not(gpir_block *block, gpir_node *node)388{389gpir_alu_node *alu = gpir_node_to_alu(node);390391assert(alu->node.op == gpir_op_not);392393node->op = gpir_op_add;394395gpir_node *node_const = gpir_node_create(block, gpir_op_const);396gpir_const_node *c = gpir_node_to_const(node_const);397398assert(c->node.op == gpir_op_const);399400list_addtail(&c->node.list, &node->list);401c->value.f = 1.0f;402gpir_node_add_dep(&alu->node, &c->node, GPIR_DEP_INPUT);403404alu->children_negate[1] = !alu->children_negate[0];405alu->children[1] = alu->children[0];406alu->children[0] = &c->node;407alu->num_child = 2;408409return true;410}411412/* There is no unconditional branch instruction, so we have to lower it to a413* conditional branch with a condition of 1.0.414*/415416static bool gpir_lower_branch_uncond(gpir_block *block, gpir_node *node)417{418gpir_branch_node *branch = gpir_node_to_branch(node);419420gpir_node *node_const = gpir_node_create(block, gpir_op_const);421gpir_const_node *c = gpir_node_to_const(node_const);422423list_addtail(&c->node.list, &node->list);424c->value.f = 1.0f;425gpir_node_add_dep(&branch->node, &c->node, GPIR_DEP_INPUT);426427branch->node.op = gpir_op_branch_cond;428branch->cond = node_const;429430return true;431}432433static bool (*gpir_pre_rsched_lower_funcs[gpir_op_num])(gpir_block *, gpir_node *) = {434[gpir_op_not] = gpir_lower_not,435[gpir_op_neg] = gpir_lower_neg,436[gpir_op_rcp] = gpir_lower_complex,437[gpir_op_rsqrt] = gpir_lower_complex,438[gpir_op_exp2] = gpir_lower_complex,439[gpir_op_log2] = gpir_lower_complex,440[gpir_op_eq] = gpir_lower_eq_ne,441[gpir_op_ne] = gpir_lower_eq_ne,442[gpir_op_abs] = gpir_lower_abs,443[gpir_op_branch_uncond] = gpir_lower_branch_uncond,444};445446bool gpir_pre_rsched_lower_prog(gpir_compiler *comp)447{448list_for_each_entry(gpir_block, block, &comp->block_list, list) {449list_for_each_entry_safe(gpir_node, node, &block->node_list, list) {450if (gpir_pre_rsched_lower_funcs[node->op] &&451!gpir_pre_rsched_lower_funcs[node->op](block, node))452return false;453}454}455456if (!gpir_lower_const(comp))457return false;458459if (!gpir_lower_load(comp))460return false;461462if (!gpir_lower_node_may_consume_two_slots(comp))463return false;464465gpir_debug("pre rsched lower prog\n");466gpir_node_print_prog_seq(comp);467return true;468}469470471472