Path: blob/21.2-virgl/src/gallium/drivers/lima/ir/pp/lower.c
4574 views
/*1* Copyright (c) 2017 Lima Project2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sub license,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the11* next paragraph) shall be included in all copies or substantial portions12* of the Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER20* DEALINGS IN THE SOFTWARE.21*22*/2324#include "util/bitscan.h"25#include "util/ralloc.h"2627#include "ppir.h"2829static bool ppir_lower_const(ppir_block *block, ppir_node *node)30{31if (ppir_node_is_root(node)) {32ppir_node_delete(node);33return true;34}3536assert(ppir_node_has_single_succ(node));3738ppir_node *succ = ppir_node_first_succ(node);39ppir_dest *dest = ppir_node_get_dest(node);4041switch (succ->type) {42case ppir_node_type_alu:43case ppir_node_type_branch:44/* ALU and branch can consume consts directly */45dest->type = ppir_target_pipeline;46/* Reg will be updated in node_to_instr later */47dest->pipeline = ppir_pipeline_reg_const0;4849/* single succ can still have multiple references to this node */50for (int i = 0; i < ppir_node_get_src_num(succ); i++) {51ppir_src *src = ppir_node_get_src(succ, i);52if (src && src->node == node) {53src->type = ppir_target_pipeline;54src->pipeline = ppir_pipeline_reg_const0;55}56}57return true;58default:59/* Create a move for everyone else */60break;61}6263ppir_node *move = ppir_node_insert_mov(node);64if (unlikely(!move))65return false;6667ppir_debug("lower const create move %d for %d\n",68move->index, node->index);6970/* Need to be careful with changing src/dst type here:71* it has to be done *after* successors have their children72* replaced, otherwise ppir_node_replace_child() won't find73* matching src/dst and as result won't work74*/75ppir_src *mov_src = ppir_node_get_src(move, 0);76mov_src->type = dest->type = ppir_target_pipeline;77mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_const0;7879return true;80}8182static bool ppir_lower_swap_args(ppir_block *block, ppir_node *node)83{84/* swapped op must be the next op */85node->op++;8687assert(node->type == ppir_node_type_alu);88ppir_alu_node *alu = ppir_node_to_alu(node);89assert(alu->num_src == 2);9091ppir_src tmp = alu->src[0];92alu->src[0] = alu->src[1];93alu->src[1] = tmp;94return true;95}9697static bool ppir_lower_load(ppir_block *block, ppir_node *node)98{99ppir_dest *dest = ppir_node_get_dest(node);100if (ppir_node_is_root(node) && dest->type == ppir_target_ssa) {101ppir_node_delete(node);102return true;103}104105/* load can have multiple successors in case if we duplicated load node106* that has load node in source107*/108if ((ppir_node_has_single_src_succ(node) || ppir_node_is_root(node)) &&109dest->type != ppir_target_register) {110ppir_node *succ = ppir_node_first_succ(node);111switch (succ->type) {112case ppir_node_type_alu:113case ppir_node_type_branch: {114/* single succ can still have multiple references to this node */115for (int i = 0; i < ppir_node_get_src_num(succ); i++) {116ppir_src *src = ppir_node_get_src(succ, i);117if (src && src->node == node) {118/* Can consume uniforms directly */119src->type = dest->type = ppir_target_pipeline;120src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;121}122}123return true;124}125default:126/* Create mov for everyone else */127break;128}129}130131ppir_node *move = ppir_node_insert_mov(node);132if (unlikely(!move))133return false;134135ppir_src *mov_src = ppir_node_get_src(move, 0);136mov_src->type = dest->type = ppir_target_pipeline;137mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_uniform;138139return true;140}141142static bool ppir_lower_ddxy(ppir_block *block, ppir_node *node)143{144assert(node->type == ppir_node_type_alu);145ppir_alu_node *alu = ppir_node_to_alu(node);146147alu->src[1] = alu->src[0];148if (node->op == ppir_op_ddx)149alu->src[1].negate = !alu->src[1].negate;150else if (node->op == ppir_op_ddy)151alu->src[0].negate = !alu->src[0].negate;152else153assert(0);154155alu->num_src = 2;156157return true;158}159160static bool ppir_lower_texture(ppir_block *block, ppir_node *node)161{162ppir_dest *dest = ppir_node_get_dest(node);163164if (ppir_node_has_single_succ(node) && dest->type == ppir_target_ssa) {165ppir_node *succ = ppir_node_first_succ(node);166dest->type = ppir_target_pipeline;167dest->pipeline = ppir_pipeline_reg_sampler;168169for (int i = 0; i < ppir_node_get_src_num(succ); i++) {170ppir_src *src = ppir_node_get_src(succ, i);171if (src && src->node == node) {172src->type = ppir_target_pipeline;173src->pipeline = ppir_pipeline_reg_sampler;174}175}176return true;177}178179/* Create move node as fallback */180ppir_node *move = ppir_node_insert_mov(node);181if (unlikely(!move))182return false;183184ppir_debug("lower texture create move %d for %d\n",185move->index, node->index);186187ppir_src *mov_src = ppir_node_get_src(move, 0);188mov_src->type = dest->type = ppir_target_pipeline;189mov_src->pipeline = dest->pipeline = ppir_pipeline_reg_sampler;190191return true;192}193194/* Check if the select condition and ensure it can be inserted to195* the scalar mul slot */196static bool ppir_lower_select(ppir_block *block, ppir_node *node)197{198ppir_alu_node *alu = ppir_node_to_alu(node);199ppir_src *src0 = &alu->src[0];200ppir_src *src1 = &alu->src[1];201ppir_src *src2 = &alu->src[2];202203/* If the condition is already an alu scalar whose only successor204* is the select node, just turn it into pipeline output. */205/* The (src2->node == cond) case is a tricky exception.206* The reason is that we must force cond to output to ^fmul -- but207* then it no longer writes to a register and it is impossible to208* reference ^fmul in src2. So in that exceptional case, also fall209* back to the mov. */210ppir_node *cond = src0->node;211if (cond &&212cond->type == ppir_node_type_alu &&213ppir_node_has_single_succ(cond) &&214ppir_target_is_scalar(ppir_node_get_dest(cond)) &&215ppir_node_schedulable_slot(cond, PPIR_INSTR_SLOT_ALU_SCL_MUL) &&216src2->node != cond) {217218ppir_dest *cond_dest = ppir_node_get_dest(cond);219cond_dest->type = ppir_target_pipeline;220cond_dest->pipeline = ppir_pipeline_reg_fmul;221222ppir_node_target_assign(src0, cond);223224/* src1 could also be a reference from the same node as225* the condition, so update it in that case. */226if (src1->node && src1->node == cond)227ppir_node_target_assign(src1, cond);228229return true;230}231232/* If the condition can't be used for any reason, insert a mov233* so that the condition can end up in ^fmul */234ppir_node *move = ppir_node_create(block, ppir_op_mov, -1, 0);235if (!move)236return false;237list_addtail(&move->list, &node->list);238239ppir_alu_node *move_alu = ppir_node_to_alu(move);240ppir_src *move_src = move_alu->src;241move_src->type = src0->type;242move_src->ssa = src0->ssa;243move_src->swizzle[0] = src0->swizzle[0];244move_alu->num_src = 1;245246ppir_dest *move_dest = &move_alu->dest;247move_dest->type = ppir_target_pipeline;248move_dest->pipeline = ppir_pipeline_reg_fmul;249move_dest->write_mask = 1;250251ppir_node *pred = src0->node;252ppir_dep *dep = ppir_dep_for_pred(node, pred);253if (dep)254ppir_node_replace_pred(dep, move);255else256ppir_node_add_dep(node, move, ppir_dep_src);257258/* pred can be a register */259if (pred)260ppir_node_add_dep(move, pred, ppir_dep_src);261262ppir_node_target_assign(src0, move);263264/* src1 could also be a reference from the same node as265* the condition, so update it in that case. */266if (src1->node && src1->node == pred)267ppir_node_target_assign(src1, move);268269return true;270}271272static bool ppir_lower_trunc(ppir_block *block, ppir_node *node)273{274/* Turn it into a mov with a round to integer output modifier */275ppir_alu_node *alu = ppir_node_to_alu(node);276ppir_dest *move_dest = &alu->dest;277move_dest->modifier = ppir_outmod_round;278node->op = ppir_op_mov;279280return true;281}282283static bool ppir_lower_abs(ppir_block *block, ppir_node *node)284{285/* Turn it into a mov and set the absolute modifier */286ppir_alu_node *alu = ppir_node_to_alu(node);287288assert(alu->num_src == 1);289290alu->src[0].absolute = true;291alu->src[0].negate = false;292node->op = ppir_op_mov;293294return true;295}296297static bool ppir_lower_neg(ppir_block *block, ppir_node *node)298{299/* Turn it into a mov and set the negate modifier */300ppir_alu_node *alu = ppir_node_to_alu(node);301302assert(alu->num_src == 1);303304alu->src[0].negate = !alu->src[0].negate;305node->op = ppir_op_mov;306307return true;308}309310static bool ppir_lower_sat(ppir_block *block, ppir_node *node)311{312/* Turn it into a mov with the saturate output modifier */313ppir_alu_node *alu = ppir_node_to_alu(node);314315assert(alu->num_src == 1);316317ppir_dest *move_dest = &alu->dest;318move_dest->modifier = ppir_outmod_clamp_fraction;319node->op = ppir_op_mov;320321return true;322}323324static bool ppir_lower_branch(ppir_block *block, ppir_node *node)325{326ppir_branch_node *branch = ppir_node_to_branch(node);327328/* Unconditional branch */329if (branch->num_src == 0)330return true;331332ppir_const_node *zero = ppir_node_create(block, ppir_op_const, -1, 0);333334if (!zero)335return false;336337zero->constant.value[0].f = 0;338zero->constant.num = 1;339zero->dest.type = ppir_target_pipeline;340zero->dest.pipeline = ppir_pipeline_reg_const0;341zero->dest.ssa.num_components = 1;342zero->dest.write_mask = 0x01;343344/* For now we're just comparing branch condition with 0,345* in future we should look whether it's possible to move346* comparision node into branch itself and use current347* way as a fallback for complex conditions.348*/349ppir_node_target_assign(&branch->src[1], &zero->node);350351if (branch->negate)352branch->cond_eq = true;353else {354branch->cond_gt = true;355branch->cond_lt = true;356}357358branch->num_src = 2;359360ppir_node_add_dep(&branch->node, &zero->node, ppir_dep_src);361list_addtail(&zero->node.list, &node->list);362363return true;364}365366static bool ppir_lower_accum(ppir_block *block, ppir_node *node)367{368/* If the last argument of a node placed in PPIR_INSTR_SLOT_ALU_SCL_ADD369* (or PPIR_INSTR_SLOT_ALU_VEC_ADD) is placed in370* PPIR_INSTR_SLOT_ALU_SCL_MUL (or PPIR_INSTR_SLOT_ALU_VEC_MUL) we cannot371* save a register (and an instruction) by using a pipeline register.372* Therefore it is interesting to make sure arguments of that type are373* the first argument by swapping arguments (if possible) */374ppir_alu_node *alu = ppir_node_to_alu(node);375376assert(alu->num_src >= 2);377378if (alu->src[0].type == ppir_target_pipeline)379return true;380381if (alu->src[0].type == ppir_target_ssa) {382int *src_0_slots = ppir_op_infos[alu->src[0].node->op].slots;383if (src_0_slots) {384for (int i = 0; src_0_slots[i] != PPIR_INSTR_SLOT_END; i++) {385if ((src_0_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||386(src_0_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {387return true;388}389}390}391}392393int src_to_swap = -1;394for (int j = 1; j < alu->num_src; j++) {395if (alu->src[j].type != ppir_target_ssa)396continue;397int *src_slots = ppir_op_infos[alu->src[j].node->op].slots;398if (!src_slots)399continue;400for (int i = 0; src_slots[i] != PPIR_INSTR_SLOT_END; i++) {401if ((src_slots[i] == PPIR_INSTR_SLOT_ALU_SCL_MUL) ||402(src_slots[i] == PPIR_INSTR_SLOT_ALU_VEC_MUL)) {403src_to_swap = j;404break;405}406}407if (src_to_swap > 0)408break;409}410411if (src_to_swap < 0)412return true;413414/* Swap arguments so that we can use a pipeline register later on */415ppir_src tmp = alu->src[0];416alu->src[0] = alu->src[src_to_swap];417alu->src[src_to_swap] = tmp;418419return true;420}421422static bool (*ppir_lower_funcs[ppir_op_num])(ppir_block *, ppir_node *) = {423[ppir_op_abs] = ppir_lower_abs,424[ppir_op_neg] = ppir_lower_neg,425[ppir_op_const] = ppir_lower_const,426[ppir_op_ddx] = ppir_lower_ddxy,427[ppir_op_ddy] = ppir_lower_ddxy,428[ppir_op_lt] = ppir_lower_swap_args,429[ppir_op_le] = ppir_lower_swap_args,430[ppir_op_load_texture] = ppir_lower_texture,431[ppir_op_select] = ppir_lower_select,432[ppir_op_trunc] = ppir_lower_trunc,433[ppir_op_sat] = ppir_lower_sat,434[ppir_op_branch] = ppir_lower_branch,435[ppir_op_load_uniform] = ppir_lower_load,436[ppir_op_load_temp] = ppir_lower_load,437[ppir_op_add] = ppir_lower_accum,438[ppir_op_max] = ppir_lower_accum,439[ppir_op_min] = ppir_lower_accum,440[ppir_op_eq] = ppir_lower_accum,441[ppir_op_ne] = ppir_lower_accum,442};443444bool ppir_lower_prog(ppir_compiler *comp)445{446list_for_each_entry(ppir_block, block, &comp->block_list, list) {447list_for_each_entry_safe(ppir_node, node, &block->node_list, list) {448if (ppir_lower_funcs[node->op] &&449!ppir_lower_funcs[node->op](block, node))450return false;451}452}453454return true;455}456457458