Path: blob/21.2-virgl/src/freedreno/ir3/ir3_nir_move_varying_inputs.c
4565 views
/*1* Copyright © 2019 Red Hat2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "compiler/nir/nir_builder.h"24#include "ir3_nir.h"2526/**27* This pass moves varying fetches (and the instructions they depend on28* into the start block.29*30* We need to set the (ei) "end input" flag on the last varying fetch.31* And we want to ensure that all threads execute the instruction that32* sets (ei). The easiest way to ensure this is to move all varying33* fetches into the start block. Which is something we used to get for34* free by using lower_all_io_to_temps=true.35*36* This may come at the cost of additional register usage. OTOH setting37* the (ei) flag earlier probably frees up more VS to run.38*39* Not all varying fetches could be pulled into the start block.40* If there are fetches we couldn't pull, like load_interpolated_input41* with offset which depends on a non-reorderable ssbo load or on a42* phi node, this pass is skipped since it would be hard to find a place43* to set (ei) flag (beside at the very end).44* a5xx and a6xx do automatically release varying storage at the end.45*/4647typedef struct {48nir_block *start_block;49bool precondition_failed;50} precond_state;5152typedef struct {53nir_shader *shader;54nir_block *start_block;55} state;5657static void check_precondition_instr(precond_state *state, nir_instr *instr);58static void move_instruction_to_start_block(state *state, nir_instr *instr);5960static bool61check_precondition_src(nir_src *src, void *state)62{63check_precondition_instr(state, src->ssa->parent_instr);64return true;65}6667/* Recursively check if there is even a single dependency which68* cannot be moved.69*/70static void71check_precondition_instr(precond_state *state, nir_instr *instr)72{73if (instr->block == state->start_block)74return;7576switch (instr->type) {77case nir_instr_type_alu:78case nir_instr_type_deref:79case nir_instr_type_load_const:80case nir_instr_type_ssa_undef:81/* These could be safely moved around */82break;83case nir_instr_type_intrinsic: {84nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);85if (!nir_intrinsic_can_reorder(intr)) {86state->precondition_failed = true;87return;88}89break;90}91default:92state->precondition_failed = true;93return;94}9596nir_foreach_src(instr, check_precondition_src, state);97}9899static void100check_precondition_block(precond_state *state, nir_block *block)101{102nir_foreach_instr_safe (instr, block) {103if (instr->type != nir_instr_type_intrinsic)104continue;105106nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);107108switch (intr->intrinsic) {109case nir_intrinsic_load_interpolated_input:110case nir_intrinsic_load_input:111break;112default:113continue;114}115116check_precondition_instr(state, instr);117118if (state->precondition_failed)119return;120}121}122123static bool124move_src(nir_src *src, void *state)125{126/* At this point we shouldn't have any non-ssa src: */127debug_assert(src->is_ssa);128move_instruction_to_start_block(state, src->ssa->parent_instr);129return true;130}131132static void133move_instruction_to_start_block(state *state, nir_instr *instr)134{135/* nothing to do if the instruction is already in the start block */136if (instr->block == state->start_block)137return;138139/* first move (recursively) all src's to ensure they appear before140* load*_input that we are trying to move:141*/142nir_foreach_src(instr, move_src, state);143144/* and then move the instruction itself:145*/146exec_node_remove(&instr->node);147exec_list_push_tail(&state->start_block->instr_list, &instr->node);148instr->block = state->start_block;149}150151static bool152move_varying_inputs_block(state *state, nir_block *block)153{154bool progress = false;155156nir_foreach_instr_safe (instr, block) {157if (instr->type != nir_instr_type_intrinsic)158continue;159160nir_intrinsic_instr *intr = nir_instr_as_intrinsic(instr);161162switch (intr->intrinsic) {163case nir_intrinsic_load_interpolated_input:164case nir_intrinsic_load_input:165/* TODO any others to handle? */166break;167default:168continue;169}170171debug_assert(intr->dest.is_ssa);172173move_instruction_to_start_block(state, instr);174175progress = true;176}177178return progress;179}180181bool182ir3_nir_move_varying_inputs(nir_shader *shader)183{184bool progress = false;185186debug_assert(shader->info.stage == MESA_SHADER_FRAGMENT);187188nir_foreach_function (function, shader) {189precond_state state;190191if (!function->impl)192continue;193194state.precondition_failed = false;195state.start_block = nir_start_block(function->impl);196197nir_foreach_block (block, function->impl) {198if (block == state.start_block)199continue;200201check_precondition_block(&state, block);202203if (state.precondition_failed)204return false;205}206}207208nir_foreach_function (function, shader) {209state state;210211if (!function->impl)212continue;213214state.shader = shader;215state.start_block = nir_start_block(function->impl);216217bool progress = false;218nir_foreach_block (block, function->impl) {219/* don't need to move anything that is already in the first block */220if (block == state.start_block)221continue;222progress |= move_varying_inputs_block(&state, block);223}224225if (progress) {226nir_metadata_preserve(227function->impl, nir_metadata_block_index | nir_metadata_dominance);228}229}230231return progress;232}233234235