Path: blob/21.2-virgl/src/gallium/drivers/radeonsi/si_nir_optim.c
4570 views
/*1* Copyright 2021 Advanced Micro Devices, Inc.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "si_pipe.h"24#include "nir.h"25#include "nir_builder.h"26#include "nir_worklist.h"272829static bool30add_src_instr_to_worklist(nir_src *src, void *wl)31{32if (!src->is_ssa)33return false;3435nir_instr_worklist_push_tail(wl, src->ssa->parent_instr);36return true;37}3839static int40get_tex_unit(nir_tex_instr *tex)41{42int tex_index = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);43if (tex_index >= 0) {44nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_index].src);45nir_variable *var = nir_deref_instr_get_variable(deref);46return var ? var->data.binding : 0;47}48return -1;49}5051static int52check_instr_depends_on_tex(nir_intrinsic_instr *store)53{54int texunit = -1;55struct set *instrs = _mesa_set_create(NULL, _mesa_hash_pointer,56_mesa_key_pointer_equal);57nir_instr_worklist *work = nir_instr_worklist_create();5859_mesa_set_add(instrs, &store->instr);60add_src_instr_to_worklist(&store->src[0], work);6162nir_foreach_instr_in_worklist(instr, work) {63/* Don't process an instruction twice */64if (_mesa_set_search(instrs, instr))65continue;6667_mesa_set_add(instrs, instr);6869if (instr->type == nir_instr_type_alu ||70instr->type == nir_instr_type_load_const) {71/* TODO: ubo, etc */72if (!nir_foreach_src(instr, add_src_instr_to_worklist, work))73break;74continue;75} else if (instr->type == nir_instr_type_tex) {76if (texunit != -1) {77/* We can only depend on a single tex */78texunit = -1;79break;80} else {81texunit = get_tex_unit(nir_instr_as_tex(instr));82continue;83}84} else {85break;86}87}8889nir_instr_worklist_destroy(work);90_mesa_set_destroy(instrs, NULL);91return texunit;92}9394static bool95get_output_as_const_value(nir_shader *shader, float values[4])96{97nir_foreach_function(function, shader) {98nir_foreach_block_reverse(block, function->impl) {99nir_foreach_instr_reverse_safe(instr, block) {100switch (instr->type) {101case nir_instr_type_intrinsic: {102nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);103if (intrin->intrinsic == nir_intrinsic_store_output) {104nir_const_value *c = nir_src_as_const_value(intrin->src[0]);105if (c) {106nir_const_value_to_array(values, c, 4, f32);107return true;108}109return false;110}111FALLTHROUGH;112}113default:114continue;115}116}117}118}119return false;120}121122struct replace_param {123float value[4];124int *texunit;125};126127static bool128store_instr_depends_on_tex(nir_builder *b, nir_instr *instr, void *state)129{130if (instr->type != nir_instr_type_intrinsic)131return false;132133nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);134if (intrin->intrinsic != nir_intrinsic_store_output)135return false;136137struct replace_param *p = (struct replace_param*) state;138*(p->texunit) = check_instr_depends_on_tex(intrin);139140return *(p->texunit) != -1;141}142143144static bool145replace_tex_by_imm(nir_builder *b, nir_instr *instr, void *state)146{147if (instr->type != nir_instr_type_tex)148return false;149150nir_tex_instr *tex = nir_instr_as_tex(instr);151struct replace_param *p = (struct replace_param*) state;152153if (get_tex_unit(tex) != *(p->texunit))154return false;155156b->cursor = nir_instr_remove(&tex->instr);157nir_ssa_def *imm = nir_imm_vec4(b, p->value[0], p->value[1], p->value[2], p->value[3]);158nir_ssa_def_rewrite_uses(&tex->dest.ssa, imm);159return true;160}161162163/* This function returns true if a shader' sole output becomes constant when164* a given texunit is replaced by a constant value.165* The input constant value is passed as 'in' and the determined constant166* value is stored in 'out'. The texunit is also remembered.167*/168bool169si_nir_is_output_const_if_tex_is_const(nir_shader *shader, float *in, float *out, int *texunit)170{171assert(shader->info.stage == MESA_SHADER_FRAGMENT);172173if (BITSET_COUNT(shader->info.textures_used) == 0 ||174util_bitcount64(shader->info.outputs_written) != 1)175return false;176177/* Clone the shader */178nir_shader *sh = nir_shader_clone(ralloc_parent(shader), shader);179180struct replace_param p;181memcpy(p.value, in, 4 * sizeof(float));182p.texunit = texunit;183184/* Test if the single store_output only depends on constants and a single texture op */185if (nir_shader_instructions_pass(sh, store_instr_depends_on_tex, nir_metadata_all, &p)) {186assert(*p.texunit != -1);187188/* Replace nir_tex_instr using texunit by vec4(v) */189nir_shader_instructions_pass(sh, replace_tex_by_imm,190nir_metadata_block_index |191nir_metadata_dominance, &p);192193/* Optimize the cloned shader */194bool progress;195do {196progress = false;197NIR_PASS(progress, sh, nir_copy_prop);198NIR_PASS(progress, sh, nir_opt_remove_phis);199NIR_PASS(progress, sh, nir_opt_dce);200NIR_PASS(progress, sh, nir_opt_dead_cf);201NIR_PASS(progress, sh, nir_opt_algebraic);202NIR_PASS(progress, sh, nir_opt_constant_folding);203} while (progress);204205/* Is the output a constant value? */206if (get_output_as_const_value(sh, out)) {207ralloc_free(sh);208return true;209}210}211ralloc_free(sh);212return false;213}214215216