Path: blob/21.2-virgl/src/freedreno/vulkan/tu_nir_lower_multiview.c
4565 views
/*1* Copyright © 2020 Valve Corporation2* SPDX-License-Identifier: MIT3*/45#include "tu_private.h"6#include "nir_builder.h"78/* Some a6xx variants cannot support a non-contiguous multiview mask. Instead,9* inside the shader something like this needs to be inserted:10*11* gl_Position = ((1ull << gl_ViewIndex) & view_mask) ? gl_Position : vec4(0.);12*13* Scan backwards until we find the gl_Position write (there should only be14* one).15*/16static bool17lower_multiview_mask(nir_shader *nir, uint32_t *mask)18{19nir_function_impl *impl = nir_shader_get_entrypoint(nir);2021if (util_is_power_of_two_or_zero(*mask + 1)) {22nir_metadata_preserve(impl, nir_metadata_all);23return false;24}2526nir_builder b;27nir_builder_init(&b, impl);2829uint32_t old_mask = *mask;30*mask = BIT(util_logbase2(old_mask) + 1) - 1;3132nir_foreach_block_reverse(block, impl) {33nir_foreach_instr_reverse(instr, block) {34if (instr->type != nir_instr_type_intrinsic)35continue;3637nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);38if (intrin->intrinsic != nir_intrinsic_store_deref)39continue;4041nir_deref_instr *deref = nir_src_as_deref(intrin->src[0]);42if (!nir_deref_mode_is(deref, nir_var_shader_out))43continue;4445nir_variable *var = nir_deref_instr_get_variable(deref);46if (var->data.location != VARYING_SLOT_POS)47continue;4849assert(intrin->src[1].is_ssa);50nir_ssa_def *orig_src = intrin->src[1].ssa;51b.cursor = nir_before_instr(instr);5253/* ((1ull << gl_ViewIndex) & mask) != 0 */54nir_ssa_def *cmp =55nir_i2b(&b, nir_iand(&b, nir_imm_int(&b, old_mask),56nir_ishl(&b, nir_imm_int(&b, 1),57nir_load_view_index(&b))));5859nir_ssa_def *src = nir_bcsel(&b, cmp, orig_src, nir_imm_float(&b, 0.));60nir_instr_rewrite_src(instr, &intrin->src[1], nir_src_for_ssa(src));6162nir_metadata_preserve(impl, nir_metadata_block_index |63nir_metadata_dominance);64return true;65}66}6768nir_metadata_preserve(impl, nir_metadata_all);69return false;70}7172bool73tu_nir_lower_multiview(nir_shader *nir, uint32_t mask, bool *multi_pos_output,74struct tu_device *dev)75{76*multi_pos_output = false;7778bool progress = false;7980if (!dev->physical_device->info->a6xx.supports_multiview_mask)81NIR_PASS(progress, nir, lower_multiview_mask, &mask);8283unsigned num_views = util_logbase2(mask) + 1;8485/* Blob doesn't apply multipos optimization starting from 11 views86* even on a650, however in practice, with the limit of 16 views,87* tests pass on a640/a650 and fail on a630.88*/89unsigned max_views_for_multipos =90dev->physical_device->info->a6xx.supports_multiview_mask ? 16 : 10;9192/* Speculatively assign output locations so that we know num_outputs. We93* will assign output locations for real after this pass.94*/95unsigned num_outputs;96nir_assign_io_var_locations(nir, nir_var_shader_out, &num_outputs, MESA_SHADER_VERTEX);9798/* In addition to the generic checks done by NIR, check that we don't99* overflow VPC with the extra copies of gl_Position.100*/101if (likely(!(dev->physical_device->instance->debug_flags & TU_DEBUG_NOMULTIPOS)) &&102num_views <= max_views_for_multipos && num_outputs + (num_views - 1) <= 32 &&103nir_can_lower_multiview(nir)) {104*multi_pos_output = true;105106/* It appears that the multiview mask is ignored when multi-position107* output is enabled, so we have to write 0 to inactive views ourselves.108*/109NIR_PASS(progress, nir, lower_multiview_mask, &mask);110111NIR_PASS_V(nir, nir_lower_multiview, mask);112progress = true;113}114115return progress;116}117118119120