Path: blob/21.2-virgl/src/freedreno/vulkan/tu_shader.c
/*
 * Copyright © 2019 Google LLC
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "spirv/nir_spirv.h"
#include "util/mesa-sha1.h"
#include "nir/nir_xfb_info.h"
#include "nir/nir_vulkan.h"
#include "vk_util.h"

#include "ir3/ir3_nir.h"

nir_shader *
tu_spirv_to_nir(struct tu_device *dev,
                const VkPipelineShaderStageCreateInfo *stage_info,
                gl_shader_stage stage)
{
   /* TODO these are made-up */
   const struct spirv_to_nir_options spirv_options = {
      .frag_coord_is_sysval = true,

      .ubo_addr_format = nir_address_format_vec2_index_32bit_offset,
      .ssbo_addr_format = nir_address_format_vec2_index_32bit_offset,

      /* Accessed via stg/ldg */
      .phys_ssbo_addr_format = nir_address_format_64bit_global,

      /* Accessed via the const register file */
      .push_const_addr_format = nir_address_format_logical,

      /* Accessed via ldl/stl */
      .shared_addr_format = nir_address_format_32bit_offset,

      /* Accessed via stg/ldg (not used with Vulkan?) */
      .global_addr_format = nir_address_format_64bit_global,
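
      /* Note: with the vec2_index_32bit_offset formats above, a UBO/SSBO
       * "pointer" is a vec3 of (descriptor set, descriptor index, offset
       * within the buffer); lower_vulkan_resource_index() below builds
       * exactly that shape.
       */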

      /* ViewID is a sysval in geometry stages and an input in the FS */
      .view_index_is_input = stage == MESA_SHADER_FRAGMENT,
      .caps = {
         .transform_feedback = true,
         .tessellation = true,
         .draw_parameters = true,
         .image_read_without_format = true,
         .image_write_without_format = true,
         .variable_pointers = true,
         .stencil_export = true,
         .multiview = true,
         .shader_viewport_index_layer = true,
         .geometry_streams = true,
         .device_group = true,
         .descriptor_indexing = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .runtime_descriptor_array = true,
         .float_controls = true,
         .float16 = true,
         .int16 = true,
         .storage_16bit = dev->physical_device->info->a6xx.storage_16bit,
         .demote_to_helper_invocation = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_vote = true,
      },
   };

   const struct nir_lower_compute_system_values_options compute_sysval_options = {
      .has_base_workgroup_id = true,
   };

   const nir_shader_compiler_options *nir_options =
      ir3_get_compiler_options(dev->compiler);

   /* convert VkSpecializationInfo */
   const VkSpecializationInfo *spec_info = stage_info->pSpecializationInfo;
   struct nir_spirv_specialization *spec = NULL;
   uint32_t num_spec = 0;
   if (spec_info && spec_info->mapEntryCount) {
      spec = calloc(spec_info->mapEntryCount, sizeof(*spec));
      if (!spec)
         return NULL;

      for (uint32_t i = 0; i < spec_info->mapEntryCount; i++) {
         const VkSpecializationMapEntry *entry = &spec_info->pMapEntries[i];
         const void *data = spec_info->pData + entry->offset;
         assert(data + entry->size <= spec_info->pData + spec_info->dataSize);
         spec[i].id = entry->constantID;
         switch (entry->size) {
         case 8:
            spec[i].value.u64 = *(const uint64_t *)data;
            break;
         case 4:
            spec[i].value.u32 = *(const uint32_t *)data;
            break;
         case 2:
            spec[i].value.u16 = *(const uint16_t *)data;
            break;
         case 1:
            spec[i].value.u8 = *(const uint8_t *)data;
            break;
         default:
            assert(!"Invalid spec constant size");
            break;
         }
         spec[i].defined_on_module = false;
      }

      num_spec = spec_info->mapEntryCount;
   }

   struct vk_shader_module *module =
      vk_shader_module_from_handle(stage_info->module);
   assert(module->size % 4 == 0);
   nir_shader *nir =
      spirv_to_nir((void*)module->data, module->size / 4,
                   spec, num_spec, stage, stage_info->pName,
                   &spirv_options, nir_options);

   free(spec);

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");

   if (unlikely(dev->physical_device->instance->debug_flags & TU_DEBUG_NIR)) {
      fprintf(stderr, "translated nir:\n");
      nir_print_shader(nir, stderr);
   }

   /* multi step inlining procedure */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);
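
   /* With everything inlined into the single entrypoint, initializers for
    * the remaining (non-function_temp) variable modes can now be lowered.
    */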
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~nir_var_function_temp);

   /* Split member structs.  We do this before lower_io_to_temporaries so that
    * it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);

   NIR_PASS_V(nir, nir_lower_global_vars_to_local);
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_lower_var_copies);

   NIR_PASS_V(nir, nir_opt_copy_prop_vars);
   NIR_PASS_V(nir, nir_opt_combine_stores, nir_var_all);

   NIR_PASS_V(nir, nir_lower_is_helper_invocation);

   NIR_PASS_V(nir, nir_lower_system_values);
   NIR_PASS_V(nir, nir_lower_compute_system_values, &compute_sysval_options);

   NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);

   NIR_PASS_V(nir, nir_lower_frexp);

   ir3_optimize_loop(dev->compiler, nir);

   return nir;
}

static void
lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,
                         struct tu_shader *shader)
{
   uint32_t base = nir_intrinsic_base(instr);
   assert(base % 4 == 0);
   assert(base >= shader->push_consts.lo * 16);
   base -= shader->push_consts.lo * 16;

   nir_ssa_def *load =
      nir_load_uniform(b, instr->num_components, instr->dest.ssa.bit_size,
                       nir_ushr(b, instr->src[0].ssa, nir_imm_int(b, 2)),
                       .base = base / 4);

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, load);

   nir_instr_remove(&instr->instr);
}

static void
lower_vulkan_resource_index(nir_builder *b, nir_intrinsic_instr *instr,
                            struct tu_shader *shader,
                            const struct tu_pipeline_layout *layout)
{
   nir_ssa_def *vulkan_idx = instr->src[0].ssa;

   unsigned set = nir_intrinsic_desc_set(instr);
   unsigned binding = nir_intrinsic_binding(instr);
   struct tu_descriptor_set_layout *set_layout = layout->set[set].layout;
   struct tu_descriptor_set_binding_layout *binding_layout =
      &set_layout->binding[binding];
   uint32_t base;

   shader->active_desc_sets |= 1u << set;

   switch (binding_layout->type) {
   case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC:
   case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC:
      base = layout->set[set].dynamic_offset_start +
         binding_layout->dynamic_offset_offset;
      set = MAX_SETS;
      break;
   default:
      base = binding_layout->offset / (4 * A6XX_TEX_CONST_DWORDS);
      break;
   }

   nir_ssa_def *def = nir_vec3(b, nir_imm_int(b, set),
                               nir_iadd(b, nir_imm_int(b, base), vulkan_idx),
                               nir_imm_int(b, 0));

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, def);
   nir_instr_remove(&instr->instr);
}

static void
lower_vulkan_resource_reindex(nir_builder *b, nir_intrinsic_instr *instr)
{
   nir_ssa_def *old_index = instr->src[0].ssa;
   nir_ssa_def *delta = instr->src[1].ssa;

   nir_ssa_def *new_index =
      nir_vec3(b, nir_channel(b, old_index, 0),
               nir_iadd(b, nir_channel(b, old_index, 1), delta),
               nir_channel(b, old_index, 2));

   nir_ssa_def_rewrite_uses(&instr->dest.ssa, new_index);
   nir_instr_remove(&instr->instr);
}

static void
lower_load_vulkan_descriptor(nir_intrinsic_instr *intrin)
{
   /* Loading the descriptor happens as part of the load/store instruction so
    * this is a no-op.
    */
   nir_ssa_def_rewrite_uses_src(&intrin->dest.ssa, intrin->src[0]);
   nir_instr_remove(&intrin->instr);
}

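/* Lowers a UBO/SSBO access whose descriptor-set component of the pointer may
 * not be constant.  When the set is dynamic, the generated control flow looks
 * roughly like this (see the "if"/"else" comments in the loop below):
 *
 *    if (set == 0)      result = <access with .desc_set = 0>;
 *    else if (set == 1) result = <access with .desc_set = 1>;
 *    ...
 */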
static void
lower_ssbo_ubo_intrinsic(nir_builder *b, nir_intrinsic_instr *intrin)
{
   const nir_intrinsic_info *info = &nir_intrinsic_infos[intrin->intrinsic];

   /* The bindless base is part of the instruction, which means that part of
    * the "pointer" has to be constant. We solve this in the same way the blob
    * does, by generating a bunch of if-statements. In the usual case where
    * the descriptor set is constant we can skip that, though.
    */

   unsigned buffer_src;
   if (intrin->intrinsic == nir_intrinsic_store_ssbo) {
      /* This has the value first */
      buffer_src = 1;
   } else {
      buffer_src = 0;
   }

   nir_ssa_scalar scalar_idx = nir_ssa_scalar_resolved(intrin->src[buffer_src].ssa, 0);
   nir_ssa_def *descriptor_idx = nir_channel(b, intrin->src[buffer_src].ssa, 1);

   nir_ssa_def *results[MAX_SETS + 1] = { NULL };

   if (nir_ssa_scalar_is_const(scalar_idx)) {
      nir_ssa_def *bindless =
         nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = nir_ssa_scalar_as_uint(scalar_idx));
      nir_instr_rewrite_src_ssa(&intrin->instr, &intrin->src[buffer_src], bindless);
      return;
   }

   nir_ssa_def *base_idx = nir_channel(b, scalar_idx.def, scalar_idx.comp);
   for (unsigned i = 0; i < MAX_SETS + 1; i++) {
      /* if (base_idx == i) { ... */
      nir_if *nif = nir_push_if(b, nir_ieq_imm(b, base_idx, i));

      nir_ssa_def *bindless =
         nir_bindless_resource_ir3(b, 32, descriptor_idx, .desc_set = i);

      nir_intrinsic_instr *copy =
         nir_intrinsic_instr_create(b->shader, intrin->intrinsic);

      copy->num_components = intrin->num_components;

      for (unsigned src = 0; src < info->num_srcs; src++) {
         if (src == buffer_src)
            copy->src[src] = nir_src_for_ssa(bindless);
         else
            copy->src[src] = nir_src_for_ssa(intrin->src[src].ssa);
      }

      for (unsigned idx = 0; idx < info->num_indices; idx++) {
         copy->const_index[idx] = intrin->const_index[idx];
      }

      if (info->has_dest) {
         nir_ssa_dest_init(&copy->instr, &copy->dest,
                           intrin->dest.ssa.num_components,
                           intrin->dest.ssa.bit_size,
                           NULL);
         results[i] = &copy->dest.ssa;
      }

      nir_builder_instr_insert(b, &copy->instr);
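
      /* Each branch's value is collected in results[i]; the branches are
       * joined with nir_if_phi() once the nested ifs are popped below.
       */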

      /* } else { ... */
      nir_push_else(b, nif);
   }

   nir_ssa_def *result =
      nir_ssa_undef(b, intrin->dest.ssa.num_components, intrin->dest.ssa.bit_size);
   for (int i = MAX_SETS; i >= 0; i--) {
      nir_pop_if(b, NULL);
      if (info->has_dest)
         result = nir_if_phi(b, results[i], result);
   }

   if (info->has_dest)
      nir_ssa_def_rewrite_uses(&intrin->dest.ssa, result);
   nir_instr_remove(&intrin->instr);
}

static nir_ssa_def *
build_bindless(nir_builder *b, nir_deref_instr *deref, bool is_sampler,
               struct tu_shader *shader,
               const struct tu_pipeline_layout *layout)
{
   nir_variable *var = nir_deref_instr_get_variable(deref);

   unsigned set = var->data.descriptor_set;
   unsigned binding = var->data.binding;
   const struct tu_descriptor_set_binding_layout *bind_layout =
      &layout->set[set].layout->binding[binding];

   /* input attachments use non bindless workaround */
   if (bind_layout->type == VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT) {
      const struct glsl_type *glsl_type = glsl_without_array(var->type);
      uint32_t idx = var->data.index * 2;

      BITSET_SET_RANGE(b->shader->info.textures_used, idx * 2, ((idx * 2) + (bind_layout->array_size * 2)) - 1);

      /* D24S8 workaround: stencil of D24S8 will be sampled as uint */
      if (glsl_get_sampler_result_type(glsl_type) == GLSL_TYPE_UINT)
         idx += 1;

      if (deref->deref_type == nir_deref_type_var)
         return nir_imm_int(b, idx);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      return nir_iadd(b, nir_imm_int(b, idx),
                      nir_imul_imm(b, arr_index, 2));
   }

   shader->active_desc_sets |= 1u << set;

   nir_ssa_def *desc_offset;
   unsigned descriptor_stride;
   unsigned offset = 0;
   /* Samplers come second in combined image/sampler descriptors, see
    * write_combined_image_sampler_descriptor().
    */
   if (is_sampler && bind_layout->type ==
       VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER) {
      offset = 1;
   }
   desc_offset =
      nir_imm_int(b, (bind_layout->offset / (4 * A6XX_TEX_CONST_DWORDS)) +
                  offset);
   descriptor_stride = bind_layout->size / (4 * A6XX_TEX_CONST_DWORDS);

   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);

      nir_ssa_def *arr_index = nir_ssa_for_src(b, deref->arr.index, 1);
      desc_offset = nir_iadd(b, desc_offset,
                             nir_imul_imm(b, arr_index, descriptor_stride));
   }

   return nir_bindless_resource_ir3(b, 32, desc_offset, .desc_set = set);
}

static void
lower_image_deref(nir_builder *b,
                  nir_intrinsic_instr *instr, struct tu_shader *shader,
                  const struct tu_pipeline_layout *layout)
{
   nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);
   nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
   nir_rewrite_image_intrinsic(instr, bindless, true);
}

static bool
lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,
                struct tu_shader *shader,
                const struct tu_pipeline_layout *layout)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_push_constant:
      lower_load_push_constant(b, instr, shader);
      return true;

   case nir_intrinsic_load_vulkan_descriptor:
      lower_load_vulkan_descriptor(instr);
      return true;

   case nir_intrinsic_vulkan_resource_index:
      lower_vulkan_resource_index(b, instr, shader, layout);
      return true;
   case nir_intrinsic_vulkan_resource_reindex:
      lower_vulkan_resource_reindex(b, instr);
      return true;

   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_ssbo_atomic_add:
   case nir_intrinsic_ssbo_atomic_imin:
   case nir_intrinsic_ssbo_atomic_umin:
   case nir_intrinsic_ssbo_atomic_imax:
   case nir_intrinsic_ssbo_atomic_umax:
   case nir_intrinsic_ssbo_atomic_and:
   case nir_intrinsic_ssbo_atomic_or:
   case nir_intrinsic_ssbo_atomic_xor:
   case nir_intrinsic_ssbo_atomic_exchange:
   case nir_intrinsic_ssbo_atomic_comp_swap:
   case nir_intrinsic_ssbo_atomic_fadd:
   case nir_intrinsic_ssbo_atomic_fmin:
   case nir_intrinsic_ssbo_atomic_fmax:
   case nir_intrinsic_ssbo_atomic_fcomp_swap:
   case nir_intrinsic_get_ssbo_size:
      lower_ssbo_ubo_intrinsic(b, instr);
      return true;

   case nir_intrinsic_image_deref_load:
   case nir_intrinsic_image_deref_store:
   case nir_intrinsic_image_deref_atomic_add:
   case nir_intrinsic_image_deref_atomic_imin:
   case nir_intrinsic_image_deref_atomic_umin:
   case nir_intrinsic_image_deref_atomic_imax:
   case nir_intrinsic_image_deref_atomic_umax:
   case nir_intrinsic_image_deref_atomic_and:
   case nir_intrinsic_image_deref_atomic_or:
   case nir_intrinsic_image_deref_atomic_xor:
   case nir_intrinsic_image_deref_atomic_exchange:
   case nir_intrinsic_image_deref_atomic_comp_swap:
   case nir_intrinsic_image_deref_size:
   case nir_intrinsic_image_deref_samples:
      lower_image_deref(b, instr, shader, layout);
      return true;

   default:
      return false;
   }
}

static void
lower_tex_ycbcr(const struct tu_pipeline_layout *layout,
                nir_builder *builder,
                nir_tex_instr *tex)
{
   int deref_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   assert(deref_src_idx >= 0);
   nir_deref_instr *deref = nir_src_as_deref(tex->src[deref_src_idx].src);

   nir_variable *var = nir_deref_instr_get_variable(deref);
   const struct tu_descriptor_set_layout *set_layout =
      layout->set[var->data.descriptor_set].layout;
   const struct tu_descriptor_set_binding_layout *binding =
      &set_layout->binding[var->data.binding];
   const struct tu_sampler_ycbcr_conversion *ycbcr_samplers =
      tu_immutable_ycbcr_samplers(set_layout, binding);

   if (!ycbcr_samplers)
      return;

   /* For the following instructions, we don't apply any change */
   if (tex->op == nir_texop_txs ||
       tex->op == nir_texop_query_levels ||
       tex->op == nir_texop_lod)
      return;

   assert(tex->texture_index == 0);
   unsigned array_index = 0;
   if (deref->deref_type != nir_deref_type_var) {
      assert(deref->deref_type == nir_deref_type_array);
      if (!nir_src_is_const(deref->arr.index))
         return;
      array_index = nir_src_as_uint(deref->arr.index);
      array_index = MIN2(array_index, binding->array_size - 1);
   }
   const struct tu_sampler_ycbcr_conversion *ycbcr_sampler = ycbcr_samplers + array_index;

   if (ycbcr_sampler->ycbcr_model == VK_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY)
      return;

   builder->cursor = nir_after_instr(&tex->instr);

   uint8_t bits = vk_format_get_component_bits(ycbcr_sampler->format,
                                               UTIL_FORMAT_COLORSPACE_RGB,
                                               PIPE_SWIZZLE_X);
   uint32_t bpcs[3] = {bits, bits, bits}; /* TODO: use right bpc for each channel ? */
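
   /* The conversion is emitted right after the sample; uses of the original
    * tex result (other than the conversion itself) are then redirected to
    * the converted value.
    */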
   nir_ssa_def *result = nir_convert_ycbcr_to_rgb(builder,
                                                  ycbcr_sampler->ycbcr_model,
                                                  ycbcr_sampler->ycbcr_range,
                                                  &tex->dest.ssa,
                                                  bpcs);
   nir_ssa_def_rewrite_uses_after(&tex->dest.ssa, result,
                                  result->parent_instr);

   builder->cursor = nir_before_instr(&tex->instr);
}

static bool
lower_tex(nir_builder *b, nir_tex_instr *tex,
          struct tu_shader *shader, const struct tu_pipeline_layout *layout)
{
   lower_tex_ycbcr(layout, b, tex);

   int sampler_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_sampler_deref);
   if (sampler_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[sampler_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, true, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[sampler_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[sampler_src_idx].src_type = nir_tex_src_sampler_handle;
   }

   int tex_src_idx = nir_tex_instr_src_index(tex, nir_tex_src_texture_deref);
   if (tex_src_idx >= 0) {
      nir_deref_instr *deref = nir_src_as_deref(tex->src[tex_src_idx].src);
      nir_ssa_def *bindless = build_bindless(b, deref, false, shader, layout);
      nir_instr_rewrite_src(&tex->instr, &tex->src[tex_src_idx].src,
                            nir_src_for_ssa(bindless));
      tex->src[tex_src_idx].src_type = nir_tex_src_texture_handle;

      /* for the input attachment case: */
      if (bindless->parent_instr->type != nir_instr_type_intrinsic)
         tex->src[tex_src_idx].src_type = nir_tex_src_texture_offset;
   }

   return true;
}

static bool
lower_impl(nir_function_impl *impl, struct tu_shader *shader,
           const struct tu_pipeline_layout *layout)
{
   nir_builder b;
   nir_builder_init(&b, impl);
   bool progress = false;

   nir_foreach_block(block, impl) {
      nir_foreach_instr_safe(instr, block) {
         b.cursor = nir_before_instr(instr);
         switch (instr->type) {
         case nir_instr_type_tex:
            progress |= lower_tex(&b, nir_instr_as_tex(instr), shader, layout);
            break;
         case nir_instr_type_intrinsic:
            progress |= lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader, layout);
            break;
         default:
            break;
         }
      }
   }

   if (progress)
      nir_metadata_preserve(impl, nir_metadata_none);
   else
      nir_metadata_preserve(impl, nir_metadata_all);

   return progress;
}


/* Figure out the range of push constants that we're actually going to push to
 * the shader, and tell the backend to reserve this range when pushing UBO
 * constants.
 */

static void
gather_push_constants(nir_shader *shader, struct tu_shader *tu_shader)
{
   uint32_t min = UINT32_MAX, max = 0;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
            if (intrin->intrinsic != nir_intrinsic_load_push_constant)
               continue;

            uint32_t base = nir_intrinsic_base(intrin);
            uint32_t range = nir_intrinsic_range(intrin);
            min = MIN2(min, base);
            max = MAX2(max, base + range);
            break;
         }
      }
   }

   if (min >= max) {
      tu_shader->push_consts.lo = 0;
      tu_shader->push_consts.count = 0;
      return;
   }

   /* CP_LOAD_STATE OFFSET and NUM_UNIT are in units of vec4 (4 dwords),
    * however there's an alignment requirement of 4 on OFFSET.  Expand the
    * range and change units accordingly.
    */
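   /* E.g. a push constant range of bytes [96, 128) becomes
    * lo = (96/16)/4*4 = 4 and count = align(128, 16)/16 - 4 = 4,
    * both in vec4 units.
    */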
   tu_shader->push_consts.lo = (min / 16) / 4 * 4;
   tu_shader->push_consts.count =
      align(max, 16) / 16 - tu_shader->push_consts.lo;
}

static bool
tu_lower_io(nir_shader *shader, struct tu_shader *tu_shader,
            const struct tu_pipeline_layout *layout)
{
   bool progress = false;

   gather_push_constants(shader, tu_shader);

   nir_foreach_function(function, shader) {
      if (function->impl)
         progress |= lower_impl(function->impl, tu_shader, layout);
   }

   /* Remove now-unused variables so that when we gather the shader info later
    * they won't be counted.
    */

   if (progress)
      nir_opt_dce(shader);

   progress |=
      nir_remove_dead_variables(shader,
                                nir_var_uniform | nir_var_mem_ubo | nir_var_mem_ssbo,
                                NULL);

   return progress;
}

static bool
lower_image_size_filter(const nir_instr *instr, UNUSED const void *data)
{
   if (instr->type != nir_instr_type_intrinsic)
      return false;

   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   if (intrin->intrinsic != nir_intrinsic_bindless_image_size)
      return false;

   return (intrin->num_components == 3 && nir_intrinsic_image_dim(intrin) == GLSL_SAMPLER_DIM_CUBE);
}

/* imageSize() expects the last component of the return value to be the
 * number of layers in the texture array. In the case of a cube map array,
 * it will return an ivec3, with the third component being the number of
 * layer-faces. Therefore, we need to divide it by 6 (# faces of the
 * cube map).
 */
static nir_ssa_def *
lower_image_size_lower(nir_builder *b, nir_instr *instr, UNUSED void *data)
{
   nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
   b->cursor = nir_after_instr(&intrin->instr);
   nir_ssa_def *channels[NIR_MAX_VEC_COMPONENTS];
   for (unsigned i = 0; i < intrin->num_components; i++) {
      channels[i] = nir_vector_extract(b, &intrin->dest.ssa, nir_imm_int(b, i));
   }

   channels[2] = nir_idiv(b, channels[2], nir_imm_int(b, 6u));
   nir_ssa_def *result = nir_vec(b, channels, intrin->num_components);

   return result;
}

static bool
tu_lower_image_size(nir_shader *shader)
{
   return nir_shader_lower_instructions(shader,
                                        lower_image_size_filter,
                                        lower_image_size_lower,
                                        NULL);
}

static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   unsigned comp_size =
      glsl_type_is_boolean(type) ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size;
}

static void
tu_gather_xfb_info(nir_shader *nir, struct ir3_stream_output_info *info)
{
   nir_xfb_info *xfb = nir_gather_xfb_info(nir, NULL);

   if (!xfb)
      return;

   uint8_t output_map[VARYING_SLOT_TESS_MAX];
   memset(output_map, 0, sizeof(output_map));

   nir_foreach_shader_out_variable(var, nir) {
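      /* Compact varyings (e.g. gl_ClipDistance) are scalar arrays packed
       * four components to a slot, hence the DIV_ROUND_UP by 4.
       */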
      unsigned slots =
         var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)
                           : glsl_count_attribute_slots(var->type, false);
      for (unsigned i = 0; i < slots; i++)
         output_map[var->data.location + i] = var->data.driver_location + i;
   }

   assert(xfb->output_count < IR3_MAX_SO_OUTPUTS);
   info->num_outputs = xfb->output_count;

   for (int i = 0; i < IR3_MAX_SO_BUFFERS; i++) {
      info->stride[i] = xfb->buffers[i].stride / 4;
      info->buffer_to_stream[i] = xfb->buffer_to_stream[i];
   }

   info->streams_written = xfb->streams_written;

   for (int i = 0; i < xfb->output_count; i++) {
      info->output[i].register_index = output_map[xfb->outputs[i].location];
      info->output[i].start_component = xfb->outputs[i].component_offset;
      info->output[i].num_components =
         util_bitcount(xfb->outputs[i].component_mask);
      info->output[i].output_buffer = xfb->outputs[i].buffer;
      info->output[i].dst_offset = xfb->outputs[i].offset / 4;
      info->output[i].stream = xfb->buffer_to_stream[xfb->outputs[i].buffer];
   }

   ralloc_free(xfb);
}

struct tu_shader *
tu_shader_create(struct tu_device *dev,
                 nir_shader *nir,
                 unsigned multiview_mask,
                 struct tu_pipeline_layout *layout,
                 const VkAllocationCallbacks *alloc)
{
   struct tu_shader *shader;

   shader = vk_zalloc2(
      &dev->vk.alloc, alloc,
      sizeof(*shader),
      8, VK_SYSTEM_ALLOCATION_SCOPE_COMMAND);
   if (!shader)
      return NULL;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                     .use_fragcoord_sysval = true,
                     .use_layer_id_sysval = false,
                     /* When using multiview rendering, we must use
                      * gl_ViewIndex as the layer id to pass to the texture
                      * sampling function. gl_Layer doesn't work when
                      * multiview is enabled.
                      */
                     .use_view_id_for_layer = multiview_mask != 0,
                 });
   }

   /* This needs to happen before multiview lowering which rewrites store
    * instructions of the position variable, so that we can just rewrite one
    * store at the end instead of having to rewrite every store specified by
    * the user.
    */
   ir3_nir_lower_io_to_temporaries(nir);

   if (nir->info.stage == MESA_SHADER_VERTEX && multiview_mask) {
      tu_nir_lower_multiview(nir, multiview_mask,
                             &shader->multi_pos_output, dev);
   }

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   NIR_PASS_V(nir, nir_lower_explicit_io,
              nir_var_mem_ubo | nir_var_mem_ssbo,
              nir_address_format_vec2_index_32bit_offset);

   if (nir->info.stage == MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                 nir_var_mem_shared, shared_type_info);
      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared,
                 nir_address_format_32bit_offset);
   }

   nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);
   nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs, nir->info.stage);

   /* Gather information for transform feedback.  This should be called after:
    * - nir_split_per_member_structs.
    * - nir_remove_dead_variables with varyings, so that we can align
    *   stream outputs correctly.
    * - nir_assign_io_var_locations - to have valid driver_location
    */
   struct ir3_stream_output_info so_info = {};
   if (nir->info.stage == MESA_SHADER_VERTEX ||
       nir->info.stage == MESA_SHADER_TESS_EVAL ||
       nir->info.stage == MESA_SHADER_GEOMETRY)
      tu_gather_xfb_info(nir, &so_info);

   NIR_PASS_V(nir, tu_lower_io, shader, layout);

   NIR_PASS_V(nir, tu_lower_image_size);

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   ir3_finalize_nir(dev->compiler, nir);

   shader->ir3_shader =
      ir3_shader_from_nir(dev->compiler, nir,
                          align(shader->push_consts.count, 4),
                          &so_info);

   return shader;
}

void
tu_shader_destroy(struct tu_device *dev,
                  struct tu_shader *shader,
                  const VkAllocationCallbacks *alloc)
{
   ir3_shader_destroy(shader->ir3_shader);

   vk_free2(&dev->vk.alloc, alloc, shader);
}