Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pipeline.c
4560 views
/*1* Copyright © 2019 Raspberry Pi2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "vk_util.h"2425#include "v3dv_debug.h"26#include "v3dv_private.h"2728#include "vk_format_info.h"2930#include "common/v3d_debug.h"3132#include "compiler/nir/nir_builder.h"33#include "nir/nir_serialize.h"3435#include "util/u_atomic.h"3637#include "vulkan/util/vk_format.h"3839static VkResult40compute_vpm_config(struct v3dv_pipeline *pipeline);4142void43v3dv_print_v3d_key(struct v3d_key *key,44uint32_t v3d_key_size)45{46struct mesa_sha1 ctx;47unsigned char sha1[20];48char sha1buf[41];4950_mesa_sha1_init(&ctx);5152_mesa_sha1_update(&ctx, key, v3d_key_size);5354_mesa_sha1_final(&ctx, sha1);55_mesa_sha1_format(sha1buf, sha1);5657fprintf(stderr, "key %p: %s\n", key, sha1buf);58}5960static void61pipeline_compute_sha1_from_nir(nir_shader *nir,62unsigned char sha1[20])63{64assert(nir);65struct blob blob;66blob_init(&blob);6768nir_serialize(&blob, nir, false);69if (!blob.out_of_memory)70_mesa_sha1_compute(blob.data, blob.size, sha1);7172blob_finish(&blob);73}7475void76v3dv_shader_module_internal_init(struct v3dv_device *device,77struct vk_shader_module *module,78nir_shader *nir)79{80vk_object_base_init(&device->vk, &module->base,81VK_OBJECT_TYPE_SHADER_MODULE);82module->nir = nir;83module->size = 0;8485pipeline_compute_sha1_from_nir(nir, module->sha1);86}8788void89v3dv_shader_variant_destroy(struct v3dv_device *device,90struct v3dv_shader_variant *variant)91{92/* The assembly BO is shared by all variants in the pipeline, so it can't93* be freed here and should be freed with the pipeline94*/95ralloc_free(variant->prog_data.base);96vk_free(&device->vk.alloc, variant);97}9899static void100destroy_pipeline_stage(struct v3dv_device *device,101struct v3dv_pipeline_stage *p_stage,102const VkAllocationCallbacks *pAllocator)103{104if (!p_stage)105return;106107ralloc_free(p_stage->nir);108vk_free2(&device->vk.alloc, pAllocator, p_stage);109}110111static void112pipeline_free_stages(struct v3dv_device *device,113struct v3dv_pipeline *pipeline,114const VkAllocationCallbacks *pAllocator)115{116assert(pipeline);117118/* FIXME: we can't just use a loop over mesa stage due the bin, would be119* good to find an alternative.120*/121destroy_pipeline_stage(device, pipeline->vs, pAllocator);122destroy_pipeline_stage(device, pipeline->vs_bin, pAllocator);123destroy_pipeline_stage(device, pipeline->gs, pAllocator);124destroy_pipeline_stage(device, pipeline->gs_bin, pAllocator);125destroy_pipeline_stage(device, pipeline->fs, pAllocator);126destroy_pipeline_stage(device, pipeline->cs, pAllocator);127128pipeline->vs = NULL;129pipeline->vs_bin = NULL;130pipeline->gs = NULL;131pipeline->gs_bin = NULL;132pipeline->fs = NULL;133pipeline->cs = NULL;134}135136static void137v3dv_destroy_pipeline(struct v3dv_pipeline *pipeline,138struct v3dv_device *device,139const VkAllocationCallbacks *pAllocator)140{141if (!pipeline)142return;143144pipeline_free_stages(device, pipeline, pAllocator);145146if (pipeline->shared_data) {147v3dv_pipeline_shared_data_unref(device, pipeline->shared_data);148pipeline->shared_data = NULL;149}150151if (pipeline->spill.bo) {152assert(pipeline->spill.size_per_thread > 0);153v3dv_bo_free(device, pipeline->spill.bo);154}155156if (pipeline->default_attribute_values) {157v3dv_bo_free(device, pipeline->default_attribute_values);158pipeline->default_attribute_values = NULL;159}160161vk_object_free(&device->vk, pAllocator, pipeline);162}163164VKAPI_ATTR void VKAPI_CALL165v3dv_DestroyPipeline(VkDevice _device,166VkPipeline _pipeline,167const VkAllocationCallbacks *pAllocator)168{169V3DV_FROM_HANDLE(v3dv_device, device, _device);170V3DV_FROM_HANDLE(v3dv_pipeline, pipeline, _pipeline);171172if (!pipeline)173return;174175v3dv_destroy_pipeline(pipeline, device, pAllocator);176}177178static const struct spirv_to_nir_options default_spirv_options = {179.caps = {180.device_group = true,181.variable_pointers = true,182.subgroup_basic = true,183},184.ubo_addr_format = nir_address_format_32bit_index_offset,185.ssbo_addr_format = nir_address_format_32bit_index_offset,186.phys_ssbo_addr_format = nir_address_format_64bit_global,187.push_const_addr_format = nir_address_format_logical,188.shared_addr_format = nir_address_format_32bit_offset,189.frag_coord_is_sysval = false,190};191192const nir_shader_compiler_options v3dv_nir_options = {193.lower_add_sat = true,194.lower_all_io_to_temps = true,195.lower_extract_byte = true,196.lower_extract_word = true,197.lower_insert_byte = true,198.lower_insert_word = true,199.lower_bitfield_insert_to_shifts = true,200.lower_bitfield_extract_to_shifts = true,201.lower_bitfield_reverse = true,202.lower_bit_count = true,203.lower_cs_local_id_from_index = true,204.lower_ffract = true,205.lower_fmod = true,206.lower_pack_unorm_2x16 = true,207.lower_pack_snorm_2x16 = true,208.lower_unpack_unorm_2x16 = true,209.lower_unpack_snorm_2x16 = true,210.lower_pack_unorm_4x8 = true,211.lower_pack_snorm_4x8 = true,212.lower_unpack_unorm_4x8 = true,213.lower_unpack_snorm_4x8 = true,214.lower_pack_half_2x16 = true,215.lower_unpack_half_2x16 = true,216/* FIXME: see if we can avoid the uadd_carry and usub_borrow lowering and217* get the tests to pass since it might produce slightly better code.218*/219.lower_uadd_carry = true,220.lower_usub_borrow = true,221/* FIXME: check if we can use multop + umul24 to implement mul2x32_64222* without lowering.223*/224.lower_mul_2x32_64 = true,225.lower_fdiv = true,226.lower_find_lsb = true,227.lower_ffma16 = true,228.lower_ffma32 = true,229.lower_ffma64 = true,230.lower_flrp32 = true,231.lower_fpow = true,232.lower_fsat = true,233.lower_fsqrt = true,234.lower_ifind_msb = true,235.lower_isign = true,236.lower_ldexp = true,237.lower_mul_high = true,238.lower_wpos_pntc = true,239.lower_rotate = true,240.lower_to_scalar = true,241.lower_device_index_to_zero = true,242.has_fsub = true,243.has_isub = true,244.vertex_id_zero_based = false, /* FIXME: to set this to true, the intrinsic245* needs to be supported */246.lower_interpolate_at = true,247.max_unroll_iterations = 16,248.divergence_analysis_options =249nir_divergence_multiple_workgroup_per_compute_subgroup250};251252const nir_shader_compiler_options *253v3dv_pipeline_get_nir_options(void)254{255return &v3dv_nir_options;256}257258#define OPT(pass, ...) ({ \259bool this_progress = false; \260NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__); \261if (this_progress) \262progress = true; \263this_progress; \264})265266static void267nir_optimize(nir_shader *nir,268struct v3dv_pipeline_stage *stage,269bool allow_copies)270{271bool progress;272273do {274progress = false;275OPT(nir_split_array_vars, nir_var_function_temp);276OPT(nir_shrink_vec_array_vars, nir_var_function_temp);277OPT(nir_opt_deref);278OPT(nir_lower_vars_to_ssa);279if (allow_copies) {280/* Only run this pass in the first call to nir_optimize. Later calls281* assume that we've lowered away any copy_deref instructions and we282* don't want to introduce any more.283*/284OPT(nir_opt_find_array_copies);285}286OPT(nir_opt_copy_prop_vars);287OPT(nir_opt_dead_write_vars);288OPT(nir_opt_combine_stores, nir_var_all);289290OPT(nir_lower_alu_to_scalar, NULL, NULL);291292OPT(nir_copy_prop);293OPT(nir_lower_phis_to_scalar, false);294295OPT(nir_copy_prop);296OPT(nir_opt_dce);297OPT(nir_opt_cse);298OPT(nir_opt_combine_stores, nir_var_all);299300/* Passing 0 to the peephole select pass causes it to convert301* if-statements that contain only move instructions in the branches302* regardless of the count.303*304* Passing 1 to the peephole select pass causes it to convert305* if-statements that contain at most a single ALU instruction (total)306* in both branches.307*/308OPT(nir_opt_peephole_select, 0, false, false);309OPT(nir_opt_peephole_select, 8, false, true);310311OPT(nir_opt_intrinsics);312OPT(nir_opt_idiv_const, 32);313OPT(nir_opt_algebraic);314OPT(nir_opt_constant_folding);315316OPT(nir_opt_dead_cf);317318OPT(nir_opt_if, false);319OPT(nir_opt_conditional_discard);320321OPT(nir_opt_remove_phis);322OPT(nir_opt_undef);323OPT(nir_lower_pack);324} while (progress);325326OPT(nir_remove_dead_variables, nir_var_function_temp, NULL);327}328329static void330preprocess_nir(nir_shader *nir,331struct v3dv_pipeline_stage *stage)332{333/* Make sure we lower variable initializers on output variables so that334* nir_remove_dead_variables below sees the corresponding stores335*/336NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_shader_out);337338/* Now that we've deleted all but the main function, we can go ahead and339* lower the rest of the variable initializers.340*/341NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);342343/* Split member structs. We do this before lower_io_to_temporaries so that344* it doesn't lower system values to temporaries by accident.345*/346NIR_PASS_V(nir, nir_split_var_copies);347NIR_PASS_V(nir, nir_split_per_member_structs);348349if (nir->info.stage == MESA_SHADER_FRAGMENT)350NIR_PASS_V(nir, nir_lower_io_to_vector, nir_var_shader_out);351if (nir->info.stage == MESA_SHADER_FRAGMENT) {352NIR_PASS_V(nir, nir_lower_input_attachments,353&(nir_input_attachment_options) {354.use_fragcoord_sysval = false,355});356}357358NIR_PASS_V(nir, nir_lower_explicit_io,359nir_var_mem_push_const,360nir_address_format_32bit_offset);361362NIR_PASS_V(nir, nir_lower_explicit_io,363nir_var_mem_ubo | nir_var_mem_ssbo,364nir_address_format_32bit_index_offset);365366NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_in |367nir_var_shader_out | nir_var_system_value | nir_var_mem_shared,368NULL);369370NIR_PASS_V(nir, nir_propagate_invariant, false);371NIR_PASS_V(nir, nir_lower_io_to_temporaries,372nir_shader_get_entrypoint(nir), true, false);373374NIR_PASS_V(nir, nir_lower_system_values);375NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);376377NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);378379NIR_PASS_V(nir, nir_normalize_cubemap_coords);380381NIR_PASS_V(nir, nir_lower_global_vars_to_local);382383NIR_PASS_V(nir, nir_split_var_copies);384NIR_PASS_V(nir, nir_split_struct_vars, nir_var_function_temp);385386nir_optimize(nir, stage, true);387388NIR_PASS_V(nir, nir_lower_load_const_to_scalar);389390/* Lower a bunch of stuff */391NIR_PASS_V(nir, nir_lower_var_copies);392393NIR_PASS_V(nir, nir_lower_indirect_derefs, nir_var_shader_in, UINT32_MAX);394395NIR_PASS_V(nir, nir_lower_indirect_derefs,396nir_var_function_temp, 2);397398NIR_PASS_V(nir, nir_lower_array_deref_of_vec,399nir_var_mem_ubo | nir_var_mem_ssbo,400nir_lower_direct_array_deref_of_vec_load);401402NIR_PASS_V(nir, nir_lower_frexp);403404/* Get rid of split copies */405nir_optimize(nir, stage, false);406}407408/* FIXME: This is basically the same code at anv, tu and radv. Move to common409* place?410*/411static struct nir_spirv_specialization*412vk_spec_info_to_nir_spirv(const VkSpecializationInfo *spec_info,413uint32_t *out_num_spec_entries)414{415if (spec_info == NULL || spec_info->mapEntryCount == 0)416return NULL;417418uint32_t num_spec_entries = spec_info->mapEntryCount;419struct nir_spirv_specialization *spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));420421for (uint32_t i = 0; i < num_spec_entries; i++) {422VkSpecializationMapEntry entry = spec_info->pMapEntries[i];423const void *data = spec_info->pData + entry.offset;424assert(data + entry.size <= spec_info->pData + spec_info->dataSize);425426spec_entries[i].id = spec_info->pMapEntries[i].constantID;427switch (entry.size) {428case 8:429spec_entries[i].value.u64 = *(const uint64_t *)data;430break;431case 4:432spec_entries[i].value.u32 = *(const uint32_t *)data;433break;434case 2:435spec_entries[i].value.u16 = *(const uint16_t *)data;436break;437case 1:438spec_entries[i].value.u8 = *(const uint8_t *)data;439break;440default:441assert(!"Invalid spec constant size");442break;443}444}445446*out_num_spec_entries = num_spec_entries;447return spec_entries;448}449450static nir_shader *451shader_module_compile_to_nir(struct v3dv_device *device,452struct v3dv_pipeline_stage *stage)453{454nir_shader *nir;455const nir_shader_compiler_options *nir_options = &v3dv_nir_options;456457if (!stage->module->nir) {458uint32_t *spirv = (uint32_t *) stage->module->data;459assert(stage->module->size % 4 == 0);460461if (V3D_DEBUG & V3D_DEBUG_DUMP_SPIRV)462v3dv_print_spirv(stage->module->data, stage->module->size, stderr);463464uint32_t num_spec_entries = 0;465struct nir_spirv_specialization *spec_entries =466vk_spec_info_to_nir_spirv(stage->spec_info, &num_spec_entries);467const struct spirv_to_nir_options spirv_options = default_spirv_options;468nir = spirv_to_nir(spirv, stage->module->size / 4,469spec_entries, num_spec_entries,470broadcom_shader_stage_to_gl(stage->stage),471stage->entrypoint,472&spirv_options, nir_options);473assert(nir);474nir_validate_shader(nir, "after spirv_to_nir");475free(spec_entries);476} else {477/* For NIR modules created by the driver we can't consume the NIR478* directly, we need to clone it first, since ownership of the NIR code479* (as with SPIR-V code for SPIR-V shaders), belongs to the creator480* of the module and modules can be destroyed immediately after been used481* to create pipelines.482*/483nir = nir_shader_clone(NULL, stage->module->nir);484nir_validate_shader(nir, "nir module");485}486assert(nir->info.stage == broadcom_shader_stage_to_gl(stage->stage));487488if (V3D_DEBUG & (V3D_DEBUG_NIR |489v3d_debug_flag_for_shader_stage(490broadcom_shader_stage_to_gl(stage->stage)))) {491fprintf(stderr, "Initial form: %s prog %d NIR:\n",492broadcom_shader_stage_name(stage->stage),493stage->program_id);494nir_print_shader(nir, stderr);495fprintf(stderr, "\n");496}497498/* We have to lower away local variable initializers right before we499* inline functions. That way they get properly initialized at the top500* of the function and not at the top of its caller.501*/502NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);503NIR_PASS_V(nir, nir_lower_returns);504NIR_PASS_V(nir, nir_inline_functions);505NIR_PASS_V(nir, nir_opt_deref);506507/* Pick off the single entrypoint that we want */508foreach_list_typed_safe(nir_function, func, node, &nir->functions) {509if (func->is_entrypoint)510func->name = ralloc_strdup(func, "main");511else512exec_node_remove(&func->node);513}514assert(exec_list_length(&nir->functions) == 1);515516/* Vulkan uses the separate-shader linking model */517nir->info.separate_shader = true;518519preprocess_nir(nir, stage);520521return nir;522}523524static int525type_size_vec4(const struct glsl_type *type, bool bindless)526{527return glsl_count_attribute_slots(type, false);528}529530/* FIXME: the number of parameters for this method is somewhat big. Perhaps531* rethink.532*/533static unsigned534descriptor_map_add(struct v3dv_descriptor_map *map,535int set,536int binding,537int array_index,538int array_size,539uint8_t return_size)540{541assert(array_index < array_size);542assert(return_size == 16 || return_size == 32);543544unsigned index = 0;545for (unsigned i = 0; i < map->num_desc; i++) {546if (set == map->set[i] &&547binding == map->binding[i] &&548array_index == map->array_index[i]) {549assert(array_size == map->array_size[i]);550if (return_size != map->return_size[index]) {551/* It the return_size is different it means that the same sampler552* was used for operations with different precision553* requirement. In this case we need to ensure that we use the554* larger one.555*/556map->return_size[index] = 32;557}558return index;559}560index++;561}562563assert(index == map->num_desc);564565map->set[map->num_desc] = set;566map->binding[map->num_desc] = binding;567map->array_index[map->num_desc] = array_index;568map->array_size[map->num_desc] = array_size;569map->return_size[map->num_desc] = return_size;570map->num_desc++;571572return index;573}574575576static void577lower_load_push_constant(nir_builder *b, nir_intrinsic_instr *instr,578struct v3dv_pipeline *pipeline)579{580assert(instr->intrinsic == nir_intrinsic_load_push_constant);581instr->intrinsic = nir_intrinsic_load_uniform;582}583584static struct v3dv_descriptor_map*585pipeline_get_descriptor_map(struct v3dv_pipeline *pipeline,586VkDescriptorType desc_type,587gl_shader_stage gl_stage,588bool is_sampler)589{590enum broadcom_shader_stage broadcom_stage =591gl_shader_stage_to_broadcom(gl_stage);592593assert(pipeline->shared_data &&594pipeline->shared_data->maps[broadcom_stage]);595596switch(desc_type) {597case VK_DESCRIPTOR_TYPE_SAMPLER:598return &pipeline->shared_data->maps[broadcom_stage]->sampler_map;599case VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE:600case VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT:601case VK_DESCRIPTOR_TYPE_STORAGE_IMAGE:602case VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER:603case VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER:604return &pipeline->shared_data->maps[broadcom_stage]->texture_map;605case VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER:606return is_sampler ?607&pipeline->shared_data->maps[broadcom_stage]->sampler_map :608&pipeline->shared_data->maps[broadcom_stage]->texture_map;609case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:610return &pipeline->shared_data->maps[broadcom_stage]->ubo_map;611case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER:612return &pipeline->shared_data->maps[broadcom_stage]->ssbo_map;613default:614unreachable("Descriptor type unknown or not having a descriptor map");615}616}617618/* Gathers info from the intrinsic (set and binding) and then lowers it so it619* could be used by the v3d_compiler */620static void621lower_vulkan_resource_index(nir_builder *b,622nir_intrinsic_instr *instr,623nir_shader *shader,624struct v3dv_pipeline *pipeline,625const struct v3dv_pipeline_layout *layout)626{627assert(instr->intrinsic == nir_intrinsic_vulkan_resource_index);628629nir_const_value *const_val = nir_src_as_const_value(instr->src[0]);630631unsigned set = nir_intrinsic_desc_set(instr);632unsigned binding = nir_intrinsic_binding(instr);633struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;634struct v3dv_descriptor_set_binding_layout *binding_layout =635&set_layout->binding[binding];636unsigned index = 0;637const VkDescriptorType desc_type = nir_intrinsic_desc_type(instr);638639switch (desc_type) {640case VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER:641case VK_DESCRIPTOR_TYPE_STORAGE_BUFFER: {642struct v3dv_descriptor_map *descriptor_map =643pipeline_get_descriptor_map(pipeline, desc_type, shader->info.stage, false);644645if (!const_val)646unreachable("non-constant vulkan_resource_index array index");647648index = descriptor_map_add(descriptor_map, set, binding,649const_val->u32,650binding_layout->array_size,65132 /* return_size: doesn't really apply for this case */);652653if (desc_type == VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER) {654/* skip index 0 which is used for push constants */655index++;656}657break;658}659660default:661unreachable("unsupported desc_type for vulkan_resource_index");662break;663}664665/* Since we use the deref pass, both vulkan_resource_index and666* vulkan_load_descriptor return a vec2 providing an index and667* offset. Our backend compiler only cares about the index part.668*/669nir_ssa_def_rewrite_uses(&instr->dest.ssa,670nir_imm_ivec2(b, index, 0));671nir_instr_remove(&instr->instr);672}673674/* Returns return_size, so it could be used for the case of not having a675* sampler object676*/677static uint8_t678lower_tex_src_to_offset(nir_builder *b, nir_tex_instr *instr, unsigned src_idx,679nir_shader *shader,680struct v3dv_pipeline *pipeline,681const struct v3dv_pipeline_layout *layout)682{683nir_ssa_def *index = NULL;684unsigned base_index = 0;685unsigned array_elements = 1;686nir_tex_src *src = &instr->src[src_idx];687bool is_sampler = src->src_type == nir_tex_src_sampler_deref;688689/* We compute first the offsets */690nir_deref_instr *deref = nir_instr_as_deref(src->src.ssa->parent_instr);691while (deref->deref_type != nir_deref_type_var) {692assert(deref->parent.is_ssa);693nir_deref_instr *parent =694nir_instr_as_deref(deref->parent.ssa->parent_instr);695696assert(deref->deref_type == nir_deref_type_array);697698if (nir_src_is_const(deref->arr.index) && index == NULL) {699/* We're still building a direct index */700base_index += nir_src_as_uint(deref->arr.index) * array_elements;701} else {702if (index == NULL) {703/* We used to be direct but not anymore */704index = nir_imm_int(b, base_index);705base_index = 0;706}707708index = nir_iadd(b, index,709nir_imul(b, nir_imm_int(b, array_elements),710nir_ssa_for_src(b, deref->arr.index, 1)));711}712713array_elements *= glsl_get_length(parent->type);714715deref = parent;716}717718if (index)719index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));720721/* We have the offsets, we apply them, rewriting the source or removing722* instr if needed723*/724if (index) {725nir_instr_rewrite_src(&instr->instr, &src->src,726nir_src_for_ssa(index));727728src->src_type = is_sampler ?729nir_tex_src_sampler_offset :730nir_tex_src_texture_offset;731} else {732nir_tex_instr_remove_src(instr, src_idx);733}734735uint32_t set = deref->var->data.descriptor_set;736uint32_t binding = deref->var->data.binding;737/* FIXME: this is a really simplified check for the precision to be used738* for the sampling. Right now we are ony checking for the variables used739* on the operation itself, but there are other cases that we could use to740* infer the precision requirement.741*/742bool relaxed_precision = deref->var->data.precision == GLSL_PRECISION_MEDIUM ||743deref->var->data.precision == GLSL_PRECISION_LOW;744struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;745struct v3dv_descriptor_set_binding_layout *binding_layout =746&set_layout->binding[binding];747748/* For input attachments, the shader includes the attachment_idx. As we are749* treating them as a texture, we only want the base_index750*/751uint32_t array_index = binding_layout->type != VK_DESCRIPTOR_TYPE_INPUT_ATTACHMENT ?752deref->var->data.index + base_index :753base_index;754755uint8_t return_size = relaxed_precision || instr->is_shadow ? 16 : 32;756757struct v3dv_descriptor_map *map =758pipeline_get_descriptor_map(pipeline, binding_layout->type,759shader->info.stage, is_sampler);760int desc_index =761descriptor_map_add(map,762deref->var->data.descriptor_set,763deref->var->data.binding,764array_index,765binding_layout->array_size,766return_size);767768if (is_sampler)769instr->sampler_index = desc_index;770else771instr->texture_index = desc_index;772773return return_size;774}775776static bool777lower_sampler(nir_builder *b, nir_tex_instr *instr,778nir_shader *shader,779struct v3dv_pipeline *pipeline,780const struct v3dv_pipeline_layout *layout)781{782uint8_t return_size = 0;783784int texture_idx =785nir_tex_instr_src_index(instr, nir_tex_src_texture_deref);786787if (texture_idx >= 0)788return_size = lower_tex_src_to_offset(b, instr, texture_idx, shader,789pipeline, layout);790791int sampler_idx =792nir_tex_instr_src_index(instr, nir_tex_src_sampler_deref);793794if (sampler_idx >= 0)795lower_tex_src_to_offset(b, instr, sampler_idx, shader, pipeline, layout);796797if (texture_idx < 0 && sampler_idx < 0)798return false;799800/* If we don't have a sampler, we assign it the idx we reserve for this801* case, and we ensure that it is using the correct return size.802*/803if (sampler_idx < 0) {804instr->sampler_index = return_size == 16 ?805V3DV_NO_SAMPLER_16BIT_IDX : V3DV_NO_SAMPLER_32BIT_IDX;806}807808return true;809}810811/* FIXME: really similar to lower_tex_src_to_offset, perhaps refactor? */812static void813lower_image_deref(nir_builder *b,814nir_intrinsic_instr *instr,815nir_shader *shader,816struct v3dv_pipeline *pipeline,817const struct v3dv_pipeline_layout *layout)818{819nir_deref_instr *deref = nir_src_as_deref(instr->src[0]);820nir_ssa_def *index = NULL;821unsigned array_elements = 1;822unsigned base_index = 0;823824while (deref->deref_type != nir_deref_type_var) {825assert(deref->parent.is_ssa);826nir_deref_instr *parent =827nir_instr_as_deref(deref->parent.ssa->parent_instr);828829assert(deref->deref_type == nir_deref_type_array);830831if (nir_src_is_const(deref->arr.index) && index == NULL) {832/* We're still building a direct index */833base_index += nir_src_as_uint(deref->arr.index) * array_elements;834} else {835if (index == NULL) {836/* We used to be direct but not anymore */837index = nir_imm_int(b, base_index);838base_index = 0;839}840841index = nir_iadd(b, index,842nir_imul(b, nir_imm_int(b, array_elements),843nir_ssa_for_src(b, deref->arr.index, 1)));844}845846array_elements *= glsl_get_length(parent->type);847848deref = parent;849}850851if (index)852index = nir_umin(b, index, nir_imm_int(b, array_elements - 1));853854uint32_t set = deref->var->data.descriptor_set;855uint32_t binding = deref->var->data.binding;856struct v3dv_descriptor_set_layout *set_layout = layout->set[set].layout;857struct v3dv_descriptor_set_binding_layout *binding_layout =858&set_layout->binding[binding];859860uint32_t array_index = deref->var->data.index + base_index;861862assert(binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_IMAGE ||863binding_layout->type == VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER);864865struct v3dv_descriptor_map *map =866pipeline_get_descriptor_map(pipeline, binding_layout->type,867shader->info.stage, false);868869int desc_index =870descriptor_map_add(map,871deref->var->data.descriptor_set,872deref->var->data.binding,873array_index,874binding_layout->array_size,87532 /* return_size: doesn't apply for textures */);876877/* Note: we don't need to do anything here in relation to the precision and878* the output size because for images we can infer that info from the image879* intrinsic, that includes the image format (see880* NIR_INTRINSIC_FORMAT). That is done by the v3d compiler.881*/882883index = nir_imm_int(b, desc_index);884885nir_rewrite_image_intrinsic(instr, index, false);886}887888static bool889lower_intrinsic(nir_builder *b, nir_intrinsic_instr *instr,890nir_shader *shader,891struct v3dv_pipeline *pipeline,892const struct v3dv_pipeline_layout *layout)893{894switch (instr->intrinsic) {895case nir_intrinsic_load_layer_id:896/* FIXME: if layered rendering gets supported, this would need a real897* lowering898*/899nir_ssa_def_rewrite_uses(&instr->dest.ssa,900nir_imm_int(b, 0));901nir_instr_remove(&instr->instr);902return true;903904case nir_intrinsic_load_push_constant:905lower_load_push_constant(b, instr, pipeline);906return true;907908case nir_intrinsic_vulkan_resource_index:909lower_vulkan_resource_index(b, instr, shader, pipeline, layout);910return true;911912case nir_intrinsic_load_vulkan_descriptor: {913/* Loading the descriptor happens as part of load/store instructions,914* so for us this is a no-op.915*/916nir_ssa_def_rewrite_uses(&instr->dest.ssa, instr->src[0].ssa);917nir_instr_remove(&instr->instr);918return true;919}920921case nir_intrinsic_image_deref_load:922case nir_intrinsic_image_deref_store:923case nir_intrinsic_image_deref_atomic_add:924case nir_intrinsic_image_deref_atomic_imin:925case nir_intrinsic_image_deref_atomic_umin:926case nir_intrinsic_image_deref_atomic_imax:927case nir_intrinsic_image_deref_atomic_umax:928case nir_intrinsic_image_deref_atomic_and:929case nir_intrinsic_image_deref_atomic_or:930case nir_intrinsic_image_deref_atomic_xor:931case nir_intrinsic_image_deref_atomic_exchange:932case nir_intrinsic_image_deref_atomic_comp_swap:933case nir_intrinsic_image_deref_size:934case nir_intrinsic_image_deref_samples:935lower_image_deref(b, instr, shader, pipeline, layout);936return true;937938default:939return false;940}941}942943static bool944lower_impl(nir_function_impl *impl,945nir_shader *shader,946struct v3dv_pipeline *pipeline,947const struct v3dv_pipeline_layout *layout)948{949nir_builder b;950nir_builder_init(&b, impl);951bool progress = false;952953nir_foreach_block(block, impl) {954nir_foreach_instr_safe(instr, block) {955b.cursor = nir_before_instr(instr);956switch (instr->type) {957case nir_instr_type_tex:958progress |=959lower_sampler(&b, nir_instr_as_tex(instr), shader, pipeline, layout);960break;961case nir_instr_type_intrinsic:962progress |=963lower_intrinsic(&b, nir_instr_as_intrinsic(instr), shader,964pipeline, layout);965break;966default:967break;968}969}970}971972return progress;973}974975static bool976lower_pipeline_layout_info(nir_shader *shader,977struct v3dv_pipeline *pipeline,978const struct v3dv_pipeline_layout *layout)979{980bool progress = false;981982nir_foreach_function(function, shader) {983if (function->impl)984progress |= lower_impl(function->impl, shader, pipeline, layout);985}986987return progress;988}989990991static void992lower_fs_io(nir_shader *nir)993{994/* Our backend doesn't handle array fragment shader outputs */995NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);996NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_out, NULL);997998nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,999MESA_SHADER_FRAGMENT);10001001nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,1002MESA_SHADER_FRAGMENT);10031004NIR_PASS_V(nir, nir_lower_io, nir_var_shader_in | nir_var_shader_out,1005type_size_vec4, 0);1006}10071008static void1009lower_gs_io(struct nir_shader *nir)1010{1011NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);10121013nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,1014MESA_SHADER_GEOMETRY);10151016nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,1017MESA_SHADER_GEOMETRY);1018}10191020static void1021lower_vs_io(struct nir_shader *nir)1022{1023NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);10241025nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs,1026MESA_SHADER_VERTEX);10271028nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,1029MESA_SHADER_VERTEX);10301031/* FIXME: if we call nir_lower_io, we get a crash later. Likely because it1032* overlaps with v3d_nir_lower_io. Need further research though.1033*/1034}10351036static void1037shader_debug_output(const char *message, void *data)1038{1039/* FIXME: We probably don't want to debug anything extra here, and in fact1040* the compiler is not using this callback too much, only as an alternative1041* way to debug out the shaderdb stats, that you can already get using1042* V3D_DEBUG=shaderdb. Perhaps it would make sense to revisit the v3d1043* compiler to remove that callback.1044*/1045}10461047static void1048pipeline_populate_v3d_key(struct v3d_key *key,1049const struct v3dv_pipeline_stage *p_stage,1050uint32_t ucp_enables,1051bool robust_buffer_access)1052{1053assert(p_stage->pipeline->shared_data &&1054p_stage->pipeline->shared_data->maps[p_stage->stage]);10551056/* The following values are default values used at pipeline create. We use1057* there 32 bit as default return size.1058*/1059struct v3dv_descriptor_map *sampler_map =1060&p_stage->pipeline->shared_data->maps[p_stage->stage]->sampler_map;1061struct v3dv_descriptor_map *texture_map =1062&p_stage->pipeline->shared_data->maps[p_stage->stage]->texture_map;10631064key->num_tex_used = texture_map->num_desc;1065assert(key->num_tex_used <= V3D_MAX_TEXTURE_SAMPLERS);1066for (uint32_t tex_idx = 0; tex_idx < texture_map->num_desc; tex_idx++) {1067key->tex[tex_idx].swizzle[0] = PIPE_SWIZZLE_X;1068key->tex[tex_idx].swizzle[1] = PIPE_SWIZZLE_Y;1069key->tex[tex_idx].swizzle[2] = PIPE_SWIZZLE_Z;1070key->tex[tex_idx].swizzle[3] = PIPE_SWIZZLE_W;1071}10721073key->num_samplers_used = sampler_map->num_desc;1074assert(key->num_samplers_used <= V3D_MAX_TEXTURE_SAMPLERS);1075for (uint32_t sampler_idx = 0; sampler_idx < sampler_map->num_desc;1076sampler_idx++) {1077key->sampler[sampler_idx].return_size =1078sampler_map->return_size[sampler_idx];10791080key->sampler[sampler_idx].return_channels =1081key->sampler[sampler_idx].return_size == 32 ? 4 : 2;1082}10831084switch (p_stage->stage) {1085case BROADCOM_SHADER_VERTEX:1086case BROADCOM_SHADER_VERTEX_BIN:1087key->is_last_geometry_stage = p_stage->pipeline->gs == NULL;1088break;1089case BROADCOM_SHADER_GEOMETRY:1090case BROADCOM_SHADER_GEOMETRY_BIN:1091/* FIXME: while we don't implement tessellation shaders */1092key->is_last_geometry_stage = true;1093break;1094case BROADCOM_SHADER_FRAGMENT:1095case BROADCOM_SHADER_COMPUTE:1096key->is_last_geometry_stage = false;1097break;1098default:1099unreachable("unsupported shader stage");1100}11011102/* Vulkan doesn't have fixed function state for user clip planes. Instead,1103* shaders can write to gl_ClipDistance[], in which case the SPIR-V compiler1104* takes care of adding a single compact array variable at1105* VARYING_SLOT_CLIP_DIST0, so we don't need any user clip plane lowering.1106*1107* The only lowering we are interested is specific to the fragment shader,1108* where we want to emit discards to honor writes to gl_ClipDistance[] in1109* previous stages. This is done via nir_lower_clip_fs() so we only set up1110* the ucp enable mask for that stage.1111*/1112key->ucp_enables = ucp_enables;11131114key->robust_buffer_access = robust_buffer_access;11151116key->environment = V3D_ENVIRONMENT_VULKAN;1117}11181119/* FIXME: anv maps to hw primitive type. Perhaps eventually we would do the1120* same. For not using prim_mode that is the one already used on v3d1121*/1122static const enum pipe_prim_type vk_to_pipe_prim_type[] = {1123[VK_PRIMITIVE_TOPOLOGY_POINT_LIST] = PIPE_PRIM_POINTS,1124[VK_PRIMITIVE_TOPOLOGY_LINE_LIST] = PIPE_PRIM_LINES,1125[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP] = PIPE_PRIM_LINE_STRIP,1126[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] = PIPE_PRIM_TRIANGLES,1127[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP] = PIPE_PRIM_TRIANGLE_STRIP,1128[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN] = PIPE_PRIM_TRIANGLE_FAN,1129[VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY] = PIPE_PRIM_LINES_ADJACENCY,1130[VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_LINE_STRIP_ADJACENCY,1131[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLES_ADJACENCY,1132[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = PIPE_PRIM_TRIANGLE_STRIP_ADJACENCY,1133};11341135static const enum pipe_logicop vk_to_pipe_logicop[] = {1136[VK_LOGIC_OP_CLEAR] = PIPE_LOGICOP_CLEAR,1137[VK_LOGIC_OP_AND] = PIPE_LOGICOP_AND,1138[VK_LOGIC_OP_AND_REVERSE] = PIPE_LOGICOP_AND_REVERSE,1139[VK_LOGIC_OP_COPY] = PIPE_LOGICOP_COPY,1140[VK_LOGIC_OP_AND_INVERTED] = PIPE_LOGICOP_AND_INVERTED,1141[VK_LOGIC_OP_NO_OP] = PIPE_LOGICOP_NOOP,1142[VK_LOGIC_OP_XOR] = PIPE_LOGICOP_XOR,1143[VK_LOGIC_OP_OR] = PIPE_LOGICOP_OR,1144[VK_LOGIC_OP_NOR] = PIPE_LOGICOP_NOR,1145[VK_LOGIC_OP_EQUIVALENT] = PIPE_LOGICOP_EQUIV,1146[VK_LOGIC_OP_INVERT] = PIPE_LOGICOP_INVERT,1147[VK_LOGIC_OP_OR_REVERSE] = PIPE_LOGICOP_OR_REVERSE,1148[VK_LOGIC_OP_COPY_INVERTED] = PIPE_LOGICOP_COPY_INVERTED,1149[VK_LOGIC_OP_OR_INVERTED] = PIPE_LOGICOP_OR_INVERTED,1150[VK_LOGIC_OP_NAND] = PIPE_LOGICOP_NAND,1151[VK_LOGIC_OP_SET] = PIPE_LOGICOP_SET,1152};11531154static void1155pipeline_populate_v3d_fs_key(struct v3d_fs_key *key,1156const VkGraphicsPipelineCreateInfo *pCreateInfo,1157const struct v3dv_pipeline_stage *p_stage,1158bool has_geometry_shader,1159uint32_t ucp_enables)1160{1161assert(p_stage->stage == BROADCOM_SHADER_FRAGMENT);11621163memset(key, 0, sizeof(*key));11641165const bool rba = p_stage->pipeline->device->features.robustBufferAccess;1166pipeline_populate_v3d_key(&key->base, p_stage, ucp_enables, rba);11671168const VkPipelineInputAssemblyStateCreateInfo *ia_info =1169pCreateInfo->pInputAssemblyState;1170uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];11711172key->is_points = (topology == PIPE_PRIM_POINTS);1173key->is_lines = (topology >= PIPE_PRIM_LINES &&1174topology <= PIPE_PRIM_LINE_STRIP);1175key->has_gs = has_geometry_shader;11761177const VkPipelineColorBlendStateCreateInfo *cb_info =1178pCreateInfo->pColorBlendState;11791180key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?1181vk_to_pipe_logicop[cb_info->logicOp] :1182PIPE_LOGICOP_COPY;11831184const bool raster_enabled =1185!pCreateInfo->pRasterizationState->rasterizerDiscardEnable;11861187/* Multisample rasterization state must be ignored if rasterization1188* is disabled.1189*/1190const VkPipelineMultisampleStateCreateInfo *ms_info =1191raster_enabled ? pCreateInfo->pMultisampleState : NULL;1192if (ms_info) {1193assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||1194ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);1195key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;11961197if (key->msaa) {1198key->sample_coverage =1199p_stage->pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;1200key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;1201key->sample_alpha_to_one = ms_info->alphaToOneEnable;1202}1203}12041205/* This is intended for V3D versions before 4.1, otherwise we just use the1206* tile buffer load/store swap R/B bit.1207*/1208key->swap_color_rb = 0;12091210const struct v3dv_render_pass *pass =1211v3dv_render_pass_from_handle(pCreateInfo->renderPass);1212const struct v3dv_subpass *subpass = p_stage->pipeline->subpass;1213for (uint32_t i = 0; i < subpass->color_count; i++) {1214const uint32_t att_idx = subpass->color_attachments[i].attachment;1215if (att_idx == VK_ATTACHMENT_UNUSED)1216continue;12171218key->cbufs |= 1 << i;12191220VkFormat fb_format = pass->attachments[att_idx].desc.format;1221enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);12221223/* If logic operations are enabled then we might emit color reads and we1224* need to know the color buffer format and swizzle for that1225*/1226if (key->logicop_func != PIPE_LOGICOP_COPY) {1227key->color_fmt[i].format = fb_pipe_format;1228key->color_fmt[i].swizzle =1229v3dv_get_format_swizzle(p_stage->pipeline->device, fb_format);1230}12311232const struct util_format_description *desc =1233vk_format_description(fb_format);12341235if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&1236desc->channel[0].size == 32) {1237key->f32_color_rb |= 1 << i;1238}12391240if (p_stage->nir->info.fs.untyped_color_outputs) {1241if (util_format_is_pure_uint(fb_pipe_format))1242key->uint_color_rb |= 1 << i;1243else if (util_format_is_pure_sint(fb_pipe_format))1244key->int_color_rb |= 1 << i;1245}12461247if (key->is_points) {1248/* FIXME: The mask would need to be computed based on the shader1249* inputs. On gallium it is done at st_atom_rasterizer1250* (sprite_coord_enable). anv seems (need to confirm) to do that on1251* genX_pipeline (PointSpriteTextureCoordinateEnable). Would be also1252* better to have tests to guide filling the mask.1253*/1254key->point_sprite_mask = 0;12551256/* Vulkan mandates upper left. */1257key->point_coord_upper_left = true;1258}1259}1260}12611262static void1263setup_stage_outputs_from_next_stage_inputs(1264uint8_t next_stage_num_inputs,1265struct v3d_varying_slot *next_stage_input_slots,1266uint8_t *num_used_outputs,1267struct v3d_varying_slot *used_output_slots,1268uint32_t size_of_used_output_slots)1269{1270*num_used_outputs = next_stage_num_inputs;1271memcpy(used_output_slots, next_stage_input_slots, size_of_used_output_slots);1272}12731274static void1275pipeline_populate_v3d_gs_key(struct v3d_gs_key *key,1276const VkGraphicsPipelineCreateInfo *pCreateInfo,1277const struct v3dv_pipeline_stage *p_stage)1278{1279assert(p_stage->stage == BROADCOM_SHADER_GEOMETRY ||1280p_stage->stage == BROADCOM_SHADER_GEOMETRY_BIN);12811282memset(key, 0, sizeof(*key));12831284const bool rba = p_stage->pipeline->device->features.robustBufferAccess;1285pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);12861287struct v3dv_pipeline *pipeline = p_stage->pipeline;12881289key->per_vertex_point_size =1290p_stage->nir->info.outputs_written & (1ull << VARYING_SLOT_PSIZ);12911292key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);12931294assert(key->base.is_last_geometry_stage);1295if (key->is_coord) {1296/* Output varyings in the last binning shader are only used for transform1297* feedback. Set to 0 as VK_EXT_transform_feedback is not supported.1298*/1299key->num_used_outputs = 0;1300} else {1301struct v3dv_shader_variant *fs_variant =1302pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];13031304STATIC_ASSERT(sizeof(key->used_outputs) ==1305sizeof(fs_variant->prog_data.fs->input_slots));13061307setup_stage_outputs_from_next_stage_inputs(1308fs_variant->prog_data.fs->num_inputs,1309fs_variant->prog_data.fs->input_slots,1310&key->num_used_outputs,1311key->used_outputs,1312sizeof(key->used_outputs));1313}1314}13151316static void1317pipeline_populate_v3d_vs_key(struct v3d_vs_key *key,1318const VkGraphicsPipelineCreateInfo *pCreateInfo,1319const struct v3dv_pipeline_stage *p_stage)1320{1321assert(p_stage->stage == BROADCOM_SHADER_VERTEX ||1322p_stage->stage == BROADCOM_SHADER_VERTEX_BIN);13231324memset(key, 0, sizeof(*key));13251326const bool rba = p_stage->pipeline->device->features.robustBufferAccess;1327pipeline_populate_v3d_key(&key->base, p_stage, 0, rba);13281329struct v3dv_pipeline *pipeline = p_stage->pipeline;13301331/* Vulkan specifies a point size per vertex, so true for if the prim are1332* points, like on ES2)1333*/1334const VkPipelineInputAssemblyStateCreateInfo *ia_info =1335pCreateInfo->pInputAssemblyState;1336uint8_t topology = vk_to_pipe_prim_type[ia_info->topology];13371338/* FIXME: PRIM_POINTS is not enough, in gallium the full check is1339* PIPE_PRIM_POINTS && v3d->rasterizer->base.point_size_per_vertex */1340key->per_vertex_point_size = (topology == PIPE_PRIM_POINTS);13411342key->is_coord = broadcom_shader_stage_is_binning(p_stage->stage);13431344if (key->is_coord) { /* Binning VS*/1345if (key->base.is_last_geometry_stage) {1346/* Output varyings in the last binning shader are only used for1347* transform feedback. Set to 0 as VK_EXT_transform_feedback is not1348* supported.1349*/1350key->num_used_outputs = 0;1351} else {1352/* Linking against GS binning program */1353assert(pipeline->gs);1354struct v3dv_shader_variant *gs_bin_variant =1355pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];13561357STATIC_ASSERT(sizeof(key->used_outputs) ==1358sizeof(gs_bin_variant->prog_data.gs->input_slots));13591360setup_stage_outputs_from_next_stage_inputs(1361gs_bin_variant->prog_data.gs->num_inputs,1362gs_bin_variant->prog_data.gs->input_slots,1363&key->num_used_outputs,1364key->used_outputs,1365sizeof(key->used_outputs));1366}1367} else { /* Render VS */1368if (pipeline->gs) {1369/* Linking against GS render program */1370struct v3dv_shader_variant *gs_variant =1371pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];13721373STATIC_ASSERT(sizeof(key->used_outputs) ==1374sizeof(gs_variant->prog_data.gs->input_slots));13751376setup_stage_outputs_from_next_stage_inputs(1377gs_variant->prog_data.gs->num_inputs,1378gs_variant->prog_data.gs->input_slots,1379&key->num_used_outputs,1380key->used_outputs,1381sizeof(key->used_outputs));1382} else {1383/* Linking against FS program */1384struct v3dv_shader_variant *fs_variant =1385pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT];13861387STATIC_ASSERT(sizeof(key->used_outputs) ==1388sizeof(fs_variant->prog_data.fs->input_slots));13891390setup_stage_outputs_from_next_stage_inputs(1391fs_variant->prog_data.fs->num_inputs,1392fs_variant->prog_data.fs->input_slots,1393&key->num_used_outputs,1394key->used_outputs,1395sizeof(key->used_outputs));1396}1397}13981399const VkPipelineVertexInputStateCreateInfo *vi_info =1400pCreateInfo->pVertexInputState;1401for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {1402const VkVertexInputAttributeDescription *desc =1403&vi_info->pVertexAttributeDescriptions[i];1404assert(desc->location < MAX_VERTEX_ATTRIBS);1405if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)1406key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);1407}1408}14091410/**1411* Creates the initial form of the pipeline stage for a binning shader by1412* cloning the render shader and flagging it as a coordinate shader.1413*1414* Returns NULL if it was not able to allocate the object, so it should be1415* handled as a VK_ERROR_OUT_OF_HOST_MEMORY error.1416*/1417static struct v3dv_pipeline_stage *1418pipeline_stage_create_binning(const struct v3dv_pipeline_stage *src,1419const VkAllocationCallbacks *pAllocator)1420{1421struct v3dv_device *device = src->pipeline->device;14221423struct v3dv_pipeline_stage *p_stage =1424vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,1425VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);14261427if (p_stage == NULL)1428return NULL;14291430assert(src->stage == BROADCOM_SHADER_VERTEX ||1431src->stage == BROADCOM_SHADER_GEOMETRY);14321433enum broadcom_shader_stage bin_stage =1434src->stage == BROADCOM_SHADER_VERTEX ?1435BROADCOM_SHADER_VERTEX_BIN :1436BROADCOM_SHADER_GEOMETRY_BIN;14371438p_stage->pipeline = src->pipeline;1439p_stage->stage = bin_stage;1440p_stage->entrypoint = src->entrypoint;1441p_stage->module = src->module;1442p_stage->nir = src->nir ? nir_shader_clone(NULL, src->nir) : NULL;1443p_stage->spec_info = src->spec_info;1444memcpy(p_stage->shader_sha1, src->shader_sha1, 20);14451446return p_stage;1447}14481449/**1450* Returns false if it was not able to allocate or map the assembly bo memory.1451*/1452static bool1453upload_assembly(struct v3dv_pipeline *pipeline)1454{1455uint32_t total_size = 0;1456for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {1457struct v3dv_shader_variant *variant =1458pipeline->shared_data->variants[stage];14591460if (variant != NULL)1461total_size += variant->qpu_insts_size;1462}14631464struct v3dv_bo *bo = v3dv_bo_alloc(pipeline->device, total_size,1465"pipeline shader assembly", true);1466if (!bo) {1467fprintf(stderr, "failed to allocate memory for shader\n");1468return false;1469}14701471bool ok = v3dv_bo_map(pipeline->device, bo, total_size);1472if (!ok) {1473fprintf(stderr, "failed to map source shader buffer\n");1474return false;1475}14761477uint32_t offset = 0;1478for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {1479struct v3dv_shader_variant *variant =1480pipeline->shared_data->variants[stage];14811482if (variant != NULL) {1483variant->assembly_offset = offset;14841485memcpy(bo->map + offset, variant->qpu_insts, variant->qpu_insts_size);1486offset += variant->qpu_insts_size;14871488/* We dont need qpu_insts anymore. */1489free(variant->qpu_insts);1490variant->qpu_insts = NULL;1491}1492}1493assert(total_size == offset);14941495pipeline->shared_data->assembly_bo = bo;14961497return true;1498}14991500static void1501pipeline_hash_graphics(const struct v3dv_pipeline *pipeline,1502struct v3dv_pipeline_key *key,1503unsigned char *sha1_out)1504{1505struct mesa_sha1 ctx;1506_mesa_sha1_init(&ctx);15071508/* We need to include all shader stages in the sha1 key as linking may modify1509* the shader code in any stage. An alternative would be to use the1510* serialized NIR, but that seems like an overkill.1511*/1512_mesa_sha1_update(&ctx, pipeline->vs->shader_sha1,1513sizeof(pipeline->vs->shader_sha1));15141515if (pipeline->gs) {1516_mesa_sha1_update(&ctx, pipeline->gs->shader_sha1,1517sizeof(pipeline->gs->shader_sha1));1518}15191520_mesa_sha1_update(&ctx, pipeline->fs->shader_sha1,1521sizeof(pipeline->fs->shader_sha1));15221523_mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));15241525_mesa_sha1_final(&ctx, sha1_out);1526}15271528static void1529pipeline_hash_compute(const struct v3dv_pipeline *pipeline,1530struct v3dv_pipeline_key *key,1531unsigned char *sha1_out)1532{1533struct mesa_sha1 ctx;1534_mesa_sha1_init(&ctx);15351536_mesa_sha1_update(&ctx, pipeline->cs->shader_sha1,1537sizeof(pipeline->cs->shader_sha1));15381539_mesa_sha1_update(&ctx, key, sizeof(struct v3dv_pipeline_key));15401541_mesa_sha1_final(&ctx, sha1_out);1542}15431544/* Checks that the pipeline has enough spill size to use for any of their1545* variants1546*/1547static void1548pipeline_check_spill_size(struct v3dv_pipeline *pipeline)1549{1550uint32_t max_spill_size = 0;15511552for(uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {1553struct v3dv_shader_variant *variant =1554pipeline->shared_data->variants[stage];15551556if (variant != NULL) {1557max_spill_size = MAX2(variant->prog_data.base->spill_size,1558max_spill_size);1559}1560}15611562if (max_spill_size > 0) {1563struct v3dv_device *device = pipeline->device;15641565/* The TIDX register we use for choosing the area to access1566* for scratch space is: (core << 6) | (qpu << 2) | thread.1567* Even at minimum threadcount in a particular shader, that1568* means we still multiply by qpus by 4.1569*/1570const uint32_t total_spill_size =15714 * device->devinfo.qpu_count * max_spill_size;1572if (pipeline->spill.bo) {1573assert(pipeline->spill.size_per_thread > 0);1574v3dv_bo_free(device, pipeline->spill.bo);1575}1576pipeline->spill.bo =1577v3dv_bo_alloc(device, total_spill_size, "spill", true);1578pipeline->spill.size_per_thread = max_spill_size;1579}1580}15811582/**1583* Creates a new shader_variant_create. Note that for prog_data is not const,1584* so it is assumed that the caller will prove a pointer that the1585* shader_variant will own.1586*1587* Creation doesn't include allocate a BD to store the content of qpu_insts,1588* as we will try to share the same bo for several shader variants. Also note1589* that qpu_ints being NULL is valid, for example if we are creating the1590* shader_variants from the cache, so we can just upload the assembly of all1591* the shader stages at once.1592*/1593struct v3dv_shader_variant *1594v3dv_shader_variant_create(struct v3dv_device *device,1595enum broadcom_shader_stage stage,1596struct v3d_prog_data *prog_data,1597uint32_t prog_data_size,1598uint32_t assembly_offset,1599uint64_t *qpu_insts,1600uint32_t qpu_insts_size,1601VkResult *out_vk_result)1602{1603struct v3dv_shader_variant *variant =1604vk_zalloc(&device->vk.alloc, sizeof(*variant), 8,1605VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);16061607if (variant == NULL) {1608*out_vk_result = VK_ERROR_OUT_OF_HOST_MEMORY;1609return NULL;1610}16111612variant->stage = stage;1613variant->prog_data_size = prog_data_size;1614variant->prog_data.base = prog_data;16151616variant->assembly_offset = assembly_offset;1617variant->qpu_insts_size = qpu_insts_size;1618variant->qpu_insts = qpu_insts;16191620*out_vk_result = VK_SUCCESS;16211622return variant;1623}16241625/* For a given key, it returns the compiled version of the shader. Returns a1626* new reference to the shader_variant to the caller, or NULL.1627*1628* If the method returns NULL it means that something wrong happened:1629* * Not enough memory: this is one of the possible outcomes defined by1630* vkCreateXXXPipelines. out_vk_result will return the proper oom error.1631* * Compilation error: hypothetically this shouldn't happen, as the spec1632* states that vkShaderModule needs to be created with a valid SPIR-V, so1633* any compilation failure is a driver bug. In the practice, something as1634* common as failing to register allocate can lead to a compilation1635* failure. In that case the only option (for any driver) is1636* VK_ERROR_UNKNOWN, even if we know that the problem was a compiler1637* error.1638*/1639static struct v3dv_shader_variant *1640pipeline_compile_shader_variant(struct v3dv_pipeline_stage *p_stage,1641struct v3d_key *key,1642size_t key_size,1643const VkAllocationCallbacks *pAllocator,1644VkResult *out_vk_result)1645{1646struct v3dv_pipeline *pipeline = p_stage->pipeline;1647struct v3dv_physical_device *physical_device =1648&pipeline->device->instance->physicalDevice;1649const struct v3d_compiler *compiler = physical_device->compiler;16501651if (V3D_DEBUG & (V3D_DEBUG_NIR |1652v3d_debug_flag_for_shader_stage1653(broadcom_shader_stage_to_gl(p_stage->stage)))) {1654fprintf(stderr, "Just before v3d_compile: %s prog %d NIR:\n",1655broadcom_shader_stage_name(p_stage->stage),1656p_stage->program_id);1657nir_print_shader(p_stage->nir, stderr);1658fprintf(stderr, "\n");1659}16601661uint64_t *qpu_insts;1662uint32_t qpu_insts_size;1663struct v3d_prog_data *prog_data;1664uint32_t prog_data_size =1665v3d_prog_data_size(broadcom_shader_stage_to_gl(p_stage->stage));16661667qpu_insts = v3d_compile(compiler,1668key, &prog_data,1669p_stage->nir,1670shader_debug_output, NULL,1671p_stage->program_id, 0,1672&qpu_insts_size);16731674struct v3dv_shader_variant *variant = NULL;16751676if (!qpu_insts) {1677fprintf(stderr, "Failed to compile %s prog %d NIR to VIR\n",1678gl_shader_stage_name(p_stage->stage),1679p_stage->program_id);1680*out_vk_result = VK_ERROR_UNKNOWN;1681} else {1682variant =1683v3dv_shader_variant_create(pipeline->device, p_stage->stage,1684prog_data, prog_data_size,16850, /* assembly_offset, no final value yet */1686qpu_insts, qpu_insts_size,1687out_vk_result);1688}1689/* At this point we don't need anymore the nir shader, but we are freeing1690* all the temporary p_stage structs used during the pipeline creation when1691* we finish it, so let's not worry about freeing the nir here.1692*/16931694return variant;1695}16961697/* FIXME: C&P from st, common place? */1698static void1699st_nir_opts(nir_shader *nir)1700{1701bool progress;17021703do {1704progress = false;17051706NIR_PASS_V(nir, nir_lower_vars_to_ssa);17071708/* Linking deals with unused inputs/outputs, but here we can remove1709* things local to the shader in the hopes that we can cleanup other1710* things. This pass will also remove variables with only stores, so we1711* might be able to make progress after it.1712*/1713NIR_PASS(progress, nir, nir_remove_dead_variables,1714(nir_variable_mode)(nir_var_function_temp |1715nir_var_shader_temp |1716nir_var_mem_shared),1717NULL);17181719NIR_PASS(progress, nir, nir_opt_copy_prop_vars);1720NIR_PASS(progress, nir, nir_opt_dead_write_vars);17211722if (nir->options->lower_to_scalar) {1723NIR_PASS_V(nir, nir_lower_alu_to_scalar, NULL, NULL);1724NIR_PASS_V(nir, nir_lower_phis_to_scalar, false);1725}17261727NIR_PASS_V(nir, nir_lower_alu);1728NIR_PASS_V(nir, nir_lower_pack);1729NIR_PASS(progress, nir, nir_copy_prop);1730NIR_PASS(progress, nir, nir_opt_remove_phis);1731NIR_PASS(progress, nir, nir_opt_dce);1732if (nir_opt_trivial_continues(nir)) {1733progress = true;1734NIR_PASS(progress, nir, nir_copy_prop);1735NIR_PASS(progress, nir, nir_opt_dce);1736}1737NIR_PASS(progress, nir, nir_opt_if, false);1738NIR_PASS(progress, nir, nir_opt_dead_cf);1739NIR_PASS(progress, nir, nir_opt_cse);1740NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);17411742NIR_PASS(progress, nir, nir_opt_algebraic);1743NIR_PASS(progress, nir, nir_opt_constant_folding);17441745NIR_PASS(progress, nir, nir_opt_undef);1746NIR_PASS(progress, nir, nir_opt_conditional_discard);1747} while (progress);1748}17491750static void1751link_shaders(nir_shader *producer, nir_shader *consumer)1752{1753assert(producer);1754assert(consumer);17551756if (producer->options->lower_to_scalar) {1757NIR_PASS_V(producer, nir_lower_io_to_scalar_early, nir_var_shader_out);1758NIR_PASS_V(consumer, nir_lower_io_to_scalar_early, nir_var_shader_in);1759}17601761nir_lower_io_arrays_to_elements(producer, consumer);17621763st_nir_opts(producer);1764st_nir_opts(consumer);17651766if (nir_link_opt_varyings(producer, consumer))1767st_nir_opts(consumer);17681769NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);1770NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);17711772if (nir_remove_unused_varyings(producer, consumer)) {1773NIR_PASS_V(producer, nir_lower_global_vars_to_local);1774NIR_PASS_V(consumer, nir_lower_global_vars_to_local);17751776st_nir_opts(producer);1777st_nir_opts(consumer);17781779/* Optimizations can cause varyings to become unused.1780* nir_compact_varyings() depends on all dead varyings being removed so1781* we need to call nir_remove_dead_variables() again here.1782*/1783NIR_PASS_V(producer, nir_remove_dead_variables, nir_var_shader_out, NULL);1784NIR_PASS_V(consumer, nir_remove_dead_variables, nir_var_shader_in, NULL);1785}1786}17871788static void1789pipeline_lower_nir(struct v3dv_pipeline *pipeline,1790struct v3dv_pipeline_stage *p_stage,1791struct v3dv_pipeline_layout *layout)1792{1793assert(pipeline->shared_data &&1794pipeline->shared_data->maps[p_stage->stage]);17951796nir_shader_gather_info(p_stage->nir, nir_shader_get_entrypoint(p_stage->nir));17971798/* We add this because we need a valid sampler for nir_lower_tex to do1799* unpacking of the texture operation result, even for the case where there1800* is no sampler state.1801*1802* We add two of those, one for the case we need a 16bit return_size, and1803* another for the case we need a 32bit return size.1804*/1805UNUSED unsigned index =1806descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,1807-1, -1, -1, 0, 16);1808assert(index == V3DV_NO_SAMPLER_16BIT_IDX);18091810index =1811descriptor_map_add(&pipeline->shared_data->maps[p_stage->stage]->sampler_map,1812-2, -2, -2, 0, 32);1813assert(index == V3DV_NO_SAMPLER_32BIT_IDX);18141815/* Apply the actual pipeline layout to UBOs, SSBOs, and textures */1816NIR_PASS_V(p_stage->nir, lower_pipeline_layout_info, pipeline, layout);1817}18181819/**1820* The SPIR-V compiler will insert a sized compact array for1821* VARYING_SLOT_CLIP_DIST0 if the vertex shader writes to gl_ClipDistance[],1822* where the size of the array determines the number of active clip planes.1823*/1824static uint32_t1825get_ucp_enable_mask(struct v3dv_pipeline_stage *p_stage)1826{1827assert(p_stage->stage == BROADCOM_SHADER_VERTEX);1828const nir_shader *shader = p_stage->nir;1829assert(shader);18301831nir_foreach_variable_with_modes(var, shader, nir_var_shader_out) {1832if (var->data.location == VARYING_SLOT_CLIP_DIST0) {1833assert(var->data.compact);1834return (1 << glsl_get_length(var->type)) - 1;1835}1836}1837return 0;1838}18391840static nir_shader *1841pipeline_stage_get_nir(struct v3dv_pipeline_stage *p_stage,1842struct v3dv_pipeline *pipeline,1843struct v3dv_pipeline_cache *cache)1844{1845nir_shader *nir = NULL;18461847nir = v3dv_pipeline_cache_search_for_nir(pipeline, cache,1848&v3dv_nir_options,1849p_stage->shader_sha1);18501851if (nir) {1852assert(nir->info.stage == broadcom_shader_stage_to_gl(p_stage->stage));1853return nir;1854}18551856nir = shader_module_compile_to_nir(pipeline->device, p_stage);18571858if (nir) {1859struct v3dv_pipeline_cache *default_cache =1860&pipeline->device->default_pipeline_cache;18611862v3dv_pipeline_cache_upload_nir(pipeline, cache, nir,1863p_stage->shader_sha1);18641865/* Ensure that the variant is on the default cache, as cmd_buffer could1866* need to change the current variant1867*/1868if (default_cache != cache) {1869v3dv_pipeline_cache_upload_nir(pipeline, default_cache, nir,1870p_stage->shader_sha1);1871}1872return nir;1873}18741875/* FIXME: this shouldn't happen, raise error? */1876return NULL;1877}18781879static void1880pipeline_hash_shader(const struct vk_shader_module *module,1881const char *entrypoint,1882gl_shader_stage stage,1883const VkSpecializationInfo *spec_info,1884unsigned char *sha1_out)1885{1886struct mesa_sha1 ctx;1887_mesa_sha1_init(&ctx);18881889_mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));1890_mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));1891_mesa_sha1_update(&ctx, &stage, sizeof(stage));1892if (spec_info) {1893_mesa_sha1_update(&ctx, spec_info->pMapEntries,1894spec_info->mapEntryCount *1895sizeof(*spec_info->pMapEntries));1896_mesa_sha1_update(&ctx, spec_info->pData,1897spec_info->dataSize);1898}18991900_mesa_sha1_final(&ctx, sha1_out);1901}19021903static VkResult1904pipeline_compile_vertex_shader(struct v3dv_pipeline *pipeline,1905const VkAllocationCallbacks *pAllocator,1906const VkGraphicsPipelineCreateInfo *pCreateInfo)1907{1908assert(pipeline->vs_bin != NULL);1909if (pipeline->vs_bin->nir == NULL) {1910assert(pipeline->vs->nir);1911pipeline->vs_bin->nir = nir_shader_clone(NULL, pipeline->vs->nir);1912}19131914VkResult vk_result;1915struct v3d_vs_key key;1916pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs);1917pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] =1918pipeline_compile_shader_variant(pipeline->vs, &key.base, sizeof(key),1919pAllocator, &vk_result);1920if (vk_result != VK_SUCCESS)1921return vk_result;19221923pipeline_populate_v3d_vs_key(&key, pCreateInfo, pipeline->vs_bin);1924pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN] =1925pipeline_compile_shader_variant(pipeline->vs_bin, &key.base, sizeof(key),1926pAllocator, &vk_result);19271928return vk_result;1929}19301931static VkResult1932pipeline_compile_geometry_shader(struct v3dv_pipeline *pipeline,1933const VkAllocationCallbacks *pAllocator,1934const VkGraphicsPipelineCreateInfo *pCreateInfo)1935{1936assert(pipeline->gs);19371938assert(pipeline->gs_bin != NULL);1939if (pipeline->gs_bin->nir == NULL) {1940assert(pipeline->gs->nir);1941pipeline->gs_bin->nir = nir_shader_clone(NULL, pipeline->gs->nir);1942}19431944VkResult vk_result;1945struct v3d_gs_key key;1946pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs);1947pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] =1948pipeline_compile_shader_variant(pipeline->gs, &key.base, sizeof(key),1949pAllocator, &vk_result);1950if (vk_result != VK_SUCCESS)1951return vk_result;19521953pipeline_populate_v3d_gs_key(&key, pCreateInfo, pipeline->gs_bin);1954pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN] =1955pipeline_compile_shader_variant(pipeline->gs_bin, &key.base, sizeof(key),1956pAllocator, &vk_result);19571958return vk_result;1959}19601961static VkResult1962pipeline_compile_fragment_shader(struct v3dv_pipeline *pipeline,1963const VkAllocationCallbacks *pAllocator,1964const VkGraphicsPipelineCreateInfo *pCreateInfo)1965{1966struct v3dv_pipeline_stage *p_stage = pipeline->vs;19671968p_stage = pipeline->fs;19691970struct v3d_fs_key key;19711972pipeline_populate_v3d_fs_key(&key, pCreateInfo, p_stage,1973pipeline->gs != NULL,1974get_ucp_enable_mask(pipeline->vs));19751976VkResult vk_result;1977pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT] =1978pipeline_compile_shader_variant(p_stage, &key.base, sizeof(key),1979pAllocator, &vk_result);19801981return vk_result;1982}19831984static void1985pipeline_populate_graphics_key(struct v3dv_pipeline *pipeline,1986struct v3dv_pipeline_key *key,1987const VkGraphicsPipelineCreateInfo *pCreateInfo)1988{1989memset(key, 0, sizeof(*key));1990key->robust_buffer_access =1991pipeline->device->features.robustBufferAccess;19921993const VkPipelineInputAssemblyStateCreateInfo *ia_info =1994pCreateInfo->pInputAssemblyState;1995key->topology = vk_to_pipe_prim_type[ia_info->topology];19961997const VkPipelineColorBlendStateCreateInfo *cb_info =1998pCreateInfo->pColorBlendState;1999key->logicop_func = cb_info && cb_info->logicOpEnable == VK_TRUE ?2000vk_to_pipe_logicop[cb_info->logicOp] :2001PIPE_LOGICOP_COPY;20022003const bool raster_enabled =2004!pCreateInfo->pRasterizationState->rasterizerDiscardEnable;20052006/* Multisample rasterization state must be ignored if rasterization2007* is disabled.2008*/2009const VkPipelineMultisampleStateCreateInfo *ms_info =2010raster_enabled ? pCreateInfo->pMultisampleState : NULL;2011if (ms_info) {2012assert(ms_info->rasterizationSamples == VK_SAMPLE_COUNT_1_BIT ||2013ms_info->rasterizationSamples == VK_SAMPLE_COUNT_4_BIT);2014key->msaa = ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT;20152016if (key->msaa) {2017key->sample_coverage =2018pipeline->sample_mask != (1 << V3D_MAX_SAMPLES) - 1;2019key->sample_alpha_to_coverage = ms_info->alphaToCoverageEnable;2020key->sample_alpha_to_one = ms_info->alphaToOneEnable;2021}2022}20232024const struct v3dv_render_pass *pass =2025v3dv_render_pass_from_handle(pCreateInfo->renderPass);2026const struct v3dv_subpass *subpass = pipeline->subpass;2027for (uint32_t i = 0; i < subpass->color_count; i++) {2028const uint32_t att_idx = subpass->color_attachments[i].attachment;2029if (att_idx == VK_ATTACHMENT_UNUSED)2030continue;20312032key->cbufs |= 1 << i;20332034VkFormat fb_format = pass->attachments[att_idx].desc.format;2035enum pipe_format fb_pipe_format = vk_format_to_pipe_format(fb_format);20362037/* If logic operations are enabled then we might emit color reads and we2038* need to know the color buffer format and swizzle for that2039*/2040if (key->logicop_func != PIPE_LOGICOP_COPY) {2041key->color_fmt[i].format = fb_pipe_format;2042key->color_fmt[i].swizzle = v3dv_get_format_swizzle(pipeline->device,2043fb_format);2044}20452046const struct util_format_description *desc =2047vk_format_description(fb_format);20482049if (desc->channel[0].type == UTIL_FORMAT_TYPE_FLOAT &&2050desc->channel[0].size == 32) {2051key->f32_color_rb |= 1 << i;2052}2053}20542055const VkPipelineVertexInputStateCreateInfo *vi_info =2056pCreateInfo->pVertexInputState;2057for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {2058const VkVertexInputAttributeDescription *desc =2059&vi_info->pVertexAttributeDescriptions[i];2060assert(desc->location < MAX_VERTEX_ATTRIBS);2061if (desc->format == VK_FORMAT_B8G8R8A8_UNORM)2062key->va_swap_rb_mask |= 1 << (VERT_ATTRIB_GENERIC0 + desc->location);2063}20642065}20662067static void2068pipeline_populate_compute_key(struct v3dv_pipeline *pipeline,2069struct v3dv_pipeline_key *key,2070const VkComputePipelineCreateInfo *pCreateInfo)2071{2072/* We use the same pipeline key for graphics and compute, but we don't need2073* to add a field to flag compute keys because this key is not used alone2074* to search in the cache, we also use the SPIR-V or the serialized NIR for2075* example, which already flags compute shaders.2076*/2077memset(key, 0, sizeof(*key));2078key->robust_buffer_access =2079pipeline->device->features.robustBufferAccess;2080}20812082static struct v3dv_pipeline_shared_data *2083v3dv_pipeline_shared_data_new_empty(const unsigned char sha1_key[20],2084struct v3dv_pipeline *pipeline,2085bool is_graphics_pipeline)2086{2087/* We create new_entry using the device alloc. Right now shared_data is ref2088* and unref by both the pipeline and the pipeline cache, so we can't2089* ensure that the cache or pipeline alloc will be available on the last2090* unref.2091*/2092struct v3dv_pipeline_shared_data *new_entry =2093vk_zalloc2(&pipeline->device->vk.alloc, NULL,2094sizeof(struct v3dv_pipeline_shared_data), 8,2095VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);20962097if (new_entry == NULL)2098return NULL;20992100for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {2101/* We don't need specific descriptor maps for binning stages we use the2102* map for the render stage.2103*/2104if (broadcom_shader_stage_is_binning(stage))2105continue;21062107if ((is_graphics_pipeline && stage == BROADCOM_SHADER_COMPUTE) ||2108(!is_graphics_pipeline && stage != BROADCOM_SHADER_COMPUTE)) {2109continue;2110}21112112if (stage == BROADCOM_SHADER_GEOMETRY && !pipeline->gs)2113continue;21142115struct v3dv_descriptor_maps *new_maps =2116vk_zalloc2(&pipeline->device->vk.alloc, NULL,2117sizeof(struct v3dv_descriptor_maps), 8,2118VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);21192120if (new_maps == NULL)2121goto fail;21222123new_entry->maps[stage] = new_maps;2124}21252126new_entry->maps[BROADCOM_SHADER_VERTEX_BIN] =2127new_entry->maps[BROADCOM_SHADER_VERTEX];21282129new_entry->maps[BROADCOM_SHADER_GEOMETRY_BIN] =2130new_entry->maps[BROADCOM_SHADER_GEOMETRY];21312132new_entry->ref_cnt = 1;2133memcpy(new_entry->sha1_key, sha1_key, 20);21342135return new_entry;21362137fail:2138if (new_entry != NULL) {2139for (uint8_t stage = 0; stage < BROADCOM_SHADER_STAGES; stage++) {2140if (new_entry->maps[stage] != NULL)2141vk_free(&pipeline->device->vk.alloc, new_entry->maps[stage]);2142}2143}21442145vk_free(&pipeline->device->vk.alloc, new_entry);21462147return NULL;2148}21492150/*2151* It compiles a pipeline. Note that it also allocate internal object, but if2152* some allocations success, but other fails, the method is not freeing the2153* successful ones.2154*2155* This is done to simplify the code, as what we do in this case is just call2156* the pipeline destroy method, and this would handle freeing the internal2157* objects allocated. We just need to be careful setting to NULL the objects2158* not allocated.2159*/2160static VkResult2161pipeline_compile_graphics(struct v3dv_pipeline *pipeline,2162struct v3dv_pipeline_cache *cache,2163const VkGraphicsPipelineCreateInfo *pCreateInfo,2164const VkAllocationCallbacks *pAllocator)2165{2166struct v3dv_device *device = pipeline->device;2167struct v3dv_physical_device *physical_device =2168&device->instance->physicalDevice;21692170/* First pass to get some common info from the shader, and create the2171* individual pipeline_stage objects2172*/2173for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {2174const VkPipelineShaderStageCreateInfo *sinfo = &pCreateInfo->pStages[i];2175gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);21762177struct v3dv_pipeline_stage *p_stage =2178vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,2179VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);21802181if (p_stage == NULL)2182return VK_ERROR_OUT_OF_HOST_MEMORY;21832184/* Note that we are assigning program_id slightly differently that2185* v3d. Here we are assigning one per pipeline stage, so vs and vs_bin2186* would have a different program_id, while v3d would have the same for2187* both. For the case of v3dv, it is more natural to have an id this way,2188* as right now we are using it for debugging, not for shader-db.2189*/2190p_stage->program_id =2191p_atomic_inc_return(&physical_device->next_program_id);21922193p_stage->pipeline = pipeline;2194p_stage->stage = gl_shader_stage_to_broadcom(stage);2195p_stage->entrypoint = sinfo->pName;2196p_stage->module = vk_shader_module_from_handle(sinfo->module);2197p_stage->spec_info = sinfo->pSpecializationInfo;21982199pipeline_hash_shader(p_stage->module,2200p_stage->entrypoint,2201stage,2202p_stage->spec_info,2203p_stage->shader_sha1);22042205pipeline->active_stages |= sinfo->stage;22062207/* We will try to get directly the compiled shader variant, so let's not2208* worry about getting the nir shader for now.2209*/2210p_stage->nir = NULL;22112212switch(stage) {2213case MESA_SHADER_VERTEX:2214pipeline->vs = p_stage;2215pipeline->vs_bin =2216pipeline_stage_create_binning(pipeline->vs, pAllocator);2217if (pipeline->vs_bin == NULL)2218return VK_ERROR_OUT_OF_HOST_MEMORY;2219break;22202221case MESA_SHADER_GEOMETRY:2222pipeline->has_gs = true;2223pipeline->gs = p_stage;2224pipeline->gs_bin =2225pipeline_stage_create_binning(pipeline->gs, pAllocator);2226if (pipeline->gs_bin == NULL)2227return VK_ERROR_OUT_OF_HOST_MEMORY;2228break;22292230case MESA_SHADER_FRAGMENT:2231pipeline->fs = p_stage;2232break;22332234default:2235unreachable("not supported shader stage");2236}2237}22382239/* Add a no-op fragment shader if needed */2240if (!pipeline->fs) {2241nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT,2242&v3dv_nir_options,2243"noop_fs");22442245struct v3dv_pipeline_stage *p_stage =2246vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*p_stage), 8,2247VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);22482249if (p_stage == NULL)2250return VK_ERROR_OUT_OF_HOST_MEMORY;22512252p_stage->pipeline = pipeline;2253p_stage->stage = BROADCOM_SHADER_FRAGMENT;2254p_stage->entrypoint = "main";2255p_stage->module = 0;2256p_stage->nir = b.shader;2257pipeline_compute_sha1_from_nir(p_stage->nir, p_stage->shader_sha1);2258p_stage->program_id =2259p_atomic_inc_return(&physical_device->next_program_id);22602261pipeline->fs = p_stage;2262pipeline->active_stages |= MESA_SHADER_FRAGMENT;2263}22642265/* First we try to get the variants from the pipeline cache */2266struct v3dv_pipeline_key pipeline_key;2267pipeline_populate_graphics_key(pipeline, &pipeline_key, pCreateInfo);2268unsigned char pipeline_sha1[20];2269pipeline_hash_graphics(pipeline, &pipeline_key, pipeline_sha1);22702271pipeline->shared_data =2272v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);22732274if (pipeline->shared_data != NULL) {2275/* A correct pipeline must have at least a VS and FS */2276assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX]);2277assert(pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);2278assert(pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);2279assert(!pipeline->gs ||2280pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY]);2281assert(!pipeline->gs ||2282pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);2283goto success;2284}22852286/* Otherwise we try to get the NIR shaders (either from the original SPIR-V2287* shader or the pipeline cache) and compile.2288*/2289pipeline->shared_data =2290v3dv_pipeline_shared_data_new_empty(pipeline_sha1, pipeline, true);22912292if (!pipeline->vs->nir)2293pipeline->vs->nir = pipeline_stage_get_nir(pipeline->vs, pipeline, cache);2294if (pipeline->gs && !pipeline->gs->nir)2295pipeline->gs->nir = pipeline_stage_get_nir(pipeline->gs, pipeline, cache);2296if (!pipeline->fs->nir)2297pipeline->fs->nir = pipeline_stage_get_nir(pipeline->fs, pipeline, cache);22982299/* Linking + pipeline lowerings */2300if (pipeline->gs) {2301link_shaders(pipeline->gs->nir, pipeline->fs->nir);2302link_shaders(pipeline->vs->nir, pipeline->gs->nir);2303} else {2304link_shaders(pipeline->vs->nir, pipeline->fs->nir);2305}23062307pipeline_lower_nir(pipeline, pipeline->fs, pipeline->layout);2308lower_fs_io(pipeline->fs->nir);23092310if (pipeline->gs) {2311pipeline_lower_nir(pipeline, pipeline->gs, pipeline->layout);2312lower_gs_io(pipeline->vs->nir);2313}23142315pipeline_lower_nir(pipeline, pipeline->vs, pipeline->layout);2316lower_vs_io(pipeline->vs->nir);23172318/* Compiling to vir */2319VkResult vk_result;23202321/* We should have got all the variants or no variants from the cache */2322assert(!pipeline->shared_data->variants[BROADCOM_SHADER_FRAGMENT]);2323vk_result = pipeline_compile_fragment_shader(pipeline, pAllocator, pCreateInfo);2324if (vk_result != VK_SUCCESS)2325return vk_result;23262327assert(!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY] &&2328!pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN]);23292330if (pipeline->gs) {2331vk_result =2332pipeline_compile_geometry_shader(pipeline, pAllocator, pCreateInfo);2333if (vk_result != VK_SUCCESS)2334return vk_result;2335}23362337assert(!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX] &&2338!pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX_BIN]);23392340vk_result = pipeline_compile_vertex_shader(pipeline, pAllocator, pCreateInfo);2341if (vk_result != VK_SUCCESS)2342return vk_result;23432344if (!upload_assembly(pipeline))2345return VK_ERROR_OUT_OF_DEVICE_MEMORY;23462347v3dv_pipeline_cache_upload_pipeline(pipeline, cache);23482349success:2350/* Since we have the variants in the pipeline shared data we can now free2351* the pipeline stages.2352*/2353pipeline_free_stages(device, pipeline, pAllocator);23542355pipeline_check_spill_size(pipeline);23562357return compute_vpm_config(pipeline);2358}23592360static VkResult2361compute_vpm_config(struct v3dv_pipeline *pipeline)2362{2363struct v3dv_shader_variant *vs_variant =2364pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];2365struct v3dv_shader_variant *vs_bin_variant =2366pipeline->shared_data->variants[BROADCOM_SHADER_VERTEX];2367struct v3d_vs_prog_data *vs = vs_variant->prog_data.vs;2368struct v3d_vs_prog_data *vs_bin =vs_bin_variant->prog_data.vs;23692370struct v3d_gs_prog_data *gs = NULL;2371struct v3d_gs_prog_data *gs_bin = NULL;2372if (pipeline->has_gs) {2373struct v3dv_shader_variant *gs_variant =2374pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY];2375struct v3dv_shader_variant *gs_bin_variant =2376pipeline->shared_data->variants[BROADCOM_SHADER_GEOMETRY_BIN];2377gs = gs_variant->prog_data.gs;2378gs_bin = gs_bin_variant->prog_data.gs;2379}23802381if (!v3d_compute_vpm_config(&pipeline->device->devinfo,2382vs_bin, vs, gs_bin, gs,2383&pipeline->vpm_cfg_bin,2384&pipeline->vpm_cfg)) {2385return VK_ERROR_OUT_OF_DEVICE_MEMORY;2386}23872388return VK_SUCCESS;2389}23902391static unsigned2392v3dv_dynamic_state_mask(VkDynamicState state)2393{2394switch(state) {2395case VK_DYNAMIC_STATE_VIEWPORT:2396return V3DV_DYNAMIC_VIEWPORT;2397case VK_DYNAMIC_STATE_SCISSOR:2398return V3DV_DYNAMIC_SCISSOR;2399case VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK:2400return V3DV_DYNAMIC_STENCIL_COMPARE_MASK;2401case VK_DYNAMIC_STATE_STENCIL_WRITE_MASK:2402return V3DV_DYNAMIC_STENCIL_WRITE_MASK;2403case VK_DYNAMIC_STATE_STENCIL_REFERENCE:2404return V3DV_DYNAMIC_STENCIL_REFERENCE;2405case VK_DYNAMIC_STATE_BLEND_CONSTANTS:2406return V3DV_DYNAMIC_BLEND_CONSTANTS;2407case VK_DYNAMIC_STATE_DEPTH_BIAS:2408return V3DV_DYNAMIC_DEPTH_BIAS;2409case VK_DYNAMIC_STATE_LINE_WIDTH:2410return V3DV_DYNAMIC_LINE_WIDTH;24112412/* Depth bounds testing is not available in in V3D 4.2 so here we are just2413* ignoring this dynamic state. We are already asserting at pipeline creation2414* time that depth bounds testing is not enabled.2415*/2416case VK_DYNAMIC_STATE_DEPTH_BOUNDS:2417return 0;24182419default:2420unreachable("Unhandled dynamic state");2421}2422}24232424static void2425pipeline_init_dynamic_state(2426struct v3dv_pipeline *pipeline,2427const VkPipelineDynamicStateCreateInfo *pDynamicState,2428const VkPipelineViewportStateCreateInfo *pViewportState,2429const VkPipelineDepthStencilStateCreateInfo *pDepthStencilState,2430const VkPipelineColorBlendStateCreateInfo *pColorBlendState,2431const VkPipelineRasterizationStateCreateInfo *pRasterizationState)2432{2433pipeline->dynamic_state = default_dynamic_state;2434struct v3dv_dynamic_state *dynamic = &pipeline->dynamic_state;24352436/* Create a mask of enabled dynamic states */2437uint32_t dynamic_states = 0;2438if (pDynamicState) {2439uint32_t count = pDynamicState->dynamicStateCount;2440for (uint32_t s = 0; s < count; s++) {2441dynamic_states |=2442v3dv_dynamic_state_mask(pDynamicState->pDynamicStates[s]);2443}2444}24452446/* For any pipeline states that are not dynamic, set the dynamic state2447* from the static pipeline state.2448*/2449if (pViewportState) {2450if (!(dynamic_states & V3DV_DYNAMIC_VIEWPORT)) {2451dynamic->viewport.count = pViewportState->viewportCount;2452typed_memcpy(dynamic->viewport.viewports, pViewportState->pViewports,2453pViewportState->viewportCount);24542455for (uint32_t i = 0; i < dynamic->viewport.count; i++) {2456v3dv_viewport_compute_xform(&dynamic->viewport.viewports[i],2457dynamic->viewport.scale[i],2458dynamic->viewport.translate[i]);2459}2460}24612462if (!(dynamic_states & V3DV_DYNAMIC_SCISSOR)) {2463dynamic->scissor.count = pViewportState->scissorCount;2464typed_memcpy(dynamic->scissor.scissors, pViewportState->pScissors,2465pViewportState->scissorCount);2466}2467}24682469if (pDepthStencilState) {2470if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_COMPARE_MASK)) {2471dynamic->stencil_compare_mask.front =2472pDepthStencilState->front.compareMask;2473dynamic->stencil_compare_mask.back =2474pDepthStencilState->back.compareMask;2475}24762477if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_WRITE_MASK)) {2478dynamic->stencil_write_mask.front = pDepthStencilState->front.writeMask;2479dynamic->stencil_write_mask.back = pDepthStencilState->back.writeMask;2480}24812482if (!(dynamic_states & V3DV_DYNAMIC_STENCIL_REFERENCE)) {2483dynamic->stencil_reference.front = pDepthStencilState->front.reference;2484dynamic->stencil_reference.back = pDepthStencilState->back.reference;2485}2486}24872488if (pColorBlendState && !(dynamic_states & V3DV_DYNAMIC_BLEND_CONSTANTS)) {2489memcpy(dynamic->blend_constants, pColorBlendState->blendConstants,2490sizeof(dynamic->blend_constants));2491}24922493if (pRasterizationState) {2494if (pRasterizationState->depthBiasEnable &&2495!(dynamic_states & V3DV_DYNAMIC_DEPTH_BIAS)) {2496dynamic->depth_bias.constant_factor =2497pRasterizationState->depthBiasConstantFactor;2498dynamic->depth_bias.depth_bias_clamp =2499pRasterizationState->depthBiasClamp;2500dynamic->depth_bias.slope_factor =2501pRasterizationState->depthBiasSlopeFactor;2502}2503if (!(dynamic_states & V3DV_DYNAMIC_LINE_WIDTH))2504dynamic->line_width = pRasterizationState->lineWidth;2505}25062507pipeline->dynamic_state.mask = dynamic_states;2508}25092510static bool2511stencil_op_is_no_op(const VkStencilOpState *stencil)2512{2513return stencil->depthFailOp == VK_STENCIL_OP_KEEP &&2514stencil->compareOp == VK_COMPARE_OP_ALWAYS;2515}25162517static void2518enable_depth_bias(struct v3dv_pipeline *pipeline,2519const VkPipelineRasterizationStateCreateInfo *rs_info)2520{2521pipeline->depth_bias.enabled = false;2522pipeline->depth_bias.is_z16 = false;25232524if (!rs_info || !rs_info->depthBiasEnable)2525return;25262527/* Check the depth/stencil attachment description for the subpass used with2528* this pipeline.2529*/2530assert(pipeline->pass && pipeline->subpass);2531struct v3dv_render_pass *pass = pipeline->pass;2532struct v3dv_subpass *subpass = pipeline->subpass;25332534if (subpass->ds_attachment.attachment == VK_ATTACHMENT_UNUSED)2535return;25362537assert(subpass->ds_attachment.attachment < pass->attachment_count);2538struct v3dv_render_pass_attachment *att =2539&pass->attachments[subpass->ds_attachment.attachment];25402541if (att->desc.format == VK_FORMAT_D16_UNORM)2542pipeline->depth_bias.is_z16 = true;25432544pipeline->depth_bias.enabled = true;2545}25462547static void2548pipeline_set_ez_state(struct v3dv_pipeline *pipeline,2549const VkPipelineDepthStencilStateCreateInfo *ds_info)2550{2551if (!ds_info || !ds_info->depthTestEnable) {2552pipeline->ez_state = V3D_EZ_DISABLED;2553return;2554}25552556switch (ds_info->depthCompareOp) {2557case VK_COMPARE_OP_LESS:2558case VK_COMPARE_OP_LESS_OR_EQUAL:2559pipeline->ez_state = V3D_EZ_LT_LE;2560break;2561case VK_COMPARE_OP_GREATER:2562case VK_COMPARE_OP_GREATER_OR_EQUAL:2563pipeline->ez_state = V3D_EZ_GT_GE;2564break;2565case VK_COMPARE_OP_NEVER:2566case VK_COMPARE_OP_EQUAL:2567pipeline->ez_state = V3D_EZ_UNDECIDED;2568break;2569default:2570pipeline->ez_state = V3D_EZ_DISABLED;2571break;2572}25732574/* If stencil is enabled and is not a no-op, we need to disable EZ */2575if (ds_info->stencilTestEnable &&2576(!stencil_op_is_no_op(&ds_info->front) ||2577!stencil_op_is_no_op(&ds_info->back))) {2578pipeline->ez_state = V3D_EZ_DISABLED;2579}2580}25812582static bool2583pipeline_has_integer_vertex_attrib(struct v3dv_pipeline *pipeline)2584{2585for (uint8_t i = 0; i < pipeline->va_count; i++) {2586if (vk_format_is_int(pipeline->va[i].vk_format))2587return true;2588}2589return false;2590}25912592/* @pipeline can be NULL. We assume in that case that all the attributes have2593* a float format (we only create an all-float BO once and we reuse it with2594* all float pipelines), otherwise we look at the actual type of each2595* attribute used with the specific pipeline passed in.2596*/2597struct v3dv_bo *2598v3dv_pipeline_create_default_attribute_values(struct v3dv_device *device,2599struct v3dv_pipeline *pipeline)2600{2601uint32_t size = MAX_VERTEX_ATTRIBS * sizeof(float) * 4;2602struct v3dv_bo *bo;26032604bo = v3dv_bo_alloc(device, size, "default_vi_attributes", true);26052606if (!bo) {2607fprintf(stderr, "failed to allocate memory for the default "2608"attribute values\n");2609return NULL;2610}26112612bool ok = v3dv_bo_map(device, bo, size);2613if (!ok) {2614fprintf(stderr, "failed to map default attribute values buffer\n");2615return false;2616}26172618uint32_t *attrs = bo->map;2619uint8_t va_count = pipeline != NULL ? pipeline->va_count : 0;2620for (int i = 0; i < MAX_VERTEX_ATTRIBS; i++) {2621attrs[i * 4 + 0] = 0;2622attrs[i * 4 + 1] = 0;2623attrs[i * 4 + 2] = 0;2624VkFormat attr_format =2625pipeline != NULL ? pipeline->va[i].vk_format : VK_FORMAT_UNDEFINED;2626if (i < va_count && vk_format_is_int(attr_format)) {2627attrs[i * 4 + 3] = 1;2628} else {2629attrs[i * 4 + 3] = fui(1.0);2630}2631}26322633v3dv_bo_unmap(device, bo);26342635return bo;2636}26372638static void2639pipeline_set_sample_mask(struct v3dv_pipeline *pipeline,2640const VkPipelineMultisampleStateCreateInfo *ms_info)2641{2642pipeline->sample_mask = (1 << V3D_MAX_SAMPLES) - 1;26432644/* Ignore pSampleMask if we are not enabling multisampling. The hardware2645* requires this to be 0xf or 0x0 if using a single sample.2646*/2647if (ms_info && ms_info->pSampleMask &&2648ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT) {2649pipeline->sample_mask &= ms_info->pSampleMask[0];2650}2651}26522653static void2654pipeline_set_sample_rate_shading(struct v3dv_pipeline *pipeline,2655const VkPipelineMultisampleStateCreateInfo *ms_info)2656{2657pipeline->sample_rate_shading =2658ms_info && ms_info->rasterizationSamples > VK_SAMPLE_COUNT_1_BIT &&2659ms_info->sampleShadingEnable;2660}26612662static VkResult2663pipeline_init(struct v3dv_pipeline *pipeline,2664struct v3dv_device *device,2665struct v3dv_pipeline_cache *cache,2666const VkGraphicsPipelineCreateInfo *pCreateInfo,2667const VkAllocationCallbacks *pAllocator)2668{2669VkResult result = VK_SUCCESS;26702671pipeline->device = device;26722673V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, pCreateInfo->layout);2674pipeline->layout = layout;26752676V3DV_FROM_HANDLE(v3dv_render_pass, render_pass, pCreateInfo->renderPass);2677assert(pCreateInfo->subpass < render_pass->subpass_count);2678pipeline->pass = render_pass;2679pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];26802681const VkPipelineInputAssemblyStateCreateInfo *ia_info =2682pCreateInfo->pInputAssemblyState;2683pipeline->topology = vk_to_pipe_prim_type[ia_info->topology];26842685/* If rasterization is not enabled, various CreateInfo structs must be2686* ignored.2687*/2688const bool raster_enabled =2689!pCreateInfo->pRasterizationState->rasterizerDiscardEnable;26902691const VkPipelineViewportStateCreateInfo *vp_info =2692raster_enabled ? pCreateInfo->pViewportState : NULL;26932694const VkPipelineDepthStencilStateCreateInfo *ds_info =2695raster_enabled ? pCreateInfo->pDepthStencilState : NULL;26962697const VkPipelineRasterizationStateCreateInfo *rs_info =2698raster_enabled ? pCreateInfo->pRasterizationState : NULL;26992700const VkPipelineColorBlendStateCreateInfo *cb_info =2701raster_enabled ? pCreateInfo->pColorBlendState : NULL;27022703const VkPipelineMultisampleStateCreateInfo *ms_info =2704raster_enabled ? pCreateInfo->pMultisampleState : NULL;27052706pipeline_init_dynamic_state(pipeline,2707pCreateInfo->pDynamicState,2708vp_info, ds_info, cb_info, rs_info);27092710/* V3D 4.2 doesn't support depth bounds testing so we don't advertise that2711* feature and it shouldn't be used by any pipeline.2712*/2713assert(!ds_info || !ds_info->depthBoundsTestEnable);27142715v3dv_X(device, pipeline_pack_state)(pipeline, cb_info, ds_info,2716rs_info, ms_info);27172718pipeline_set_ez_state(pipeline, ds_info);2719enable_depth_bias(pipeline, rs_info);2720pipeline_set_sample_mask(pipeline, ms_info);2721pipeline_set_sample_rate_shading(pipeline, ms_info);27222723pipeline->primitive_restart =2724pCreateInfo->pInputAssemblyState->primitiveRestartEnable;27252726result = pipeline_compile_graphics(pipeline, cache, pCreateInfo, pAllocator);27272728if (result != VK_SUCCESS) {2729/* Caller would already destroy the pipeline, and we didn't allocate any2730* extra info. We don't need to do anything else.2731*/2732return result;2733}27342735v3dv_X(device, pipeline_pack_compile_state)(pipeline,2736pCreateInfo->pVertexInputState);27372738if (pipeline_has_integer_vertex_attrib(pipeline)) {2739pipeline->default_attribute_values =2740v3dv_pipeline_create_default_attribute_values(pipeline->device, pipeline);2741if (!pipeline->default_attribute_values)2742return VK_ERROR_OUT_OF_DEVICE_MEMORY;2743} else {2744pipeline->default_attribute_values = NULL;2745}27462747return result;2748}27492750static VkResult2751graphics_pipeline_create(VkDevice _device,2752VkPipelineCache _cache,2753const VkGraphicsPipelineCreateInfo *pCreateInfo,2754const VkAllocationCallbacks *pAllocator,2755VkPipeline *pPipeline)2756{2757V3DV_FROM_HANDLE(v3dv_device, device, _device);2758V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);27592760struct v3dv_pipeline *pipeline;2761VkResult result;27622763/* Use the default pipeline cache if none is specified */2764if (cache == NULL && device->instance->default_pipeline_cache_enabled)2765cache = &device->default_pipeline_cache;27662767pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),2768VK_OBJECT_TYPE_PIPELINE);27692770if (pipeline == NULL)2771return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);27722773result = pipeline_init(pipeline, device, cache,2774pCreateInfo,2775pAllocator);27762777if (result != VK_SUCCESS) {2778v3dv_destroy_pipeline(pipeline, device, pAllocator);2779return result;2780}27812782*pPipeline = v3dv_pipeline_to_handle(pipeline);27832784return VK_SUCCESS;2785}27862787VKAPI_ATTR VkResult VKAPI_CALL2788v3dv_CreateGraphicsPipelines(VkDevice _device,2789VkPipelineCache pipelineCache,2790uint32_t count,2791const VkGraphicsPipelineCreateInfo *pCreateInfos,2792const VkAllocationCallbacks *pAllocator,2793VkPipeline *pPipelines)2794{2795V3DV_FROM_HANDLE(v3dv_device, device, _device);2796VkResult result = VK_SUCCESS;27972798if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))2799mtx_lock(&device->pdevice->mutex);28002801for (uint32_t i = 0; i < count; i++) {2802VkResult local_result;28032804local_result = graphics_pipeline_create(_device,2805pipelineCache,2806&pCreateInfos[i],2807pAllocator,2808&pPipelines[i]);28092810if (local_result != VK_SUCCESS) {2811result = local_result;2812pPipelines[i] = VK_NULL_HANDLE;2813}2814}28152816if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))2817mtx_unlock(&device->pdevice->mutex);28182819return result;2820}28212822static void2823shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)2824{2825assert(glsl_type_is_vector_or_scalar(type));28262827uint32_t comp_size = glsl_type_is_boolean(type)2828? 4 : glsl_get_bit_size(type) / 8;2829unsigned length = glsl_get_vector_elements(type);2830*size = comp_size * length,2831*align = comp_size * (length == 3 ? 4 : length);2832}28332834static void2835lower_cs_shared(struct nir_shader *nir)2836{2837NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,2838nir_var_mem_shared, shared_type_info);2839NIR_PASS_V(nir, nir_lower_explicit_io,2840nir_var_mem_shared, nir_address_format_32bit_offset);2841}28422843static VkResult2844pipeline_compile_compute(struct v3dv_pipeline *pipeline,2845struct v3dv_pipeline_cache *cache,2846const VkComputePipelineCreateInfo *info,2847const VkAllocationCallbacks *alloc)2848{2849struct v3dv_device *device = pipeline->device;2850struct v3dv_physical_device *physical_device =2851&device->instance->physicalDevice;28522853const VkPipelineShaderStageCreateInfo *sinfo = &info->stage;2854gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);28552856struct v3dv_pipeline_stage *p_stage =2857vk_zalloc2(&device->vk.alloc, alloc, sizeof(*p_stage), 8,2858VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);2859if (!p_stage)2860return VK_ERROR_OUT_OF_HOST_MEMORY;28612862p_stage->program_id = p_atomic_inc_return(&physical_device->next_program_id);2863p_stage->pipeline = pipeline;2864p_stage->stage = gl_shader_stage_to_broadcom(stage);2865p_stage->entrypoint = sinfo->pName;2866p_stage->module = vk_shader_module_from_handle(sinfo->module);2867p_stage->spec_info = sinfo->pSpecializationInfo;28682869pipeline_hash_shader(p_stage->module,2870p_stage->entrypoint,2871stage,2872p_stage->spec_info,2873p_stage->shader_sha1);28742875/* We try to get directly the variant first from the cache */2876p_stage->nir = NULL;28772878pipeline->cs = p_stage;2879pipeline->active_stages |= sinfo->stage;28802881struct v3dv_pipeline_key pipeline_key;2882pipeline_populate_compute_key(pipeline, &pipeline_key, info);2883unsigned char pipeline_sha1[20];2884pipeline_hash_compute(pipeline, &pipeline_key, pipeline_sha1);28852886pipeline->shared_data =2887v3dv_pipeline_cache_search_for_pipeline(cache, pipeline_sha1);28882889if (pipeline->shared_data != NULL) {2890assert(pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE]);2891goto success;2892}28932894pipeline->shared_data = v3dv_pipeline_shared_data_new_empty(pipeline_sha1,2895pipeline,2896false);28972898/* If not found on cache, compile it */2899p_stage->nir = pipeline_stage_get_nir(p_stage, pipeline, cache);2900assert(p_stage->nir);29012902st_nir_opts(p_stage->nir);2903pipeline_lower_nir(pipeline, p_stage, pipeline->layout);2904lower_cs_shared(p_stage->nir);29052906VkResult result = VK_SUCCESS;29072908struct v3d_key key;2909memset(&key, 0, sizeof(key));2910pipeline_populate_v3d_key(&key, p_stage, 0,2911pipeline->device->features.robustBufferAccess);2912pipeline->shared_data->variants[BROADCOM_SHADER_COMPUTE] =2913pipeline_compile_shader_variant(p_stage, &key, sizeof(key),2914alloc, &result);29152916if (result != VK_SUCCESS)2917return result;29182919if (!upload_assembly(pipeline))2920return VK_ERROR_OUT_OF_DEVICE_MEMORY;29212922v3dv_pipeline_cache_upload_pipeline(pipeline, cache);2923/* As we got the variants in pipeline->shared_data, after compiling we2924* don't need the pipeline_stages2925*/2926pipeline_free_stages(device, pipeline, alloc);29272928success:2929pipeline_check_spill_size(pipeline);29302931return VK_SUCCESS;2932}29332934static VkResult2935compute_pipeline_init(struct v3dv_pipeline *pipeline,2936struct v3dv_device *device,2937struct v3dv_pipeline_cache *cache,2938const VkComputePipelineCreateInfo *info,2939const VkAllocationCallbacks *alloc)2940{2941V3DV_FROM_HANDLE(v3dv_pipeline_layout, layout, info->layout);29422943pipeline->device = device;2944pipeline->layout = layout;29452946VkResult result = pipeline_compile_compute(pipeline, cache, info, alloc);29472948return result;2949}29502951static VkResult2952compute_pipeline_create(VkDevice _device,2953VkPipelineCache _cache,2954const VkComputePipelineCreateInfo *pCreateInfo,2955const VkAllocationCallbacks *pAllocator,2956VkPipeline *pPipeline)2957{2958V3DV_FROM_HANDLE(v3dv_device, device, _device);2959V3DV_FROM_HANDLE(v3dv_pipeline_cache, cache, _cache);29602961struct v3dv_pipeline *pipeline;2962VkResult result;29632964/* Use the default pipeline cache if none is specified */2965if (cache == NULL && device->instance->default_pipeline_cache_enabled)2966cache = &device->default_pipeline_cache;29672968pipeline = vk_object_zalloc(&device->vk, pAllocator, sizeof(*pipeline),2969VK_OBJECT_TYPE_PIPELINE);2970if (pipeline == NULL)2971return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);29722973result = compute_pipeline_init(pipeline, device, cache,2974pCreateInfo, pAllocator);2975if (result != VK_SUCCESS) {2976v3dv_destroy_pipeline(pipeline, device, pAllocator);2977return result;2978}29792980*pPipeline = v3dv_pipeline_to_handle(pipeline);29812982return VK_SUCCESS;2983}29842985VKAPI_ATTR VkResult VKAPI_CALL2986v3dv_CreateComputePipelines(VkDevice _device,2987VkPipelineCache pipelineCache,2988uint32_t createInfoCount,2989const VkComputePipelineCreateInfo *pCreateInfos,2990const VkAllocationCallbacks *pAllocator,2991VkPipeline *pPipelines)2992{2993V3DV_FROM_HANDLE(v3dv_device, device, _device);2994VkResult result = VK_SUCCESS;29952996if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))2997mtx_lock(&device->pdevice->mutex);29982999for (uint32_t i = 0; i < createInfoCount; i++) {3000VkResult local_result;3001local_result = compute_pipeline_create(_device,3002pipelineCache,3003&pCreateInfos[i],3004pAllocator,3005&pPipelines[i]);30063007if (local_result != VK_SUCCESS) {3008result = local_result;3009pPipelines[i] = VK_NULL_HANDLE;3010}3011}30123013if (unlikely(V3D_DEBUG & V3D_DEBUG_SHADERS))3014mtx_unlock(&device->pdevice->mutex);30153016return result;3017}301830193020