Path: blob/21.2-virgl/src/intel/vulkan/anv_pipeline.c
/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <unistd.h>
#include <fcntl.h>

#include "util/mesa-sha1.h"
#include "util/os_time.h"
#include "common/intel_l3_config.h"
#include "common/intel_disasm.h"
#include "common/intel_sample_positions.h"
#include "anv_private.h"
#include "compiler/brw_nir.h"
#include "compiler/brw_nir_rt.h"
#include "anv_nir.h"
#include "nir/nir_xfb_info.h"
#include "spirv/nir_spirv.h"
#include "vk_util.h"

/* Needed for SWIZZLE macros */
#include "program/prog_instruction.h"

// Shader functions
#define SPIR_V_MAGIC_NUMBER 0x07230203

struct anv_spirv_debug_data {
   struct anv_device *device;
   const struct vk_shader_module *module;
};

static void anv_spirv_nir_debug(void *private_data,
                                enum nir_spirv_debug_level level,
                                size_t spirv_offset,
                                const char *message)
{
   struct anv_spirv_debug_data *debug_data = private_data;
   struct anv_instance *instance = debug_data->device->physical->instance;

   static const VkDebugReportFlagsEXT vk_flags[] = {
      [NIR_SPIRV_DEBUG_LEVEL_INFO] = VK_DEBUG_REPORT_INFORMATION_BIT_EXT,
      [NIR_SPIRV_DEBUG_LEVEL_WARNING] = VK_DEBUG_REPORT_WARNING_BIT_EXT,
      [NIR_SPIRV_DEBUG_LEVEL_ERROR] = VK_DEBUG_REPORT_ERROR_BIT_EXT,
   };
   char buffer[256];

   snprintf(buffer, sizeof(buffer), "SPIR-V offset %lu: %s",
            (unsigned long) spirv_offset, message);

   vk_debug_report(&instance->vk, vk_flags[level],
                   &debug_data->module->base,
                   0, 0, "anv", buffer);
}
/* Eventually, this will become part of anv_CreateShader.  Unfortunately,
 * we can't do that yet because we don't have the ability to copy nir.
 */
static nir_shader *
anv_shader_compile_to_nir(struct anv_device *device,
                          void *mem_ctx,
                          const struct vk_shader_module *module,
                          const char *entrypoint_name,
                          gl_shader_stage stage,
                          const VkSpecializationInfo *spec_info)
{
   const struct anv_physical_device *pdevice = device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage].NirOptions;

   uint32_t *spirv = (uint32_t *) module->data;
   assert(spirv[0] == SPIR_V_MAGIC_NUMBER);
   assert(module->size % 4 == 0);

   uint32_t num_spec_entries = 0;
   struct nir_spirv_specialization *spec_entries = NULL;
   if (spec_info && spec_info->mapEntryCount > 0) {
      num_spec_entries = spec_info->mapEntryCount;
      spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));
      for (uint32_t i = 0; i < num_spec_entries; i++) {
         VkSpecializationMapEntry entry = spec_info->pMapEntries[i];
         const void *data = spec_info->pData + entry.offset;
         assert(data + entry.size <= spec_info->pData + spec_info->dataSize);

         spec_entries[i].id = spec_info->pMapEntries[i].constantID;
         switch (entry.size) {
         case 8:
            spec_entries[i].value.u64 = *(const uint64_t *)data;
            break;
         case 4:
            spec_entries[i].value.u32 = *(const uint32_t *)data;
            break;
         case 2:
            spec_entries[i].value.u16 = *(const uint16_t *)data;
            break;
         case 1:
            spec_entries[i].value.u8 = *(const uint8_t *)data;
            break;
         default:
            assert(!"Invalid spec constant size");
            break;
         }
      }
   }
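   /* Illustrative example (not part of the driver): a client that wants to
    * override a single 4-byte specialization constant with ID 7 would pass
    * something like the following, which the loop above turns into one
    * nir_spirv_specialization with id == 7 and value.u32 == 16:
    *
    *    const uint32_t value = 16;
    *    const VkSpecializationMapEntry entry = {
    *       .constantID = 7, .offset = 0, .size = sizeof(value),
    *    };
    *    const VkSpecializationInfo spec = {
    *       .mapEntryCount = 1, .pMapEntries = &entry,
    *       .dataSize = sizeof(value), .pData = &value,
    *    };
    */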
   struct anv_spirv_debug_data spirv_debug_data = {
      .device = device,
      .module = module,
   };
   struct spirv_to_nir_options spirv_options = {
      .frag_coord_is_sysval = true,
      .caps = {
         .demote_to_helper_invocation = true,
         .derivative_group = true,
         .descriptor_array_dynamic_indexing = true,
         .descriptor_array_non_uniform_indexing = true,
         .descriptor_indexing = true,
         .device_group = true,
         .draw_parameters = true,
         .float16 = pdevice->info.ver >= 8,
         .float64 = pdevice->info.ver >= 8,
         .fragment_shader_sample_interlock = pdevice->info.ver >= 9,
         .fragment_shader_pixel_interlock = pdevice->info.ver >= 9,
         .geometry_streams = true,
         .image_write_without_format = true,
         .int8 = pdevice->info.ver >= 8,
         .int16 = pdevice->info.ver >= 8,
         .int64 = pdevice->info.ver >= 8,
         .int64_atomics = pdevice->info.ver >= 9 && pdevice->use_softpin,
         .integer_functions2 = pdevice->info.ver >= 8,
         .min_lod = true,
         .multiview = true,
         .physical_storage_buffer_address = pdevice->has_a64_buffer_access,
         .post_depth_coverage = pdevice->info.ver >= 9,
         .runtime_descriptor_array = true,
         .float_controls = pdevice->info.ver >= 8,
         .ray_tracing = pdevice->info.has_ray_tracing,
         .shader_clock = true,
         .shader_viewport_index_layer = true,
         .stencil_export = pdevice->info.ver >= 9,
         .storage_8bit = pdevice->info.ver >= 8,
         .storage_16bit = pdevice->info.ver >= 8,
         .subgroup_arithmetic = true,
         .subgroup_basic = true,
         .subgroup_ballot = true,
         .subgroup_quad = true,
         .subgroup_uniform_control_flow = true,
         .subgroup_shuffle = true,
         .subgroup_vote = true,
         .tessellation = true,
         .transform_feedback = pdevice->info.ver >= 8,
         .variable_pointers = true,
         .vk_memory_model = true,
         .vk_memory_model_device_scope = true,
         .workgroup_memory_explicit_layout = true,
         .fragment_shading_rate = pdevice->info.ver >= 11,
      },
      .ubo_addr_format =
         anv_nir_ubo_addr_format(pdevice, device->robust_buffer_access),
      .ssbo_addr_format =
         anv_nir_ssbo_addr_format(pdevice, device->robust_buffer_access),
      .phys_ssbo_addr_format = nir_address_format_64bit_global,
      .push_const_addr_format = nir_address_format_logical,

      /* TODO: Consider changing this to an address format that has the NULL
       * pointer equal to 0.  That might be a better format to play nice
       * with certain code / code generators.
       */
      .shared_addr_format = nir_address_format_32bit_offset,
      .debug = {
         .func = anv_spirv_nir_debug,
         .private_data = &spirv_debug_data,
      },
   };

   nir_shader *nir =
      spirv_to_nir(spirv, module->size / 4,
                   spec_entries, num_spec_entries,
                   stage, entrypoint_name, &spirv_options, nir_options);
   if (!nir) {
      free(spec_entries);
      return NULL;
   }

   assert(nir->info.stage == stage);
   nir_validate_shader(nir, "after spirv_to_nir");
   nir_validate_ssa_dominance(nir, "after spirv_to_nir");
   ralloc_steal(mem_ctx, nir);

   free(spec_entries);

   if (INTEL_DEBUG & intel_debug_flag_for_shader_stage(stage)) {
      fprintf(stderr, "NIR (from SPIR-V) for %s shader:\n",
              gl_shader_stage_name(stage));
      nir_print_shader(nir, stderr);
   }

   /* We have to lower away local constant initializers right before we
    * inline functions.  That way they get properly initialized at the top
    * of the function and not at the top of its caller.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);
   NIR_PASS_V(nir, nir_lower_returns);
   NIR_PASS_V(nir, nir_inline_functions);
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_deref);

   /* Pick off the single entrypoint that we want */
   foreach_list_typed_safe(nir_function, func, node, &nir->functions) {
      if (!func->is_entrypoint)
         exec_node_remove(&func->node);
   }
   assert(exec_list_length(&nir->functions) == 1);

   /* Now that we've deleted all but the main function, we can go ahead and
    * lower the rest of the constant initializers.  We do this here so that
    * nir_remove_dead_variables and split_per_member_structs below see the
    * corresponding stores.
    */
   NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);

   /* Split member structs.  We do this before lower_io_to_temporaries so
    * that it doesn't lower system values to temporaries by accident.
    */
   NIR_PASS_V(nir, nir_split_var_copies);
   NIR_PASS_V(nir, nir_split_per_member_structs);

   NIR_PASS_V(nir, nir_remove_dead_variables,
              nir_var_shader_in | nir_var_shader_out | nir_var_system_value |
              nir_var_shader_call_data | nir_var_ray_hit_attrib,
              NULL);

   NIR_PASS_V(nir, nir_propagate_invariant, false);
   NIR_PASS_V(nir, nir_lower_io_to_temporaries,
              nir_shader_get_entrypoint(nir), true, false);

   NIR_PASS_V(nir, nir_lower_frexp);

   /* Vulkan uses the separate-shader linking model */
   nir->info.separate_shader = true;

   brw_preprocess_nir(compiler, nir, NULL);

   return nir;
}
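/* Sketch of how the helper above is typically driven (hypothetical caller;
 * the real one in this file is anv_pipeline_stage_get_nir() further down).
 * The returned nir_shader is owned by mem_ctx thanks to the ralloc_steal()
 * above:
 *
 *    nir_shader *nir =
 *       anv_shader_compile_to_nir(device, mem_ctx, module, "main",
 *                                 MESA_SHADER_VERTEX, NULL);
 *    if (nir == NULL)
 *       return vk_error(VK_ERROR_UNKNOWN);
 */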
VkResult
anv_pipeline_init(struct anv_pipeline *pipeline,
                  struct anv_device *device,
                  enum anv_pipeline_type type,
                  VkPipelineCreateFlags flags,
                  const VkAllocationCallbacks *pAllocator)
{
   VkResult result;

   memset(pipeline, 0, sizeof(*pipeline));

   vk_object_base_init(&device->vk, &pipeline->base,
                       VK_OBJECT_TYPE_PIPELINE);
   pipeline->device = device;

   /* It's the job of the child class to provide actual backing storage for
    * the batch by setting batch.start, batch.next, and batch.end.
    */
   pipeline->batch.alloc = pAllocator ? pAllocator : &device->vk.alloc;
   pipeline->batch.relocs = &pipeline->batch_relocs;
   pipeline->batch.status = VK_SUCCESS;

   result = anv_reloc_list_init(&pipeline->batch_relocs,
                                pipeline->batch.alloc);
   if (result != VK_SUCCESS)
      return result;

   pipeline->mem_ctx = ralloc_context(NULL);

   pipeline->type = type;
   pipeline->flags = flags;

   util_dynarray_init(&pipeline->executables, pipeline->mem_ctx);

   return VK_SUCCESS;
}
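/* A minimal sketch (hypothetical, for illustration only) of what a "child
 * class" as mentioned above does right after calling anv_pipeline_init():
 * it points the batch at storage it owns, here an assumed embedded
 * batch_data array:
 *
 *    pipeline->base.batch.start = pipeline->base.batch.next =
 *       pipeline->batch_data;
 *    pipeline->base.batch.end =
 *       pipeline->batch_data + sizeof(pipeline->batch_data);
 */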
void
anv_pipeline_finish(struct anv_pipeline *pipeline,
                    struct anv_device *device,
                    const VkAllocationCallbacks *pAllocator)
{
   anv_reloc_list_finish(&pipeline->batch_relocs,
                         pAllocator ? pAllocator : &device->vk.alloc);
   ralloc_free(pipeline->mem_ctx);
   vk_object_base_finish(&pipeline->base);
}

void anv_DestroyPipeline(
    VkDevice                                    _device,
    VkPipeline                                  _pipeline,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);

   if (!pipeline)
      return;

   switch (pipeline->type) {
   case ANV_PIPELINE_GRAPHICS: {
      struct anv_graphics_pipeline *gfx_pipeline =
         anv_pipeline_to_graphics(pipeline);

      if (gfx_pipeline->blend_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->blend_state);
      if (gfx_pipeline->cps_state.map)
         anv_state_pool_free(&device->dynamic_state_pool, gfx_pipeline->cps_state);

      for (unsigned s = 0; s < ARRAY_SIZE(gfx_pipeline->shaders); s++) {
         if (gfx_pipeline->shaders[s])
            anv_shader_bin_unref(device, gfx_pipeline->shaders[s]);
      }
      break;
   }

   case ANV_PIPELINE_COMPUTE: {
      struct anv_compute_pipeline *compute_pipeline =
         anv_pipeline_to_compute(pipeline);

      if (compute_pipeline->cs)
         anv_shader_bin_unref(device, compute_pipeline->cs);

      break;
   }

   case ANV_PIPELINE_RAY_TRACING: {
      struct anv_ray_tracing_pipeline *rt_pipeline =
         anv_pipeline_to_ray_tracing(pipeline);

      util_dynarray_foreach(&rt_pipeline->shaders,
                            struct anv_shader_bin *, shader) {
         anv_shader_bin_unref(device, *shader);
      }
      break;
   }

   default:
      unreachable("invalid pipeline type");
   }

   anv_pipeline_finish(pipeline, device, pAllocator);
   vk_free2(&device->vk.alloc, pAllocator, pipeline);
}

static const uint32_t vk_to_intel_primitive_type[] = {
   [VK_PRIMITIVE_TOPOLOGY_POINT_LIST]                    = _3DPRIM_POINTLIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST]                     = _3DPRIM_LINELIST,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP]                    = _3DPRIM_LINESTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST]                 = _3DPRIM_TRILIST,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP]                = _3DPRIM_TRISTRIP,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_FAN]                  = _3DPRIM_TRIFAN,
   [VK_PRIMITIVE_TOPOLOGY_LINE_LIST_WITH_ADJACENCY]      = _3DPRIM_LINELIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_LINE_STRIP_WITH_ADJACENCY]     = _3DPRIM_LINESTRIP_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST_WITH_ADJACENCY]  = _3DPRIM_TRILIST_ADJ,
   [VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP_WITH_ADJACENCY] = _3DPRIM_TRISTRIP_ADJ,
};
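/* The table above is indexed directly by the Vulkan enum, e.g.
 * vk_to_intel_primitive_type[VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST] yields
 * _3DPRIM_TRILIST.  Note that VK_PRIMITIVE_TOPOLOGY_PATCH_LIST has no entry
 * here; the hardware primitive value for patches depends on the patch
 * control point count and is handled elsewhere.
 */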
static void
populate_sampler_prog_key(const struct intel_device_info *devinfo,
                          struct brw_sampler_prog_key_data *key)
{
   /* Almost all multisampled textures are compressed.  The only time when we
    * don't compress a multisampled texture is for 16x MSAA with a surface
    * width greater than 8k which is a bit of an edge case.  Since the sampler
    * just ignores the MCS parameter to ld2ms when MCS is disabled, it's safe
    * to tell the compiler to always assume compression.
    */
   key->compressed_multisample_layout_mask = ~0;

   /* SkyLake added support for 16x MSAA.  With this came a new message for
    * reading from a 16x MSAA surface with compression.  The new message was
    * needed because now the MCS data is 64 bits instead of 32 or lower as is
    * the case for 8x, 4x, and 2x.  The key->msaa_16 bit-field controls which
    * message we use.  Fortunately, the 16x message works for 8x, 4x, and 2x,
    * so we can just use it unconditionally.  This may not be quite as
    * efficient but it saves us from recompiling.
    */
   if (devinfo->ver >= 9)
      key->msaa_16 = ~0;

   /* XXX: Handle texture swizzle on HSW- */
   for (int i = 0; i < MAX_SAMPLERS; i++) {
      /* Assume color sampler, no swizzling. (Works for BDW+) */
      key->swizzles[i] = SWIZZLE_XYZW;
   }
}

static void
populate_base_prog_key(const struct intel_device_info *devinfo,
                       VkPipelineShaderStageCreateFlags flags,
                       bool robust_buffer_access,
                       struct brw_base_prog_key *key)
{
   if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
      key->subgroup_size_type = BRW_SUBGROUP_SIZE_VARYING;
   else
      key->subgroup_size_type = BRW_SUBGROUP_SIZE_API_CONSTANT;

   key->robust_buffer_access = robust_buffer_access;

   populate_sampler_prog_key(devinfo, &key->tex);
}

static void
populate_vs_prog_key(const struct intel_device_info *devinfo,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_access,
                     struct brw_vs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);

   /* XXX: Handle vertex input work-arounds */

   /* XXX: Handle sampler_prog_key */
}

static void
populate_tcs_prog_key(const struct intel_device_info *devinfo,
                      VkPipelineShaderStageCreateFlags flags,
                      bool robust_buffer_access,
                      unsigned input_vertices,
                      struct brw_tcs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);

   key->input_vertices = input_vertices;
}

static void
populate_tes_prog_key(const struct intel_device_info *devinfo,
                      VkPipelineShaderStageCreateFlags flags,
                      bool robust_buffer_access,
                      struct brw_tes_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
}

static void
populate_gs_prog_key(const struct intel_device_info *devinfo,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_access,
                     struct brw_gs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
}

static bool
pipeline_has_coarse_pixel(const struct anv_graphics_pipeline *pipeline,
                          const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info)
{
   if (pipeline->sample_shading_enable)
      return false;

   /* Not dynamic & not specified for the pipeline. */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 && !fsr_info)
      return false;

   /* Not dynamic & pipeline has a 1x1 fragment shading rate with no
    * possibility for any element of the pipeline to change the value.
    */
   if ((pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE) == 0 &&
       fsr_info->fragmentSize.width <= 1 &&
       fsr_info->fragmentSize.height <= 1 &&
       fsr_info->combinerOps[0] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR &&
       fsr_info->combinerOps[1] == VK_FRAGMENT_SHADING_RATE_COMBINER_OP_KEEP_KHR)
      return false;

   return true;
}

static void
populate_wm_prog_key(const struct anv_graphics_pipeline *pipeline,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_access,
                     const struct anv_subpass *subpass,
                     const VkPipelineMultisampleStateCreateInfo *ms_info,
                     const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_info,
                     struct brw_wm_prog_key *key)
{
   const struct anv_device *device = pipeline->base.device;
   const struct intel_device_info *devinfo = &device->info;

   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);

   /* We set this to 0 here and set to the actual value before we call
    * brw_compile_fs.
    */
   key->input_slots_valid = 0;

   /* Vulkan doesn't specify a default */
   key->high_quality_derivatives = false;

   /* XXX Vulkan doesn't appear to specify */
   key->clamp_fragment_color = false;

   key->ignore_sample_mask_out = false;

   assert(subpass->color_count <= MAX_RTS);
   for (uint32_t i = 0; i < subpass->color_count; i++) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
         key->color_outputs_valid |= (1 << i);
   }

   key->nr_color_regions = subpass->color_count;

   /* To reduce possible shader recompilations we would need to know if
    * there is a SampleMask output variable to compute if we should emit
    * code to workaround the issue that hardware disables alpha to coverage
    * when there is SampleMask output.
    */
   key->alpha_to_coverage = ms_info && ms_info->alphaToCoverageEnable;

   /* Vulkan doesn't support fixed-function alpha test */
   key->alpha_test_replicate_alpha = false;

   if (ms_info) {
      /* We should probably pull this out of the shader, but it's fairly
       * harmless to compute it and then let dead-code take care of it.
       */
      if (ms_info->rasterizationSamples > 1) {
         key->persample_interp = ms_info->sampleShadingEnable &&
            (ms_info->minSampleShading * ms_info->rasterizationSamples) > 1;
         key->multisample_fbo = true;
      }

      key->frag_coord_adds_sample_pos = key->persample_interp;
   }

   key->coarse_pixel =
      device->vk.enabled_extensions.KHR_fragment_shading_rate &&
      pipeline_has_coarse_pixel(pipeline, fsr_info);
}
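/* Worked example for the persample_interp rule above: with
 * rasterizationSamples == 4 and minSampleShading == 0.5, the product is
 * 2 > 1, so sampleShadingEnable forces true per-sample interpolation; with
 * minSampleShading == 0.25 the product is exactly 1 and interpolation stays
 * per-pixel.
 */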
static void
populate_cs_prog_key(const struct intel_device_info *devinfo,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_access,
                     const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info,
                     struct brw_cs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);

   if (rss_info) {
      assert(key->base.subgroup_size_type != BRW_SUBGROUP_SIZE_VARYING);

      /* These enum values are expressly chosen to be equal to the subgroup
       * size that they require.
       */
      assert(rss_info->requiredSubgroupSize == 8 ||
             rss_info->requiredSubgroupSize == 16 ||
             rss_info->requiredSubgroupSize == 32);
      key->base.subgroup_size_type = rss_info->requiredSubgroupSize;
   } else if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_REQUIRE_FULL_SUBGROUPS_BIT_EXT) {
      /* If the client expressly requests full subgroups and they don't
       * specify a subgroup size, we need to pick one.  If they've requested
       * varying subgroup sizes, we set it to UNIFORM and let the back-end
       * compiler pick.  Otherwise, we specify the API value of 32.
       * Performance will likely be terrible in this case but there's nothing
       * we can do about that.  The client should have chosen a size.
       */
      if (flags & VK_PIPELINE_SHADER_STAGE_CREATE_ALLOW_VARYING_SUBGROUP_SIZE_BIT_EXT)
         key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_UNIFORM;
      else
         key->base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_32;
   }
}
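/* Because the BRW_SUBGROUP_SIZE_REQUIRE_8/16/32 enumerants are defined to
 * equal 8, 16, and 32, the direct assignment above is effectively
 * (illustration only):
 *
 *    key->base.subgroup_size_type =
 *       (enum brw_subgroup_size_type) rss_info->requiredSubgroupSize;
 */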
static void
populate_bs_prog_key(const struct intel_device_info *devinfo,
                     VkPipelineShaderStageCreateFlags flags,
                     bool robust_buffer_access,
                     struct brw_bs_prog_key *key)
{
   memset(key, 0, sizeof(*key));

   populate_base_prog_key(devinfo, flags, robust_buffer_access, &key->base);
}

struct anv_pipeline_stage {
   gl_shader_stage stage;

   const struct vk_shader_module *module;
   const char *entrypoint;
   const VkSpecializationInfo *spec_info;

   unsigned char shader_sha1[20];

   union brw_any_prog_key key;

   struct {
      gl_shader_stage stage;
      unsigned char sha1[20];
   } cache_key;

   nir_shader *nir;

   struct anv_pipeline_binding surface_to_descriptor[256];
   struct anv_pipeline_binding sampler_to_descriptor[256];
   struct anv_pipeline_bind_map bind_map;

   union brw_any_prog_data prog_data;

   uint32_t num_stats;
   struct brw_compile_stats stats[3];
   char *disasm[3];

   VkPipelineCreationFeedbackEXT feedback;

   const unsigned *code;

   struct anv_shader_bin *bin;
};

static void
anv_pipeline_hash_shader(const struct vk_shader_module *module,
                         const char *entrypoint,
                         gl_shader_stage stage,
                         const VkSpecializationInfo *spec_info,
                         unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, module->sha1, sizeof(module->sha1));
   _mesa_sha1_update(&ctx, entrypoint, strlen(entrypoint));
   _mesa_sha1_update(&ctx, &stage, sizeof(stage));
   if (spec_info) {
      _mesa_sha1_update(&ctx, spec_info->pMapEntries,
                        spec_info->mapEntryCount *
                        sizeof(*spec_info->pMapEntries));
      _mesa_sha1_update(&ctx, spec_info->pData,
                        spec_info->dataSize);
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_graphics(struct anv_graphics_pipeline *pipeline,
                           struct anv_pipeline_layout *layout,
                           struct anv_pipeline_stage *stages,
                           unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   _mesa_sha1_update(&ctx, &pipeline->subpass->view_mask,
                     sizeof(pipeline->subpass->view_mask));

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (stages[s].entrypoint) {
         _mesa_sha1_update(&ctx, stages[s].shader_sha1,
                           sizeof(stages[s].shader_sha1));
         _mesa_sha1_update(&ctx, &stages[s].key, brw_prog_key_size(s));
      }
   }

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_compute(struct anv_compute_pipeline *pipeline,
                          struct anv_pipeline_layout *layout,
                          struct anv_pipeline_stage *stage,
                          unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1,
                     sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key.cs, sizeof(stage->key.cs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_shader(struct anv_ray_tracing_pipeline *pipeline,
                                     struct anv_pipeline_layout *layout,
                                     struct anv_pipeline_stage *stage,
                                     unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, stage->shader_sha1, sizeof(stage->shader_sha1));
   _mesa_sha1_update(&ctx, &stage->key, sizeof(stage->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static void
anv_pipeline_hash_ray_tracing_combined_shader(struct anv_ray_tracing_pipeline *pipeline,
                                              struct anv_pipeline_layout *layout,
                                              struct anv_pipeline_stage *intersection,
                                              struct anv_pipeline_stage *any_hit,
                                              unsigned char *sha1_out)
{
   struct mesa_sha1 ctx;
   _mesa_sha1_init(&ctx);

   if (layout != NULL)
      _mesa_sha1_update(&ctx, layout->sha1, sizeof(layout->sha1));

   const bool rba = pipeline->base.device->robust_buffer_access;
   _mesa_sha1_update(&ctx, &rba, sizeof(rba));

   _mesa_sha1_update(&ctx, intersection->shader_sha1, sizeof(intersection->shader_sha1));
   _mesa_sha1_update(&ctx, &intersection->key, sizeof(intersection->key.bs));
   _mesa_sha1_update(&ctx, any_hit->shader_sha1, sizeof(any_hit->shader_sha1));
   _mesa_sha1_update(&ctx, &any_hit->key, sizeof(any_hit->key.bs));

   _mesa_sha1_final(&ctx, sha1_out);
}

static nir_shader *
anv_pipeline_stage_get_nir(struct anv_pipeline *pipeline,
                           struct anv_pipeline_cache *cache,
                           void *mem_ctx,
                           struct anv_pipeline_stage *stage)
{
   const struct brw_compiler *compiler =
      pipeline->device->physical->compiler;
   const nir_shader_compiler_options *nir_options =
      compiler->glsl_compiler_options[stage->stage].NirOptions;
   nir_shader *nir;

   nir = anv_device_search_for_nir(pipeline->device, cache,
                                   nir_options,
                                   stage->shader_sha1,
                                   mem_ctx);
   if (nir) {
      assert(nir->info.stage == stage->stage);
      return nir;
   }

   nir = anv_shader_compile_to_nir(pipeline->device,
                                   mem_ctx,
                                   stage->module,
                                   stage->entrypoint,
                                   stage->stage,
                                   stage->spec_info);
   if (nir) {
      anv_device_upload_nir(pipeline->device, cache, nir, stage->shader_sha1);
      return nir;
   }

   return NULL;
}

static void
shared_type_info(const struct glsl_type *type, unsigned *size, unsigned *align)
{
   assert(glsl_type_is_vector_or_scalar(type));

   uint32_t comp_size = glsl_type_is_boolean(type)
      ? 4 : glsl_get_bit_size(type) / 8;
   unsigned length = glsl_get_vector_elements(type);
   *size = comp_size * length;
   *align = comp_size * (length == 3 ? 4 : length);
}
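/* Example of the sizing rules implemented above: for a vec3 of 32-bit
 * floats, comp_size == 4 and length == 3, so *size == 12 while *align is
 * padded to 16 (a vec3 aligns like a vec4); a plain float yields
 * size == align == 4.
 */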
static void
anv_pipeline_lower_nir(struct anv_pipeline *pipeline,
                       void *mem_ctx,
                       struct anv_pipeline_stage *stage,
                       struct anv_pipeline_layout *layout)
{
   const struct anv_physical_device *pdevice = pipeline->device->physical;
   const struct brw_compiler *compiler = pdevice->compiler;

   struct brw_stage_prog_data *prog_data = &stage->prog_data.base;
   nir_shader *nir = stage->nir;

   if (nir->info.stage == MESA_SHADER_FRAGMENT) {
      /* Check if sample shading is enabled in the shader and toggle it on
       * for the pipeline regardless of whether sampleShadingEnable is set.
       */
      nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));
      if (nir->info.fs.uses_sample_shading)
         anv_pipeline_to_graphics(pipeline)->sample_shading_enable = true;

      NIR_PASS_V(nir, nir_lower_wpos_center,
                 anv_pipeline_to_graphics(pipeline)->sample_shading_enable);
      NIR_PASS_V(nir, nir_lower_input_attachments,
                 &(nir_input_attachment_options) {
                     .use_fragcoord_sysval = true,
                     .use_layer_id_sysval = true,
                 });
   }

   NIR_PASS_V(nir, anv_nir_lower_ycbcr_textures, layout);

   if (pipeline->type == ANV_PIPELINE_GRAPHICS) {
      NIR_PASS_V(nir, anv_nir_lower_multiview,
                 anv_pipeline_to_graphics(pipeline));
   }

   nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));

   NIR_PASS_V(nir, brw_nir_lower_image_load_store, compiler->devinfo, NULL);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_global,
              nir_address_format_64bit_global);
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,
              nir_address_format_32bit_offset);

   /* Apply the actual pipeline layout to UBOs, SSBOs, and textures */
   anv_nir_apply_pipeline_layout(pdevice,
                                 pipeline->device->robust_buffer_access,
                                 layout, nir, &stage->bind_map);

   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ubo,
              anv_nir_ubo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));
   NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_ssbo,
              anv_nir_ssbo_addr_format(pdevice,
                 pipeline->device->robust_buffer_access));

   /* First run copy-prop to get rid of all of the vec() that address
    * calculations often create and then constant-fold so that, when we
    * get to anv_nir_lower_ubo_loads, we can detect constant offsets.
    */
   NIR_PASS_V(nir, nir_copy_prop);
   NIR_PASS_V(nir, nir_opt_constant_folding);

   NIR_PASS_V(nir, anv_nir_lower_ubo_loads);

   /* We don't support non-uniform UBOs and non-uniform SSBO access is
    * handled naturally by falling back to A64 messages.
    */
   NIR_PASS_V(nir, nir_lower_non_uniform_access,
              &(nir_lower_non_uniform_access_options) {
                  .types = nir_lower_non_uniform_texture_access |
                           nir_lower_non_uniform_image_access,
                  .callback = NULL,
              });

   anv_nir_compute_push_layout(pdevice, pipeline->device->robust_buffer_access,
                               nir, prog_data, &stage->bind_map, mem_ctx);

   if (gl_shader_stage_uses_workgroup(nir->info.stage)) {
      if (!nir->info.shared_memory_explicit_layout) {
         NIR_PASS_V(nir, nir_lower_vars_to_explicit_types,
                    nir_var_mem_shared, shared_type_info);
      }

      NIR_PASS_V(nir, nir_lower_explicit_io,
                 nir_var_mem_shared, nir_address_format_32bit_offset);

      if (nir->info.zero_initialize_shared_memory &&
          nir->info.shared_size > 0) {
         /* The effective Shared Local Memory size is at least 1024 bytes and
          * is always rounded to a power of two, so it is OK to align the
          * size used by the shader to chunk_size -- which does simplify the
          * logic.
          */
         const unsigned chunk_size = 16;
         const unsigned shared_size = ALIGN(nir->info.shared_size, chunk_size);
         assert(shared_size <=
                intel_calculate_slm_size(compiler->devinfo->ver, nir->info.shared_size));

         NIR_PASS_V(nir, nir_zero_initialize_shared_memory,
                    shared_size, chunk_size);
      }
   }

   stage->nir = nir;
}
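/* Example of the zero-init sizing above: a shader declaring 100 bytes of
 * shared memory gets shared_size == ALIGN(100, 16) == 112, so the
 * zero-initialization pass clears seven 16-byte chunks.
 */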
static void
anv_pipeline_link_vs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *vs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, vs_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_vs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_graphics_pipeline *pipeline,
                        struct anv_pipeline_stage *vs_stage)
{
   /* When using Primitive Replication for multiview, each view gets its own
    * position slot.
    */
   uint32_t pos_slots = pipeline->use_primitive_replication ?
      anv_subpass_view_count(pipeline->subpass) : 1;

   brw_compute_vue_map(compiler->devinfo,
                       &vs_stage->prog_data.vs.base.vue_map,
                       vs_stage->nir->info.outputs_written,
                       vs_stage->nir->info.separate_shader,
                       pos_slots);

   vs_stage->num_stats = 1;

   struct brw_compile_vs_params params = {
      .nir = vs_stage->nir,
      .key = &vs_stage->key.vs,
      .prog_data = &vs_stage->prog_data.vs,
      .stats = vs_stage->stats,
      .log_data = pipeline->base.device,
   };

   vs_stage->code = brw_compile_vs(compiler, mem_ctx, &params);
}

static void
merge_tess_info(struct shader_info *tes_info,
                const struct shader_info *tcs_info)
{
   /* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:
    *
    *    "PointMode. Controls generation of points rather than triangles
    *     or lines. This functionality defaults to disabled, and is
    *     enabled if either shader stage includes the execution mode."
    *
    * and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,
    * PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,
    * and OutputVertices, it says:
    *
    *    "One mode must be set in at least one of the tessellation
    *     shader stages."
    *
    * So, the fields can be set in either the TCS or TES, but they must
    * agree if set in both.  Our backend looks at TES, so bitwise-or in
    * the values from the TCS.
    */
   assert(tcs_info->tess.tcs_vertices_out == 0 ||
          tes_info->tess.tcs_vertices_out == 0 ||
          tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);
   tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;

   assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||
          tcs_info->tess.spacing == tes_info->tess.spacing);
   tes_info->tess.spacing |= tcs_info->tess.spacing;

   assert(tcs_info->tess.primitive_mode == 0 ||
          tes_info->tess.primitive_mode == 0 ||
          tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);
   tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;
   tes_info->tess.ccw |= tcs_info->tess.ccw;
   tes_info->tess.point_mode |= tcs_info->tess.point_mode;
}
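/* Example of the merge above: if the TCS declares OutputVertices = 3 and
 * the TES declares Triangles with SpacingEqual, then after merge_tess_info()
 * the TES info carries tcs_vertices_out == 3, the triangle primitive mode,
 * and TESS_SPACING_EQUAL, with the asserts guaranteeing the two stages never
 * disagree on a field they both set.
 */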
static void
anv_pipeline_link_tcs(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tcs_stage,
                      struct anv_pipeline_stage *tes_stage)
{
   assert(tes_stage && tes_stage->stage == MESA_SHADER_TESS_EVAL);

   brw_nir_link_shaders(compiler, tcs_stage->nir, tes_stage->nir);

   nir_lower_patch_vertices(tes_stage->nir,
                            tcs_stage->nir->info.tess.tcs_vertices_out,
                            NULL);

   /* Copy TCS info into the TES info */
   merge_tess_info(&tes_stage->nir->info, &tcs_stage->nir->info);

   /* Whacking the key after cache lookup is a bit sketchy, but all of
    * this comes from the SPIR-V, which is part of the hash used for the
    * pipeline cache.  So it should be safe.
    */
   tcs_stage->key.tcs.tes_primitive_mode =
      tes_stage->nir->info.tess.primitive_mode;
   tcs_stage->key.tcs.quads_workaround =
      compiler->devinfo->ver < 9 &&
      tes_stage->nir->info.tess.primitive_mode == 7 /* GL_QUADS */ &&
      tes_stage->nir->info.tess.spacing == TESS_SPACING_EQUAL;
}

static void
anv_pipeline_compile_tcs(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tcs_stage,
                         struct anv_pipeline_stage *prev_stage)
{
   tcs_stage->key.tcs.outputs_written =
      tcs_stage->nir->info.outputs_written;
   tcs_stage->key.tcs.patch_outputs_written =
      tcs_stage->nir->info.patch_outputs_written;

   tcs_stage->num_stats = 1;
   tcs_stage->code = brw_compile_tcs(compiler, device, mem_ctx,
                                     &tcs_stage->key.tcs,
                                     &tcs_stage->prog_data.tcs,
                                     tcs_stage->nir, -1,
                                     tcs_stage->stats, NULL);
}

static void
anv_pipeline_link_tes(const struct brw_compiler *compiler,
                      struct anv_pipeline_stage *tes_stage,
                      struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, tes_stage->nir, next_stage->nir);
}

static void
anv_pipeline_compile_tes(const struct brw_compiler *compiler,
                         void *mem_ctx,
                         struct anv_device *device,
                         struct anv_pipeline_stage *tes_stage,
                         struct anv_pipeline_stage *tcs_stage)
{
   tes_stage->key.tes.inputs_read =
      tcs_stage->nir->info.outputs_written;
   tes_stage->key.tes.patch_inputs_read =
      tcs_stage->nir->info.patch_outputs_written;

   tes_stage->num_stats = 1;
   tes_stage->code = brw_compile_tes(compiler, device, mem_ctx,
                                     &tes_stage->key.tes,
                                     &tcs_stage->prog_data.tcs.base.vue_map,
                                     &tes_stage->prog_data.tes,
                                     tes_stage->nir, -1,
                                     tes_stage->stats, NULL);
}

static void
anv_pipeline_link_gs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *gs_stage,
                     struct anv_pipeline_stage *next_stage)
{
   if (next_stage)
      brw_nir_link_shaders(compiler, gs_stage->nir, next_stage->nir);
}
static void
anv_pipeline_compile_gs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *gs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   brw_compute_vue_map(compiler->devinfo,
                       &gs_stage->prog_data.gs.base.vue_map,
                       gs_stage->nir->info.outputs_written,
                       gs_stage->nir->info.separate_shader, 1);

   gs_stage->num_stats = 1;
   gs_stage->code = brw_compile_gs(compiler, device, mem_ctx,
                                   &gs_stage->key.gs,
                                   &gs_stage->prog_data.gs,
                                   gs_stage->nir, -1,
                                   gs_stage->stats, NULL);
}

static void
anv_pipeline_link_fs(const struct brw_compiler *compiler,
                     struct anv_pipeline_stage *stage)
{
   unsigned num_rt_bindings;
   struct anv_pipeline_binding rt_bindings[MAX_RTS];
   if (stage->key.wm.nr_color_regions > 0) {
      assert(stage->key.wm.nr_color_regions <= MAX_RTS);
      for (unsigned rt = 0; rt < stage->key.wm.nr_color_regions; rt++) {
         if (stage->key.wm.color_outputs_valid & BITFIELD_BIT(rt)) {
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = rt,
            };
         } else {
            /* Setup a null render target */
            rt_bindings[rt] = (struct anv_pipeline_binding) {
               .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
               .index = UINT32_MAX,
            };
         }
      }
      num_rt_bindings = stage->key.wm.nr_color_regions;
   } else {
      /* Setup a null render target */
      rt_bindings[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS,
         .index = UINT32_MAX,
      };
      num_rt_bindings = 1;
   }

   assert(num_rt_bindings <= MAX_RTS);
   assert(stage->bind_map.surface_count == 0);
   typed_memcpy(stage->bind_map.surface_to_descriptor,
                rt_bindings, num_rt_bindings);
   stage->bind_map.surface_count += num_rt_bindings;

   /* Now that we've set up the color attachments, we can go through and
    * eliminate any shader outputs that map to VK_ATTACHMENT_UNUSED in the
    * hopes that dead code can clean them up in this and any earlier shader
    * stages.
    */
   nir_function_impl *impl = nir_shader_get_entrypoint(stage->nir);
   bool deleted_output = false;
   nir_foreach_shader_out_variable_safe(var, stage->nir) {
      /* TODO: We don't delete depth/stencil writes.  We probably could if
       * the subpass doesn't have a depth/stencil attachment.
       */
      if (var->data.location < FRAG_RESULT_DATA0)
         continue;

      const unsigned rt = var->data.location - FRAG_RESULT_DATA0;

      /* If this is the RT at location 0 and we have alpha to coverage
       * enabled we still need that write because it will affect the
       * coverage mask even if it's never written to a color target.
       */
      if (rt == 0 && stage->key.wm.alpha_to_coverage)
         continue;

      const unsigned array_len =
         glsl_type_is_array(var->type) ? glsl_get_length(var->type) : 1;
      assert(rt + array_len <= MAX_RTS);

      if (rt >= MAX_RTS || !(stage->key.wm.color_outputs_valid &
                             BITFIELD_RANGE(rt, array_len))) {
         deleted_output = true;
         var->data.mode = nir_var_function_temp;
         exec_node_remove(&var->node);
         exec_list_push_tail(&impl->locals, &var->node);
      }
   }

   if (deleted_output)
      nir_fixup_deref_modes(stage->nir);

   /* We stored the number of subpass color attachments in nr_color_regions
    * when calculating the key for caching.  Now that we've computed the
    * bind map, we can reduce this to the actual max before we go into the
    * back-end compiler.
    */
   stage->key.wm.nr_color_regions =
      util_last_bit(stage->key.wm.color_outputs_valid);
}
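/* Example of the trimming above: a subpass with color attachments
 * { used, VK_ATTACHMENT_UNUSED, used } produces color_outputs_valid == 0b101
 * and nr_color_regions == 3 at key-population time.  RT 1 gets a null
 * render-target binding (index == UINT32_MAX) and any shader output writing
 * it is demoted to a dead local, while util_last_bit(0b101) keeps
 * nr_color_regions at 3.
 */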
static void
anv_pipeline_compile_fs(const struct brw_compiler *compiler,
                        void *mem_ctx,
                        struct anv_device *device,
                        struct anv_pipeline_stage *fs_stage,
                        struct anv_pipeline_stage *prev_stage)
{
   /* TODO: we could set this to 0 based on the information in nir_shader,
    * but we need this before we call spirv_to_nir.
    */
   assert(prev_stage);
   fs_stage->key.wm.input_slots_valid =
      prev_stage->prog_data.vue.vue_map.slots_valid;

   struct brw_compile_fs_params params = {
      .nir = fs_stage->nir,
      .key = &fs_stage->key.wm,
      .prog_data = &fs_stage->prog_data.wm,

      .allow_spilling = true,
      .stats = fs_stage->stats,
      .log_data = device,
   };

   fs_stage->code = brw_compile_fs(compiler, mem_ctx, &params);

   fs_stage->num_stats = (uint32_t)fs_stage->prog_data.wm.dispatch_8 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_16 +
                         (uint32_t)fs_stage->prog_data.wm.dispatch_32;

   if (fs_stage->key.wm.color_outputs_valid == 0 &&
       !fs_stage->prog_data.wm.has_side_effects &&
       !fs_stage->prog_data.wm.uses_omask &&
       !fs_stage->key.wm.alpha_to_coverage &&
       !fs_stage->prog_data.wm.uses_kill &&
       fs_stage->prog_data.wm.computed_depth_mode == BRW_PSCDEPTH_OFF &&
       !fs_stage->prog_data.wm.computed_stencil) {
      /* This fragment shader has no outputs and no side effects.  Go ahead
       * and return the code pointer so we don't accidentally think the
       * compile failed but zero out prog_data which will set program_size
       * to zero and disable the stage.
       */
      memset(&fs_stage->prog_data, 0, sizeof(fs_stage->prog_data));
   }
}
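/* Example of the num_stats computation above: fragment shaders can be
 * compiled for several dispatch widths at once, so a binary containing
 * SIMD8 and SIMD16 variants (dispatch_8 and dispatch_16 set, dispatch_32
 * clear) yields num_stats == 2, one brw_compile_stats entry per variant.
 * This matches the per-variant executables added in
 * anv_pipeline_add_executables() below.
 */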
static void
anv_pipeline_add_executable(struct anv_pipeline *pipeline,
                            struct anv_pipeline_stage *stage,
                            struct brw_compile_stats *stats,
                            uint32_t code_offset)
{
   char *nir = NULL;
   if (stage->nir &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      nir = nir_shader_as_str(stage->nir, pipeline->mem_ctx);
   }

   char *disasm = NULL;
   if (stage->code &&
       (pipeline->flags &
        VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR)) {
      char *stream_data = NULL;
      size_t stream_size = 0;
      FILE *stream = open_memstream(&stream_data, &stream_size);

      uint32_t push_size = 0;
      for (unsigned i = 0; i < 4; i++)
         push_size += stage->bind_map.push_ranges[i].length;
      if (push_size > 0) {
         fprintf(stream, "Push constant ranges:\n");
         for (unsigned i = 0; i < 4; i++) {
            if (stage->bind_map.push_ranges[i].length == 0)
               continue;

            fprintf(stream, "  RANGE%d (%dB): ", i,
                    stage->bind_map.push_ranges[i].length * 32);

            switch (stage->bind_map.push_ranges[i].set) {
            case ANV_DESCRIPTOR_SET_NULL:
               fprintf(stream, "NULL");
               break;

            case ANV_DESCRIPTOR_SET_PUSH_CONSTANTS:
               fprintf(stream, "Vulkan push constants and API params");
               break;

            case ANV_DESCRIPTOR_SET_DESCRIPTORS:
               fprintf(stream, "Descriptor buffer for set %d (start=%dB)",
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS:
               unreachable("gl_NumWorkgroups is never pushed");

            case ANV_DESCRIPTOR_SET_SHADER_CONSTANTS:
               fprintf(stream, "Inline shader constant data (start=%dB)",
                       stage->bind_map.push_ranges[i].start * 32);
               break;

            case ANV_DESCRIPTOR_SET_COLOR_ATTACHMENTS:
               unreachable("Color attachments can't be pushed");

            default:
               fprintf(stream, "UBO (set=%d binding=%d start=%dB)",
                       stage->bind_map.push_ranges[i].set,
                       stage->bind_map.push_ranges[i].index,
                       stage->bind_map.push_ranges[i].start * 32);
               break;
            }
            fprintf(stream, "\n");
         }
         fprintf(stream, "\n");
      }

      /* Creating this is far cheaper than it looks.  It's perfectly fine to
       * do it for every binary.
       */
      intel_disassemble(&pipeline->device->info,
                        stage->code, code_offset, stream);

      fclose(stream);

      /* Copy it to a ralloc'd thing */
      disasm = ralloc_size(pipeline->mem_ctx, stream_size + 1);
      memcpy(disasm, stream_data, stream_size);
      disasm[stream_size] = 0;

      free(stream_data);
   }

   const struct anv_pipeline_executable exe = {
      .stage = stage->stage,
      .stats = *stats,
      .nir = nir,
      .disasm = disasm,
   };
   util_dynarray_append(&pipeline->executables,
                        struct anv_pipeline_executable, exe);
}

static void
anv_pipeline_add_executables(struct anv_pipeline *pipeline,
                             struct anv_pipeline_stage *stage,
                             struct anv_shader_bin *bin)
{
   if (stage->stage == MESA_SHADER_FRAGMENT) {
      /* We pull the prog data and stats out of the anv_shader_bin because
       * the anv_pipeline_stage may not be fully populated if we successfully
       * looked up the shader in a cache.
       */
      const struct brw_wm_prog_data *wm_prog_data =
         (const struct brw_wm_prog_data *)bin->prog_data;
      struct brw_compile_stats *stats = bin->stats;

      if (wm_prog_data->dispatch_8) {
         anv_pipeline_add_executable(pipeline, stage, stats++, 0);
      }

      if (wm_prog_data->dispatch_16) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_16);
      }

      if (wm_prog_data->dispatch_32) {
         anv_pipeline_add_executable(pipeline, stage, stats++,
                                     wm_prog_data->prog_offset_32);
      }
   } else {
      anv_pipeline_add_executable(pipeline, stage, bin->stats, 0);
   }
}

static void
anv_pipeline_init_from_cached_graphics(struct anv_graphics_pipeline *pipeline)
{
   /* TODO: Cache this pipeline-wide information. */

   /* Primitive replication depends on information from all the shaders.
    * Recover this bit from the fact that we have more than one position slot
    * in the vertex shader when using it.
    */
   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);
   int pos_slots = 0;
   const struct brw_vue_prog_data *vue_prog_data =
      (const void *) pipeline->shaders[MESA_SHADER_VERTEX]->prog_data;
   const struct brw_vue_map *vue_map = &vue_prog_data->vue_map;
   for (int i = 0; i < vue_map->num_slots; i++) {
      if (vue_map->slot_to_varying[i] == VARYING_SLOT_POS)
         pos_slots++;
   }
   pipeline->use_primitive_replication = pos_slots > 1;
}
static VkResult
anv_pipeline_compile_graphics(struct anv_graphics_pipeline *pipeline,
                              struct anv_pipeline_cache *cache,
                              const VkGraphicsPipelineCreateInfo *info)
{
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;
   struct anv_pipeline_stage stages[MESA_SHADER_STAGES] = {};

   pipeline->active_stages = 0;

   /* Information on which states are considered dynamic. */
   const VkPipelineDynamicStateCreateInfo *dyn_info =
      info->pDynamicState;
   uint32_t dynamic_states = 0;
   if (dyn_info) {
      for (unsigned i = 0; i < dyn_info->dynamicStateCount; i++)
         dynamic_states |=
            anv_cmd_dirty_bit_for_vk_dynamic_state(dyn_info->pDynamicStates[i]);
   }
   VkResult result;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      gl_shader_stage stage = vk_to_mesa_shader_stage(sinfo->stage);

      pipeline->active_stages |= sinfo->stage;

      int64_t stage_start = os_time_get_nano();

      stages[stage].stage = stage;
      stages[stage].module = vk_shader_module_from_handle(sinfo->module);
      stages[stage].entrypoint = sinfo->pName;
      stages[stage].spec_info = sinfo->pSpecializationInfo;
      anv_pipeline_hash_shader(stages[stage].module,
                               stages[stage].entrypoint,
                               stage,
                               stages[stage].spec_info,
                               stages[stage].shader_sha1);

      const struct intel_device_info *devinfo = &pipeline->base.device->info;
      switch (stage) {
      case MESA_SHADER_VERTEX:
         populate_vs_prog_key(devinfo, sinfo->flags,
                              pipeline->base.device->robust_buffer_access,
                              &stages[stage].key.vs);
         break;
      case MESA_SHADER_TESS_CTRL:
         populate_tcs_prog_key(devinfo, sinfo->flags,
                               pipeline->base.device->robust_buffer_access,
                               info->pTessellationState->patchControlPoints,
                               &stages[stage].key.tcs);
         break;
      case MESA_SHADER_TESS_EVAL:
         populate_tes_prog_key(devinfo, sinfo->flags,
                               pipeline->base.device->robust_buffer_access,
                               &stages[stage].key.tes);
         break;
      case MESA_SHADER_GEOMETRY:
         populate_gs_prog_key(devinfo, sinfo->flags,
                              pipeline->base.device->robust_buffer_access,
                              &stages[stage].key.gs);
         break;
      case MESA_SHADER_FRAGMENT: {
         const bool raster_enabled =
            !info->pRasterizationState->rasterizerDiscardEnable ||
            dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE;
         populate_wm_prog_key(pipeline, sinfo->flags,
                              pipeline->base.device->robust_buffer_access,
                              pipeline->subpass,
                              raster_enabled ? info->pMultisampleState : NULL,
                              vk_find_struct_const(info->pNext,
                                                   PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR),
                              &stages[stage].key.wm);
         break;
      }
      default:
         unreachable("Invalid graphics shader stage");
      }

      stages[stage].feedback.duration += os_time_get_nano() - stage_start;
      stages[stage].feedback.flags |= VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT;
   }

   if (pipeline->active_stages & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
      pipeline->active_stages |= VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;

   assert(pipeline->active_stages & VK_SHADER_STAGE_VERTEX_BIT);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   unsigned char sha1[20];
   anv_pipeline_hash_graphics(pipeline, layout, stages, sha1);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      stages[s].cache_key.stage = s;
      memcpy(stages[s].cache_key.sha1, sha1, sizeof(sha1));
   }

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   if (!skip_cache_lookup) {
      unsigned found = 0;
      unsigned cache_hits = 0;
      for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
         if (!stages[s].entrypoint)
            continue;

         int64_t stage_start = os_time_get_nano();

         bool cache_hit;
         struct anv_shader_bin *bin =
            anv_device_search_for_kernel(pipeline->base.device, cache,
                                         &stages[s].cache_key,
                                         sizeof(stages[s].cache_key), &cache_hit);
         if (bin) {
            found++;
            pipeline->shaders[s] = bin;
         }

         if (cache_hit) {
            cache_hits++;
            stages[s].feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
         }
         stages[s].feedback.duration += os_time_get_nano() - stage_start;
      }

      if (found == __builtin_popcount(pipeline->active_stages)) {
         if (cache_hits == found) {
            pipeline_feedback.flags |=
               VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
         }
         /* We found all our shaders in the cache.  We're done. */
         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
            if (!stages[s].entrypoint)
               continue;

            anv_pipeline_add_executables(&pipeline->base, &stages[s],
                                         pipeline->shaders[s]);
         }
         anv_pipeline_init_from_cached_graphics(pipeline);
         goto done;
      } else if (found > 0) {
         /* We found some but not all of our shaders.  This shouldn't happen
          * most of the time but it can if we have a partially populated
          * pipeline cache.
          */
         assert(found < __builtin_popcount(pipeline->active_stages));

         vk_debug_report(&pipeline->base.device->physical->instance->vk,
                         VK_DEBUG_REPORT_WARNING_BIT_EXT |
                         VK_DEBUG_REPORT_PERFORMANCE_WARNING_BIT_EXT,
                         &cache->base, 0, 0, "anv",
                         "Found a partial pipeline in the cache.  This is "
                         "most likely caused by an incomplete pipeline cache "
                         "import or export");

         /* We're going to have to recompile anyway, so just throw away our
          * references to the shaders in the cache.  We'll get them out of
          * the cache again as part of the compilation process.
          */
         for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
            stages[s].feedback.flags = 0;
            if (pipeline->shaders[s]) {
               anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
               pipeline->shaders[s] = NULL;
            }
         }
      }
   }
   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT)
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   void *pipeline_ctx = ralloc_context(NULL);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      assert(stages[s].stage == s);
      assert(pipeline->shaders[s] == NULL);

      stages[s].bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stages[s].surface_to_descriptor,
         .sampler_to_descriptor = stages[s].sampler_to_descriptor
      };

      stages[s].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx,
                                                 &stages[s]);
      if (stages[s].nir == NULL) {
         result = vk_error(VK_ERROR_UNKNOWN);
         goto fail;
      }

      /* This is rather ugly.
       *
       * Any variable annotated as interpolated by sample essentially
       * disables coarse pixel shading.  Unfortunately the CTS tests
       * exercising this set the varying value in the previous stage using
       * a constant.  Our NIR infrastructure is clever enough to look up
       * variables across stages and constant fold, removing the variable.
       * So in order to comply with CTS we have to check variables here.
       */
      if (s == MESA_SHADER_FRAGMENT) {
         nir_foreach_variable_in_list(var, &stages[s].nir->variables) {
            if (var->data.sample) {
               stages[s].key.wm.coarse_pixel = false;
               break;
            }
         }
      }

      stages[s].feedback.duration += os_time_get_nano() - stage_start;
   }

   /* Walk backwards to link */
   struct anv_pipeline_stage *next_stage = NULL;
   for (int s = ARRAY_SIZE(pipeline->shaders) - 1; s >= 0; s--) {
      if (!stages[s].entrypoint)
         continue;

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_link_vs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_link_tcs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_link_tes(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_link_gs(compiler, &stages[s], next_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_link_fs(compiler, &stages[s]);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }

      next_stage = &stages[s];
   }

   if (pipeline->base.device->info.ver >= 12 &&
       pipeline->subpass->view_mask != 0) {
      /* For some pipelines HW Primitive Replication can be used instead of
       * instancing to implement Multiview.  This depends on how viewIndex
       * is used in all the active shaders, so this check can't be done per
       * individual shader.
       */
      nir_shader *shaders[MESA_SHADER_STAGES] = {};
      for (unsigned s = 0; s < MESA_SHADER_STAGES; s++)
         shaders[s] = stages[s].nir;

      pipeline->use_primitive_replication =
         anv_check_for_primitive_replication(shaders, pipeline);
   } else {
      pipeline->use_primitive_replication = false;
   }
   struct anv_pipeline_stage *prev_stage = NULL;
   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      anv_pipeline_lower_nir(&pipeline->base, stage_ctx, &stages[s], layout);

      if (prev_stage && compiler->glsl_compiler_options[s].NirOptions->unify_interfaces) {
         prev_stage->nir->info.outputs_written |= stages[s].nir->info.inputs_read &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         stages[s].nir->info.inputs_read |= prev_stage->nir->info.outputs_written &
            ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
         prev_stage->nir->info.patch_outputs_written |= stages[s].nir->info.patch_inputs_read;
         stages[s].nir->info.patch_inputs_read |= prev_stage->nir->info.patch_outputs_written;
      }

      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   prev_stage = NULL;
   for (unsigned s = 0; s < MESA_SHADER_STAGES; s++) {
      if (!stages[s].entrypoint)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(NULL);

      nir_xfb_info *xfb_info = NULL;
      if (s == MESA_SHADER_VERTEX ||
          s == MESA_SHADER_TESS_EVAL ||
          s == MESA_SHADER_GEOMETRY)
         xfb_info = nir_gather_xfb_info(stages[s].nir, stage_ctx);

      switch (s) {
      case MESA_SHADER_VERTEX:
         anv_pipeline_compile_vs(compiler, stage_ctx, pipeline,
                                 &stages[s]);
         break;
      case MESA_SHADER_TESS_CTRL:
         anv_pipeline_compile_tcs(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_TESS_EVAL:
         anv_pipeline_compile_tes(compiler, stage_ctx, pipeline->base.device,
                                  &stages[s], prev_stage);
         break;
      case MESA_SHADER_GEOMETRY:
         anv_pipeline_compile_gs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      case MESA_SHADER_FRAGMENT:
         anv_pipeline_compile_fs(compiler, stage_ctx, pipeline->base.device,
                                 &stages[s], prev_stage);
         break;
      default:
         unreachable("Invalid graphics shader stage");
      }
      if (stages[s].code == NULL) {
         ralloc_free(stage_ctx);
         result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_nir_validate_push_layout(&stages[s].prog_data.base,
                                   &stages[s].bind_map);

      struct anv_shader_bin *bin =
         anv_device_upload_kernel(pipeline->base.device, cache, s,
                                  &stages[s].cache_key,
                                  sizeof(stages[s].cache_key),
                                  stages[s].code,
                                  stages[s].prog_data.base.program_size,
                                  &stages[s].prog_data.base,
                                  brw_prog_data_size(s),
                                  stages[s].stats, stages[s].num_stats,
                                  xfb_info, &stages[s].bind_map);
      if (!bin) {
         ralloc_free(stage_ctx);
         result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
         goto fail;
      }

      anv_pipeline_add_executables(&pipeline->base, &stages[s], bin);

      pipeline->shaders[s] = bin;
      ralloc_free(stage_ctx);

      stages[s].feedback.duration += os_time_get_nano() - stage_start;

      prev_stage = &stages[s];
   }

   ralloc_free(pipeline_ctx);

done:

   if (pipeline->shaders[MESA_SHADER_FRAGMENT] &&
       pipeline->shaders[MESA_SHADER_FRAGMENT]->prog_data->program_size == 0) {
      /* This can happen if we decided to implicitly disable the fragment
       * shader. See anv_pipeline_compile_fs().
       */
      anv_shader_bin_unref(pipeline->base.device,
                           pipeline->shaders[MESA_SHADER_FRAGMENT]);
      pipeline->shaders[MESA_SHADER_FRAGMENT] = NULL;
      pipeline->active_stages &= ~VK_SHADER_STAGE_FRAGMENT_BIT;
   }

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
      for (uint32_t i = 0; i < info->stageCount; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;

fail:
   ralloc_free(pipeline_ctx);

   for (unsigned s = 0; s < ARRAY_SIZE(pipeline->shaders); s++) {
      if (pipeline->shaders[s])
         anv_shader_bin_unref(pipeline->base.device, pipeline->shaders[s]);
   }

   return result;
}

VkResult
anv_pipeline_compile_cs(struct anv_compute_pipeline *pipeline,
                        struct anv_pipeline_cache *cache,
                        const VkComputePipelineCreateInfo *info,
                        const struct vk_shader_module *module,
                        const char *entrypoint,
                        const VkSpecializationInfo *spec_info)
{
   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   const struct brw_compiler *compiler = pipeline->base.device->physical->compiler;

   struct anv_pipeline_stage stage = {
      .stage = MESA_SHADER_COMPUTE,
      .module = module,
      .entrypoint = entrypoint,
      .spec_info = spec_info,
      .cache_key = {
         .stage = MESA_SHADER_COMPUTE,
      },
      .feedback = {
         .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
      },
   };
   anv_pipeline_hash_shader(stage.module,
                            stage.entrypoint,
                            MESA_SHADER_COMPUTE,
                            stage.spec_info,
                            stage.shader_sha1);

   struct anv_shader_bin *bin = NULL;

   const VkPipelineShaderStageRequiredSubgroupSizeCreateInfoEXT *rss_info =
      vk_find_struct_const(info->stage.pNext,
                           PIPELINE_SHADER_STAGE_REQUIRED_SUBGROUP_SIZE_CREATE_INFO_EXT);

   populate_cs_prog_key(&pipeline->base.device->info, info->stage.flags,
                        pipeline->base.device->robust_buffer_access,
                        rss_info, &stage.key.cs);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   anv_pipeline_hash_compute(pipeline, layout, &stage, stage.cache_key.sha1);

   bool cache_hit = false;
   if (!skip_cache_lookup) {
      bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                         &stage.cache_key,
                                         sizeof(stage.cache_key),
                                         &cache_hit);
   }

   if (bin == NULL &&
       (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT))
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;

   void *mem_ctx = ralloc_context(NULL);
   if (bin == NULL) {
      int64_t stage_start = os_time_get_nano();
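
      /* Surface slot 0 is reserved up front for the gl_NumWorkGroups
       * buffer; if the compiled shader turns out not to use it, the
       * binding is redirected to ANV_DESCRIPTOR_SET_NULL after
       * compilation (see the uses_num_work_groups check below).
       */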

      stage.bind_map = (struct anv_pipeline_bind_map) {
         .surface_to_descriptor = stage.surface_to_descriptor,
         .sampler_to_descriptor = stage.sampler_to_descriptor
      };

      /* Set up a binding for the gl_NumWorkGroups */
      stage.bind_map.surface_count = 1;
      stage.bind_map.surface_to_descriptor[0] = (struct anv_pipeline_binding) {
         .set = ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS,
      };

      stage.nir = anv_pipeline_stage_get_nir(&pipeline->base, cache, mem_ctx, &stage);
      if (stage.nir == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_UNKNOWN);
      }

      NIR_PASS_V(stage.nir, anv_nir_add_base_work_group_id);

      anv_pipeline_lower_nir(&pipeline->base, mem_ctx, &stage, layout);

      NIR_PASS_V(stage.nir, brw_nir_lower_cs_intrinsics);

      stage.num_stats = 1;

      struct brw_compile_cs_params params = {
         .nir = stage.nir,
         .key = &stage.key.cs,
         .prog_data = &stage.prog_data.cs,
         .stats = stage.stats,
         .log_data = pipeline->base.device,
      };

      stage.code = brw_compile_cs(compiler, mem_ctx, &params);
      if (stage.code == NULL) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_nir_validate_push_layout(&stage.prog_data.base, &stage.bind_map);

      if (!stage.prog_data.cs.uses_num_work_groups) {
         assert(stage.bind_map.surface_to_descriptor[0].set ==
                ANV_DESCRIPTOR_SET_NUM_WORK_GROUPS);
         stage.bind_map.surface_to_descriptor[0].set = ANV_DESCRIPTOR_SET_NULL;
      }

      const unsigned code_size = stage.prog_data.base.program_size;
      bin = anv_device_upload_kernel(pipeline->base.device, cache,
                                     MESA_SHADER_COMPUTE,
                                     &stage.cache_key, sizeof(stage.cache_key),
                                     stage.code, code_size,
                                     &stage.prog_data.base,
                                     sizeof(stage.prog_data.cs),
                                     stage.stats, stage.num_stats,
                                     NULL, &stage.bind_map);
      if (!bin) {
         ralloc_free(mem_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      stage.feedback.duration = os_time_get_nano() - stage_start;
   }

   anv_pipeline_add_executables(&pipeline->base, &stage, bin);

   ralloc_free(mem_ctx);

   if (cache_hit) {
      stage.feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
   }
   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(create_feedback->pipelineStageCreationFeedbackCount == 1);
      create_feedback->pPipelineStageCreationFeedbacks[0] = stage.feedback;
   }

   pipeline->cs = bin;

   return VK_SUCCESS;
}

/**
 * Copy pipeline state not marked as dynamic.
 * Dynamic state is pipeline state which hasn't been provided at pipeline
 * creation time, but is dynamically provided afterwards using various
 * vkCmdSet* functions.
 *
 * The set of state considered "non_dynamic" is determined by the pieces of
 * state that have their corresponding VkDynamicState enums omitted from
 * VkPipelineDynamicStateCreateInfo::pDynamicStates.
 *
 * @param[out] pipeline    Destination non_dynamic state.
 * @param[in]  pCreateInfo Source of non_dynamic state to be copied.
 */
static void
copy_non_dynamic_state(struct anv_graphics_pipeline *pipeline,
                       const VkGraphicsPipelineCreateInfo *pCreateInfo)
{
   anv_cmd_dirty_mask_t states = ANV_CMD_DIRTY_DYNAMIC_ALL;
   struct anv_subpass *subpass = pipeline->subpass;

   pipeline->dynamic_state = default_dynamic_state;

   states &= ~pipeline->dynamic_states;

   struct anv_dynamic_state *dynamic = &pipeline->dynamic_state;

   bool raster_discard =
      pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
      !(pipeline->dynamic_states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pViewportState is [...] NULL if the pipeline
    *    has rasterization disabled.
    */
   if (!raster_discard) {
      assert(pCreateInfo->pViewportState);

      dynamic->viewport.count = pCreateInfo->pViewportState->viewportCount;
      if (states & ANV_CMD_DIRTY_DYNAMIC_VIEWPORT) {
         typed_memcpy(dynamic->viewport.viewports,
                      pCreateInfo->pViewportState->pViewports,
                      pCreateInfo->pViewportState->viewportCount);
      }

      dynamic->scissor.count = pCreateInfo->pViewportState->scissorCount;
      if (states & ANV_CMD_DIRTY_DYNAMIC_SCISSOR) {
         typed_memcpy(dynamic->scissor.scissors,
                      pCreateInfo->pViewportState->pScissors,
                      pCreateInfo->pViewportState->scissorCount);
      }
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_WIDTH) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->line_width = pCreateInfo->pRasterizationState->lineWidth;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias.bias =
         pCreateInfo->pRasterizationState->depthBiasConstantFactor;
      dynamic->depth_bias.clamp =
         pCreateInfo->pRasterizationState->depthBiasClamp;
      dynamic->depth_bias.slope =
         pCreateInfo->pRasterizationState->depthBiasSlopeFactor;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_CULL_MODE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->cull_mode =
         pCreateInfo->pRasterizationState->cullMode;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_FRONT_FACE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->front_face =
         pCreateInfo->pRasterizationState->frontFace;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY) {
      assert(pCreateInfo->pInputAssemblyState);
      dynamic->primitive_topology = pCreateInfo->pInputAssemblyState->topology;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->raster_discard =
         pCreateInfo->pRasterizationState->rasterizerDiscardEnable;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BIAS_ENABLE) {
      assert(pCreateInfo->pRasterizationState);
      dynamic->depth_bias_enable =
         pCreateInfo->pRasterizationState->depthBiasEnable;
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_RESTART_ENABLE) {
      assert(pCreateInfo->pInputAssemblyState);
      dynamic->primitive_restart_enable =
         pCreateInfo->pInputAssemblyState->primitiveRestartEnable;
   }

   /* Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pColorBlendState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is
    *    created against does not use any color attachments.
    */
   bool uses_color_att = false;
   for (unsigned i = 0; i < subpass->color_count; ++i) {
      if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED) {
         uses_color_att = true;
         break;
      }
   }

   if (uses_color_att && !raster_discard) {
      assert(pCreateInfo->pColorBlendState);

      if (states & ANV_CMD_DIRTY_DYNAMIC_BLEND_CONSTANTS)
         typed_memcpy(dynamic->blend_constants,
                      pCreateInfo->pColorBlendState->blendConstants, 4);
   }

   /* If there is no depthstencil attachment, then don't read
    * pDepthStencilState. The Vulkan spec states that pDepthStencilState may
    * be NULL in this case. Even if pDepthStencilState is non-NULL, there is
    * no need to override the depthstencil defaults in
    * anv_pipeline::dynamic_state when there is no depthstencil attachment.
    *
    * Section 9.2 of the Vulkan 1.0.15 spec says:
    *
    *    pDepthStencilState is [...] NULL if the pipeline has rasterization
    *    disabled or if the subpass of the render pass the pipeline is
    *    created against does not use a depth/stencil attachment.
    */
   if (!raster_discard && subpass->depth_stencil_attachment) {
      assert(pCreateInfo->pDepthStencilState);

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS) {
         dynamic->depth_bounds.min =
            pCreateInfo->pDepthStencilState->minDepthBounds;
         dynamic->depth_bounds.max =
            pCreateInfo->pDepthStencilState->maxDepthBounds;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_COMPARE_MASK) {
         dynamic->stencil_compare_mask.front =
            pCreateInfo->pDepthStencilState->front.compareMask;
         dynamic->stencil_compare_mask.back =
            pCreateInfo->pDepthStencilState->back.compareMask;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_WRITE_MASK) {
         dynamic->stencil_write_mask.front =
            pCreateInfo->pDepthStencilState->front.writeMask;
         dynamic->stencil_write_mask.back =
            pCreateInfo->pDepthStencilState->back.writeMask;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_REFERENCE) {
         dynamic->stencil_reference.front =
            pCreateInfo->pDepthStencilState->front.reference;
         dynamic->stencil_reference.back =
            pCreateInfo->pDepthStencilState->back.reference;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_TEST_ENABLE) {
         dynamic->depth_test_enable =
            pCreateInfo->pDepthStencilState->depthTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_WRITE_ENABLE) {
         dynamic->depth_write_enable =
            pCreateInfo->pDepthStencilState->depthWriteEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_COMPARE_OP) {
         dynamic->depth_compare_op =
            pCreateInfo->pDepthStencilState->depthCompareOp;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_DEPTH_BOUNDS_TEST_ENABLE) {
         dynamic->depth_bounds_test_enable =
            pCreateInfo->pDepthStencilState->depthBoundsTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_TEST_ENABLE) {
         dynamic->stencil_test_enable =
            pCreateInfo->pDepthStencilState->stencilTestEnable;
      }

      if (states & ANV_CMD_DIRTY_DYNAMIC_STENCIL_OP) {
         const VkPipelineDepthStencilStateCreateInfo *info =
            pCreateInfo->pDepthStencilState;
         memcpy(&dynamic->stencil_op.front, &info->front,
                sizeof(dynamic->stencil_op.front));
         memcpy(&dynamic->stencil_op.back, &info->back,
                sizeof(dynamic->stencil_op.back));
      }
   }

   const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);
   if (line_state) {
      if (states & ANV_CMD_DIRTY_DYNAMIC_LINE_STIPPLE) {
         dynamic->line_stipple.factor = line_state->lineStippleFactor;
         dynamic->line_stipple.pattern = line_state->lineStipplePattern;
      }
   }

   const VkPipelineMultisampleStateCreateInfo *ms_info =
      pCreateInfo->pRasterizationState->rasterizerDiscardEnable ? NULL :
      pCreateInfo->pMultisampleState;
   if (states & ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS) {
      const VkPipelineSampleLocationsStateCreateInfoEXT *sl_info = ms_info ?
         vk_find_struct_const(ms_info, PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT) : NULL;

      if (sl_info) {
         dynamic->sample_locations.samples =
            sl_info->sampleLocationsInfo.sampleLocationsCount;
         const VkSampleLocationEXT *positions =
            sl_info->sampleLocationsInfo.pSampleLocations;
         for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
            dynamic->sample_locations.locations[i].x = positions[i].x;
            dynamic->sample_locations.locations[i].y = positions[i].y;
         }
      }
   }
   /* Ensure we always have valid values for sample_locations. */
   if (pipeline->base.device->vk.enabled_extensions.EXT_sample_locations &&
       dynamic->sample_locations.samples == 0) {
      dynamic->sample_locations.samples =
         ms_info ? ms_info->rasterizationSamples : 1;
      const struct intel_sample_position *positions =
         intel_get_sample_positions(dynamic->sample_locations.samples);
      for (uint32_t i = 0; i < dynamic->sample_locations.samples; i++) {
         dynamic->sample_locations.locations[i].x = positions[i].x;
         dynamic->sample_locations.locations[i].y = positions[i].y;
      }
   }

   if (states & ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE) {
      if (!pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
          uses_color_att) {
         assert(pCreateInfo->pColorBlendState);
         const VkPipelineColorWriteCreateInfoEXT *color_write_info =
            vk_find_struct_const(pCreateInfo->pColorBlendState->pNext,
                                 PIPELINE_COLOR_WRITE_CREATE_INFO_EXT);

         if (color_write_info) {
            dynamic->color_writes = 0;
            for (uint32_t i = 0; i < color_write_info->attachmentCount; i++) {
               dynamic->color_writes |=
                  color_write_info->pColorWriteEnables[i] ? (1u << i) : 0;
            }
         }
      }
   }

   const VkPipelineFragmentShadingRateStateCreateInfoKHR *fsr_state =
      vk_find_struct_const(pCreateInfo->pNext,
                           PIPELINE_FRAGMENT_SHADING_RATE_STATE_CREATE_INFO_KHR);
   if (fsr_state) {
      if (states & ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE)
         dynamic->fragment_shading_rate = fsr_state->fragmentSize;
   }

   pipeline->dynamic_state_mask = states;

   /* Mark states that can either be dynamic or fully baked into the
    * pipeline.
    */
   pipeline->static_state_mask = states &
      (ANV_CMD_DIRTY_DYNAMIC_SAMPLE_LOCATIONS |
       ANV_CMD_DIRTY_DYNAMIC_COLOR_BLEND_STATE |
       ANV_CMD_DIRTY_DYNAMIC_SHADING_RATE |
       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE |
       ANV_CMD_DIRTY_DYNAMIC_LOGIC_OP |
       ANV_CMD_DIRTY_DYNAMIC_PRIMITIVE_TOPOLOGY);
}
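
/* Example of the masking above: if an application lists
 * VK_DYNAMIC_STATE_VIEWPORT in pDynamicStates, the corresponding
 * ANV_CMD_DIRTY_DYNAMIC_VIEWPORT bit ends up in pipeline->dynamic_states
 * (see anv_graphics_pipeline_init() below), is cleared from the states
 * mask, and the viewports are therefore not copied here; the application
 * provides them later with vkCmdSetViewport().
 */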

static void
anv_pipeline_validate_create_info(const VkGraphicsPipelineCreateInfo *info)
{
#ifdef DEBUG
   struct anv_render_pass *renderpass = NULL;
   struct anv_subpass *subpass = NULL;

   /* Assert that all required members of VkGraphicsPipelineCreateInfo are
    * present. See the Vulkan 1.0.28 spec, Section 9.2 Graphics Pipelines.
    */
   assert(info->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);

   renderpass = anv_render_pass_from_handle(info->renderPass);
   assert(renderpass);

   assert(info->subpass < renderpass->subpass_count);
   subpass = &renderpass->subpasses[info->subpass];

   assert(info->stageCount >= 1);
   assert(info->pVertexInputState);
   assert(info->pInputAssemblyState);
   assert(info->pRasterizationState);
   if (!info->pRasterizationState->rasterizerDiscardEnable) {
      assert(info->pViewportState);
      assert(info->pMultisampleState);

      if (subpass && subpass->depth_stencil_attachment)
         assert(info->pDepthStencilState);

      if (subpass && subpass->color_count > 0) {
         bool all_color_unused = true;
         for (int i = 0; i < subpass->color_count; i++) {
            if (subpass->color_attachments[i].attachment != VK_ATTACHMENT_UNUSED)
               all_color_unused = false;
         }
         /* pColorBlendState is ignored if the pipeline has rasterization
          * disabled or if the subpass of the render pass the pipeline is
          * created against does not use any color attachments.
          */
         assert(info->pColorBlendState || all_color_unused);
      }
   }

   for (uint32_t i = 0; i < info->stageCount; ++i) {
      switch (info->pStages[i].stage) {
      case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
      case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
         assert(info->pTessellationState);
         break;
      default:
         break;
      }
   }
#endif
}

/**
 * Calculate the desired L3 partitioning based on the current state of the
 * pipeline. For now this simply returns the conservative defaults calculated
 * by get_default_l3_weights(), but we could probably do better by gathering
 * more statistics from the pipeline state (e.g. guess of expected URB usage
 * and bound surfaces), or by using feedback from performance counters.
 */
void
anv_pipeline_setup_l3_config(struct anv_pipeline *pipeline, bool needs_slm)
{
   const struct intel_device_info *devinfo = &pipeline->device->info;

   const struct intel_l3_weights w =
      intel_get_default_l3_weights(devinfo, true, needs_slm);

   pipeline->l3_config = intel_get_l3_config(devinfo, w);
}

static VkLineRasterizationModeEXT
vk_line_rasterization_mode(const VkPipelineRasterizationLineStateCreateInfoEXT *line_info,
                           const VkPipelineMultisampleStateCreateInfo *ms_info)
{
   VkLineRasterizationModeEXT line_mode =
      line_info ? line_info->lineRasterizationMode :
                  VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT;

   if (line_mode == VK_LINE_RASTERIZATION_MODE_DEFAULT_EXT) {
      if (ms_info && ms_info->rasterizationSamples > 1) {
         return VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT;
      } else {
         return VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;
      }
   }

   return line_mode;
}
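
/* For example, with no VkPipelineRasterizationLineStateCreateInfoEXT in
 * the chain, a 4x-multisampled pipeline resolves to
 * VK_LINE_RASTERIZATION_MODE_RECTANGULAR_EXT while a single-sampled one
 * resolves to VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT, matching the
 * spec's default line rasterization behavior.
 */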

VkResult
anv_graphics_pipeline_init(struct anv_graphics_pipeline *pipeline,
                           struct anv_device *device,
                           struct anv_pipeline_cache *cache,
                           const VkGraphicsPipelineCreateInfo *pCreateInfo,
                           const VkAllocationCallbacks *alloc)
{
   VkResult result;

   anv_pipeline_validate_create_info(pCreateInfo);

   result = anv_pipeline_init(&pipeline->base, device,
                              ANV_PIPELINE_GRAPHICS, pCreateInfo->flags,
                              alloc);
   if (result != VK_SUCCESS)
      return result;

   anv_batch_set_storage(&pipeline->base.batch, ANV_NULL_ADDRESS,
                         pipeline->batch_data, sizeof(pipeline->batch_data));

   ANV_FROM_HANDLE(anv_render_pass, render_pass, pCreateInfo->renderPass);
   assert(pCreateInfo->subpass < render_pass->subpass_count);
   pipeline->subpass = &render_pass->subpasses[pCreateInfo->subpass];

   assert(pCreateInfo->pRasterizationState);

   pipeline->dynamic_states = 0;
   if (pCreateInfo->pDynamicState) {
      /* Remove all of the states that are marked as dynamic */
      uint32_t count = pCreateInfo->pDynamicState->dynamicStateCount;
      for (uint32_t s = 0; s < count; s++) {
         pipeline->dynamic_states |= anv_cmd_dirty_bit_for_vk_dynamic_state(
            pCreateInfo->pDynamicState->pDynamicStates[s]);
      }
   }
   copy_non_dynamic_state(pipeline, pCreateInfo);

   pipeline->depth_clamp_enable = pCreateInfo->pRasterizationState->depthClampEnable;

   /* Previously we enabled depth clipping when !depthClampEnable.
    * DepthClipStateCreateInfo now makes depth clipping explicit, so if the
    * clipping info is available, use its enable value to determine
    * clipping; otherwise fall back to the previous !depthClampEnable logic.
    */
   const VkPipelineRasterizationDepthClipStateCreateInfoEXT *clip_info =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_DEPTH_CLIP_STATE_CREATE_INFO_EXT);
   pipeline->depth_clip_enable =
      clip_info ? clip_info->depthClipEnable : !pipeline->depth_clamp_enable;

   pipeline->sample_shading_enable =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable &&
      pCreateInfo->pMultisampleState &&
      pCreateInfo->pMultisampleState->sampleShadingEnable;

   /* When we free the pipeline, we detect stages based on the NULL status
    * of various prog_data pointers. Make them NULL by default.
    */
   memset(pipeline->shaders, 0, sizeof(pipeline->shaders));

   result = anv_pipeline_compile_graphics(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS) {
      anv_pipeline_finish(&pipeline->base, device, alloc);
      return result;
   }

   assert(pipeline->shaders[MESA_SHADER_VERTEX]);

   anv_pipeline_setup_l3_config(&pipeline->base, false);

   const VkPipelineVertexInputStateCreateInfo *vi_info =
      pCreateInfo->pVertexInputState;

   const uint64_t inputs_read = get_vs_prog_data(pipeline)->inputs_read;

   pipeline->vb_used = 0;
   for (uint32_t i = 0; i < vi_info->vertexAttributeDescriptionCount; i++) {
      const VkVertexInputAttributeDescription *desc =
         &vi_info->pVertexAttributeDescriptions[i];

      if (inputs_read & (1ull << (VERT_ATTRIB_GENERIC0 + desc->location)))
         pipeline->vb_used |= 1 << desc->binding;
   }

   for (uint32_t i = 0; i < vi_info->vertexBindingDescriptionCount; i++) {
      const VkVertexInputBindingDescription *desc =
         &vi_info->pVertexBindingDescriptions[i];

      pipeline->vb[desc->binding].stride = desc->stride;

      /* Step rate is programmed per vertex element (attribute), not
       * binding. Set up a map of which bindings step per instance, for
       * reference by vertex element setup.
       */
      switch (desc->inputRate) {
      default:
      case VK_VERTEX_INPUT_RATE_VERTEX:
         pipeline->vb[desc->binding].instanced = false;
         break;
      case VK_VERTEX_INPUT_RATE_INSTANCE:
         pipeline->vb[desc->binding].instanced = true;
         break;
      }

      pipeline->vb[desc->binding].instance_divisor = 1;
   }

   const VkPipelineVertexInputDivisorStateCreateInfoEXT *vi_div_state =
      vk_find_struct_const(vi_info->pNext,
                           PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT);
   if (vi_div_state) {
      for (uint32_t i = 0; i < vi_div_state->vertexBindingDivisorCount; i++) {
         const VkVertexInputBindingDivisorDescriptionEXT *desc =
            &vi_div_state->pVertexBindingDivisors[i];

         pipeline->vb[desc->binding].instance_divisor = desc->divisor;
      }
   }

   /* Our implementation of VK_KHR_multiview uses instancing to draw the
    * different views. If the client asks for instancing, we need to
    * multiply the instance divisor by the number of views to ensure that
    * we repeat the client's per-instance data once for each view.
    */
   if (pipeline->subpass->view_mask && !pipeline->use_primitive_replication) {
      const uint32_t view_count = anv_subpass_view_count(pipeline->subpass);
      for (uint32_t vb = 0; vb < MAX_VBS; vb++) {
         if (pipeline->vb[vb].instanced)
            pipeline->vb[vb].instance_divisor *= view_count;
      }
   }
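
   /* For example, with two views and a client-provided divisor of 3, the
    * effective divisor becomes 6: the attribute advances once every six
    * hardware instances, i.e. once every three client instances, so each
    * per-instance value is replayed for both views.
    */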

   const VkPipelineInputAssemblyStateCreateInfo *ia_info =
      pCreateInfo->pInputAssemblyState;
   const VkPipelineTessellationStateCreateInfo *tess_info =
      pCreateInfo->pTessellationState;

   if (anv_pipeline_has_stage(pipeline, MESA_SHADER_TESS_EVAL))
      pipeline->topology = _3DPRIM_PATCHLIST(tess_info->patchControlPoints);
   else
      pipeline->topology = vk_to_intel_primitive_type[ia_info->topology];

   /* If rasterization is not enabled, ms_info must be ignored. */
   const bool raster_enabled =
      !pCreateInfo->pRasterizationState->rasterizerDiscardEnable ||
      (pipeline->dynamic_states &
       ANV_CMD_DIRTY_DYNAMIC_RASTERIZER_DISCARD_ENABLE);

   const VkPipelineMultisampleStateCreateInfo *ms_info =
      raster_enabled ? pCreateInfo->pMultisampleState : NULL;

   const VkPipelineRasterizationLineStateCreateInfoEXT *line_info =
      vk_find_struct_const(pCreateInfo->pRasterizationState->pNext,
                           PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);

   /* Store the line mode, polygon mode and rasterization samples; these
    * are used for dynamic primitive topology.
    */
   pipeline->line_mode = vk_line_rasterization_mode(line_info, ms_info);
   pipeline->polygon_mode = pCreateInfo->pRasterizationState->polygonMode;
   pipeline->rasterization_samples =
      ms_info ? ms_info->rasterizationSamples : 1;

   return VK_SUCCESS;
}

static VkResult
compile_upload_rt_shader(struct anv_ray_tracing_pipeline *pipeline,
                         struct anv_pipeline_cache *cache,
                         nir_shader *nir,
                         struct anv_pipeline_stage *stage,
                         struct anv_shader_bin **shader_out,
                         void *mem_ctx)
{
   const struct brw_compiler *compiler =
      pipeline->base.device->physical->compiler;
   const struct intel_device_info *devinfo = compiler->devinfo;

   nir_shader **resume_shaders = NULL;
   uint32_t num_resume_shaders = 0;
   if (nir->info.stage != MESA_SHADER_COMPUTE) {
      NIR_PASS_V(nir, nir_lower_shader_calls,
                 nir_address_format_64bit_global,
                 BRW_BTD_STACK_ALIGN,
                 &resume_shaders, &num_resume_shaders, mem_ctx);
      NIR_PASS_V(nir, brw_nir_lower_shader_calls);
      NIR_PASS_V(nir, brw_nir_lower_rt_intrinsics, devinfo);
   }

   for (unsigned i = 0; i < num_resume_shaders; i++) {
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_shader_calls);
      NIR_PASS_V(resume_shaders[i], brw_nir_lower_rt_intrinsics, devinfo);
   }

   stage->code =
      brw_compile_bs(compiler, pipeline->base.device, mem_ctx,
                     &stage->key.bs, &stage->prog_data.bs, nir,
                     num_resume_shaders, resume_shaders, stage->stats, NULL);
   if (stage->code == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   /* Ray-tracing shaders don't have a "real" bind map */
   struct anv_pipeline_bind_map empty_bind_map = {};

   const unsigned code_size = stage->prog_data.base.program_size;
   struct anv_shader_bin *bin =
      anv_device_upload_kernel(pipeline->base.device,
                               cache,
                               stage->stage,
                               &stage->cache_key, sizeof(stage->cache_key),
                               stage->code, code_size,
                               &stage->prog_data.base,
                               sizeof(stage->prog_data.bs),
                               stage->stats, 1,
                               NULL, &empty_bind_map);
   if (bin == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   /* TODO: Figure out executables for resume shaders */
   anv_pipeline_add_executables(&pipeline->base, stage, bin);
   util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, bin);

   *shader_out = bin;

   return VK_SUCCESS;
}

static bool
is_rt_stack_size_dynamic(const VkRayTracingPipelineCreateInfoKHR *info)
{
   if (info->pDynamicState == NULL)
      return false;

   for (unsigned i = 0; i < info->pDynamicState->dynamicStateCount; i++) {
      if (info->pDynamicState->pDynamicStates[i] ==
          VK_DYNAMIC_STATE_RAY_TRACING_PIPELINE_STACK_SIZE_KHR)
         return true;
   }

   return false;
}

static void
anv_pipeline_compute_ray_tracing_stacks(struct anv_ray_tracing_pipeline *pipeline,
                                        const VkRayTracingPipelineCreateInfoKHR *info,
                                        uint32_t *stack_max)
{
   if (is_rt_stack_size_dynamic(info)) {
      pipeline->stack_size = 0; /* 0 means dynamic */
   } else {
      /* From the Vulkan spec:
       *
       *    "If the stack size is not set explicitly, the stack size for a
       *    pipeline is:
       *
       *       rayGenStackMax +
       *       min(1, maxPipelineRayRecursionDepth) ×
       *       max(closestHitStackMax, missStackMax,
       *           intersectionStackMax + anyHitStackMax) +
       *       max(0, maxPipelineRayRecursionDepth-1) ×
       *       max(closestHitStackMax, missStackMax) +
       *       2 × callableStackMax"
       */
      pipeline->stack_size =
         stack_max[MESA_SHADER_RAYGEN] +
         MIN2(1, info->maxPipelineRayRecursionDepth) *
         MAX4(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS],
              stack_max[MESA_SHADER_INTERSECTION],
              stack_max[MESA_SHADER_ANY_HIT]) +
         MAX2(0, (int)info->maxPipelineRayRecursionDepth - 1) *
         MAX2(stack_max[MESA_SHADER_CLOSEST_HIT],
              stack_max[MESA_SHADER_MISS]) +
         2 * stack_max[MESA_SHADER_CALLABLE];
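
      /* Worked example: with maxPipelineRayRecursionDepth = 2 and
       * per-stage maxima rayGen = 64, closestHit = 128, miss = 32,
       * intersection = 16, anyHit = 16, callable = 0, this evaluates to
       * 64 + 1 * 128 + 1 * 128 + 0 = 320 bytes.
       */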

      /* This is an extremely unlikely case but we need to set it to some
       * non-zero value so that we don't accidentally think it's dynamic.
       * Our minimum stack size is 2KB anyway so we could set it to any
       * small value we like.
       */
      if (pipeline->stack_size == 0)
         pipeline->stack_size = 1;
   }
}

static struct anv_pipeline_stage *
anv_pipeline_init_ray_tracing_stages(struct anv_ray_tracing_pipeline *pipeline,
                                     const VkRayTracingPipelineCreateInfoKHR *info,
                                     void *pipeline_ctx)
{
   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   /* Create enough stage entries for all shader modules plus potential
    * combinations in the groups.
    */
   struct anv_pipeline_stage *stages =
      rzalloc_array(pipeline_ctx, struct anv_pipeline_stage, info->stageCount);

   for (uint32_t i = 0; i < info->stageCount; i++) {
      const VkPipelineShaderStageCreateInfo *sinfo = &info->pStages[i];
      if (sinfo->module == VK_NULL_HANDLE)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i] = (struct anv_pipeline_stage) {
         .stage = vk_to_mesa_shader_stage(sinfo->stage),
         .module = vk_shader_module_from_handle(sinfo->module),
         .entrypoint = sinfo->pName,
         .spec_info = sinfo->pSpecializationInfo,
         .cache_key = {
            .stage = vk_to_mesa_shader_stage(sinfo->stage),
         },
         .feedback = {
            .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
         },
      };

      populate_bs_prog_key(&pipeline->base.device->info, sinfo->flags,
                           pipeline->base.device->robust_buffer_access,
                           &stages[i].key.bs);

      anv_pipeline_hash_shader(stages[i].module,
                               stages[i].entrypoint,
                               stages[i].stage,
                               stages[i].spec_info,
                               stages[i].shader_sha1);

      if (stages[i].stage != MESA_SHADER_INTERSECTION) {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout, &stages[i],
                                              stages[i].cache_key.sha1);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];

      if (ginfo->type != VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR)
         continue;

      int64_t stage_start = os_time_get_nano();

      uint32_t intersection_idx = ginfo->intersectionShader;
      assert(intersection_idx < info->stageCount);

      uint32_t any_hit_idx = ginfo->anyHitShader;
      if (any_hit_idx != VK_SHADER_UNUSED_KHR) {
         assert(any_hit_idx < info->stageCount);
         anv_pipeline_hash_ray_tracing_combined_shader(pipeline,
                                                       layout,
                                                       &stages[intersection_idx],
                                                       &stages[any_hit_idx],
                                                       stages[intersection_idx].cache_key.sha1);
      } else {
         anv_pipeline_hash_ray_tracing_shader(pipeline, layout,
                                              &stages[intersection_idx],
                                              stages[intersection_idx].cache_key.sha1);
      }

      stages[intersection_idx].feedback.duration += os_time_get_nano() - stage_start;
   }

   return stages;
}

static bool
anv_pipeline_load_cached_shaders(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info,
                                 struct anv_pipeline_stage *stages,
                                 uint32_t *stack_max)
{
   uint32_t shaders = 0, cache_hits = 0;
   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      shaders++;

      int64_t stage_start = os_time_get_nano();

      bool cache_hit;
      stages[i].bin = anv_device_search_for_kernel(pipeline->base.device, cache,
                                                   &stages[i].cache_key,
                                                   sizeof(stages[i].cache_key),
                                                   &cache_hit);
      if (cache_hit) {
         cache_hits++;
         stages[i].feedback.flags |=
            VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      }

      if (stages[i].bin != NULL) {
         anv_pipeline_add_executables(&pipeline->base, &stages[i], stages[i].bin);
         util_dynarray_append(&pipeline->shaders, struct anv_shader_bin *, stages[i].bin);

         uint32_t stack_size =
            brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
         stack_max[stages[i].stage] =
            MAX2(stack_max[stages[i].stage], stack_size);
      }

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   return cache_hits == shaders;
}

static VkResult
anv_pipeline_compile_ray_tracing(struct anv_ray_tracing_pipeline *pipeline,
                                 struct anv_pipeline_cache *cache,
                                 const VkRayTracingPipelineCreateInfoKHR *info)
{
   const struct intel_device_info *devinfo = &pipeline->base.device->info;
   VkResult result;

   VkPipelineCreationFeedbackEXT pipeline_feedback = {
      .flags = VK_PIPELINE_CREATION_FEEDBACK_VALID_BIT_EXT,
   };
   int64_t pipeline_start = os_time_get_nano();

   void *pipeline_ctx = ralloc_context(NULL);

   struct anv_pipeline_stage *stages =
      anv_pipeline_init_ray_tracing_stages(pipeline, info, pipeline_ctx);

   ANV_FROM_HANDLE(anv_pipeline_layout, layout, info->layout);

   const bool skip_cache_lookup =
      (pipeline->base.flags & VK_PIPELINE_CREATE_CAPTURE_INTERNAL_REPRESENTATIONS_BIT_KHR);

   uint32_t stack_max[MESA_VULKAN_SHADER_STAGES] = {};

   if (!skip_cache_lookup &&
       anv_pipeline_load_cached_shaders(pipeline, cache, info, stages, stack_max)) {
      pipeline_feedback.flags |=
         VK_PIPELINE_CREATION_FEEDBACK_APPLICATION_PIPELINE_CACHE_HIT_BIT_EXT;
      goto done;
   }

   if (info->flags & VK_PIPELINE_CREATE_FAIL_ON_PIPELINE_COMPILE_REQUIRED_BIT_EXT) {
      ralloc_free(pipeline_ctx);
      return VK_PIPELINE_COMPILE_REQUIRED_EXT;
   }

   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      int64_t stage_start = os_time_get_nano();

      stages[i].nir = anv_pipeline_stage_get_nir(&pipeline->base, cache,
                                                 pipeline_ctx, &stages[i]);
      if (stages[i].nir == NULL) {
         ralloc_free(pipeline_ctx);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }

      anv_pipeline_lower_nir(&pipeline->base, pipeline_ctx, &stages[i], layout);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->stageCount; i++) {
      if (stages[i].entrypoint == NULL)
         continue;

      /* Shader found in cache already. */
      if (stages[i].bin != NULL)
         continue;

      /* We handle intersection shaders as part of the group */
      if (stages[i].stage == MESA_SHADER_INTERSECTION)
         continue;

      int64_t stage_start = os_time_get_nano();

      void *stage_ctx = ralloc_context(pipeline_ctx);

      nir_shader *nir = nir_shader_clone(stage_ctx, stages[i].nir);
      switch (stages[i].stage) {
      case MESA_SHADER_RAYGEN:
         brw_nir_lower_raygen(nir);
         break;

      case MESA_SHADER_ANY_HIT:
         brw_nir_lower_any_hit(nir, devinfo);
         break;

      case MESA_SHADER_CLOSEST_HIT:
         brw_nir_lower_closest_hit(nir);
         break;

      case MESA_SHADER_MISS:
         brw_nir_lower_miss(nir);
         break;

      case MESA_SHADER_INTERSECTION:
         unreachable("These are handled later");

      case MESA_SHADER_CALLABLE:
         brw_nir_lower_callable(nir);
         break;

      default:
         unreachable("Invalid ray-tracing shader stage");
      }

      result = compile_upload_rt_shader(pipeline, cache, nir, &stages[i],
                                        &stages[i].bin, stage_ctx);
      if (result != VK_SUCCESS) {
         ralloc_free(pipeline_ctx);
         return result;
      }

      uint32_t stack_size =
         brw_bs_prog_data_const(stages[i].bin->prog_data)->max_stack_size;
      stack_max[stages[i].stage] = MAX2(stack_max[stages[i].stage], stack_size);

      ralloc_free(stage_ctx);

      stages[i].feedback.duration += os_time_get_nano() - stage_start;
   }

   for (uint32_t i = 0; i < info->groupCount; i++) {
      const VkRayTracingShaderGroupCreateInfoKHR *ginfo = &info->pGroups[i];
      struct anv_rt_shader_group *group = &pipeline->groups[i];
      group->type = ginfo->type;
      switch (ginfo->type) {
      case VK_RAY_TRACING_SHADER_GROUP_TYPE_GENERAL_KHR:
         assert(ginfo->generalShader < info->stageCount);
         group->general = stages[ginfo->generalShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_TRIANGLES_HIT_GROUP_KHR:
         if (ginfo->anyHitShader < info->stageCount)
            group->any_hit = stages[ginfo->anyHitShader].bin;

         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;
         break;

      case VK_RAY_TRACING_SHADER_GROUP_TYPE_PROCEDURAL_HIT_GROUP_KHR: {
         if (ginfo->closestHitShader < info->stageCount)
            group->closest_hit = stages[ginfo->closestHitShader].bin;

         uint32_t intersection_idx = info->pGroups[i].intersectionShader;
         assert(intersection_idx < info->stageCount);

         /* Only compile this stage if it was not already found in the
          * cache.
          */
         if (stages[intersection_idx].bin == NULL) {
            /* The any-hit and intersection shaders have to be combined */
            uint32_t any_hit_idx = info->pGroups[i].anyHitShader;
            const nir_shader *any_hit = NULL;
            if (any_hit_idx < info->stageCount)
               any_hit = stages[any_hit_idx].nir;

            void *group_ctx = ralloc_context(pipeline_ctx);
            nir_shader *intersection =
               nir_shader_clone(group_ctx, stages[intersection_idx].nir);

            brw_nir_lower_combined_intersection_any_hit(intersection, any_hit,
                                                        devinfo);

            result = compile_upload_rt_shader(pipeline, cache,
                                              intersection,
                                              &stages[intersection_idx],
                                              &group->intersection,
                                              group_ctx);
            ralloc_free(group_ctx);
            if (result != VK_SUCCESS)
               return result;
         } else {
            group->intersection = stages[intersection_idx].bin;
         }

         uint32_t stack_size =
            brw_bs_prog_data_const(group->intersection->prog_data)->max_stack_size;
         stack_max[MESA_SHADER_INTERSECTION] =
            MAX2(stack_max[MESA_SHADER_INTERSECTION], stack_size);

         break;
      }

      default:
         unreachable("Invalid ray tracing shader group type");
      }
   }

done:
   ralloc_free(pipeline_ctx);

   anv_pipeline_compute_ray_tracing_stacks(pipeline, info, stack_max);

   pipeline_feedback.duration = os_time_get_nano() - pipeline_start;

   const VkPipelineCreationFeedbackCreateInfoEXT *create_feedback =
      vk_find_struct_const(info->pNext, PIPELINE_CREATION_FEEDBACK_CREATE_INFO_EXT);
   if (create_feedback) {
      *create_feedback->pPipelineCreationFeedback = pipeline_feedback;

      assert(info->stageCount == create_feedback->pipelineStageCreationFeedbackCount);
      for (uint32_t i = 0; i < info->stageCount; i++) {
         gl_shader_stage s = vk_to_mesa_shader_stage(info->pStages[i].stage);
         create_feedback->pPipelineStageCreationFeedbacks[i] = stages[s].feedback;
      }
   }

   return VK_SUCCESS;
}
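
/* The two device-level kernels below are shared by all ray-tracing
 * pipelines and built at most once per device, going through the default
 * pipeline cache: a raygen "trampoline" (compiled as a compute shader)
 * that dispatches the actual raygen work, and a trivial return shader
 * (compiled as a callable) used where a no-op bindless shader is needed.
 */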

VkResult
anv_device_init_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return VK_SUCCESS;

   bool cache_hit;

   struct brw_rt_trampoline {
      char name[16];
      struct brw_cs_prog_key key;
   } trampoline_key = {
      .name = "rt-trampoline",
      .key = {
         /* TODO: Other subgroup sizes? */
         .base.subgroup_size_type = BRW_SUBGROUP_SIZE_REQUIRE_8,
      },
   };
   device->rt_trampoline =
      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
                                   &trampoline_key, sizeof(trampoline_key),
                                   &cache_hit);
   if (device->rt_trampoline == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trampoline_nir =
         brw_nir_create_raygen_trampoline(device->physical->compiler, tmp_ctx);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      uint32_t dummy_params[4] = { 0, };
      struct brw_cs_prog_data trampoline_prog_data = {
         .base.nr_params = 4,
         .base.param = dummy_params,
         .uses_inline_data = true,
         .uses_btd_stack_ids = true,
      };
      struct brw_compile_cs_params params = {
         .nir = trampoline_nir,
         .key = &trampoline_key.key,
         .prog_data = &trampoline_prog_data,
         .log_data = device,
      };
      const unsigned *tramp_data =
         brw_compile_cs(device->physical->compiler, tmp_ctx, &params);

      device->rt_trampoline =
         anv_device_upload_kernel(device, &device->default_pipeline_cache,
                                  MESA_SHADER_COMPUTE,
                                  &trampoline_key, sizeof(trampoline_key),
                                  tramp_data,
                                  trampoline_prog_data.base.program_size,
                                  &trampoline_prog_data.base,
                                  sizeof(trampoline_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trampoline == NULL)
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   }

   struct brw_rt_trivial_return {
      char name[16];
      struct brw_bs_prog_key key;
   } return_key = {
      .name = "rt-trivial-ret",
   };
   device->rt_trivial_return =
      anv_device_search_for_kernel(device, &device->default_pipeline_cache,
                                   &return_key, sizeof(return_key),
                                   &cache_hit);
   if (device->rt_trivial_return == NULL) {
      void *tmp_ctx = ralloc_context(NULL);
      nir_shader *trivial_return_nir =
         brw_nir_create_trivial_return_shader(device->physical->compiler, tmp_ctx);

      NIR_PASS_V(trivial_return_nir, brw_nir_lower_rt_intrinsics, &device->info);

      struct anv_pipeline_bind_map bind_map = {
         .surface_count = 0,
         .sampler_count = 0,
      };
      struct brw_bs_prog_data return_prog_data = { 0, };
      const unsigned *return_data =
         brw_compile_bs(device->physical->compiler, device, tmp_ctx,
                        &return_key.key, &return_prog_data, trivial_return_nir,
                        0, 0, NULL, NULL);

      device->rt_trivial_return =
         anv_device_upload_kernel(device, &device->default_pipeline_cache,
                                  MESA_SHADER_CALLABLE,
                                  &return_key, sizeof(return_key),
                                  return_data, return_prog_data.base.program_size,
                                  &return_prog_data.base, sizeof(return_prog_data),
                                  NULL, 0, NULL, &bind_map);

      ralloc_free(tmp_ctx);

      if (device->rt_trivial_return == NULL) {
         anv_shader_bin_unref(device, device->rt_trampoline);
         return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   }

   return VK_SUCCESS;
}

void
anv_device_finish_rt_shaders(struct anv_device *device)
{
   if (!device->vk.enabled_extensions.KHR_ray_tracing_pipeline)
      return;

   anv_shader_bin_unref(device, device->rt_trampoline);
}

VkResult
anv_ray_tracing_pipeline_init(struct anv_ray_tracing_pipeline *pipeline,
                              struct anv_device *device,
                              struct anv_pipeline_cache *cache,
                              const VkRayTracingPipelineCreateInfoKHR *pCreateInfo,
                              const VkAllocationCallbacks *alloc)
{
   VkResult result;

   /* Zero things out so our clean-up works */
   memset(pipeline->groups, 0,
          pipeline->group_count * sizeof(*pipeline->groups));

   util_dynarray_init(&pipeline->shaders, pipeline->base.mem_ctx);

   result = anv_pipeline_compile_ray_tracing(pipeline, cache, pCreateInfo);
   if (result != VK_SUCCESS)
      goto fail;

   anv_pipeline_setup_l3_config(&pipeline->base, /* needs_slm */ false);

   return VK_SUCCESS;

fail:
   util_dynarray_foreach(&pipeline->shaders,
                         struct anv_shader_bin *, shader) {
      anv_shader_bin_unref(device, *shader);
   }
   return result;
}

#define WRITE_STR(field, ...) ({                               \
   memset(field, 0, sizeof(field));                            \
   UNUSED int i = snprintf(field, sizeof(field), __VA_ARGS__); \
   assert(i > 0 && i < sizeof(field));                         \
})
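
/* WRITE_STR zeroes the destination array and asserts that the formatted
 * string fit: snprintf returns the length the string would have had
 * without truncation, so i >= sizeof(field) means the message was cut
 * off. As a hypothetical example, WRITE_STR(props->name, "%s%d %s",
 * "SIMD", 16, "fragment") yields "SIMD16 fragment".
 */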
Count");3196WRITE_STR(stat->description,3197"Number of instructions in the final generated shader "3198"executable which access external units such as the "3199"constant cache or the sampler.");3200stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;3201stat->value.u64 = exe->stats.sends;3202}32033204vk_outarray_append(&out, stat) {3205WRITE_STR(stat->name, "Loop Count");3206WRITE_STR(stat->description,3207"Number of loops (not unrolled) in the final generated "3208"shader executable.");3209stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;3210stat->value.u64 = exe->stats.loops;3211}32123213vk_outarray_append(&out, stat) {3214WRITE_STR(stat->name, "Cycle Count");3215WRITE_STR(stat->description,3216"Estimate of the number of EU cycles required to execute "3217"the final generated executable. This is an estimate only "3218"and may vary greatly from actual run-time performance.");3219stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;3220stat->value.u64 = exe->stats.cycles;3221}32223223vk_outarray_append(&out, stat) {3224WRITE_STR(stat->name, "Spill Count");3225WRITE_STR(stat->description,3226"Number of scratch spill operations. This gives a rough "3227"estimate of the cost incurred due to spilling temporary "3228"values to memory. If this is non-zero, you may want to "3229"adjust your shader to reduce register pressure.");3230stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;3231stat->value.u64 = exe->stats.spills;3232}32333234vk_outarray_append(&out, stat) {3235WRITE_STR(stat->name, "Fill Count");3236WRITE_STR(stat->description,3237"Number of scratch fill operations. This gives a rough "3238"estimate of the cost incurred due to spilling temporary "3239"values to memory. If this is non-zero, you may want to "3240"adjust your shader to reduce register pressure.");3241stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;3242stat->value.u64 = exe->stats.fills;3243}32443245vk_outarray_append(&out, stat) {3246WRITE_STR(stat->name, "Scratch Memory Size");3247WRITE_STR(stat->description,3248"Number of bytes of scratch memory required by the "3249"generated shader executable. 

   vk_outarray_append(&out, stat) {
      WRITE_STR(stat->name, "Scratch Memory Size");
      WRITE_STR(stat->description,
                "Number of bytes of scratch memory required by the "
                "generated shader executable. If this is non-zero, you "
                "may want to adjust your shader to reduce register "
                "pressure.");
      stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
      stat->value.u64 = prog_data->total_scratch;
   }

   if (gl_shader_stage_uses_workgroup(exe->stage)) {
      vk_outarray_append(&out, stat) {
         WRITE_STR(stat->name, "Workgroup Memory Size");
         WRITE_STR(stat->description,
                   "Number of bytes of workgroup shared memory used by this "
                   "shader including any padding.");
         stat->format = VK_PIPELINE_EXECUTABLE_STATISTIC_FORMAT_UINT64_KHR;
         stat->value.u64 = prog_data->total_shared;
      }
   }

   return vk_outarray_status(&out);
}

static bool
write_ir_text(VkPipelineExecutableInternalRepresentationKHR* ir,
              const char *data)
{
   ir->isText = VK_TRUE;

   size_t data_len = strlen(data) + 1;

   if (ir->pData == NULL) {
      ir->dataSize = data_len;
      return true;
   }

   strncpy(ir->pData, data, ir->dataSize);
   if (ir->dataSize < data_len)
      return false;

   ir->dataSize = data_len;
   return true;
}
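
/* write_ir_text follows the usual Vulkan two-call idiom: when ir->pData
 * is NULL it only reports the required size in ir->dataSize; otherwise it
 * copies at most ir->dataSize bytes and returns false on truncation,
 * which the caller below turns into VK_INCOMPLETE.
 */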

VkResult anv_GetPipelineExecutableInternalRepresentationsKHR(
    VkDevice                                    device,
    const VkPipelineExecutableInfoKHR*          pExecutableInfo,
    uint32_t*                                   pInternalRepresentationCount,
    VkPipelineExecutableInternalRepresentationKHR* pInternalRepresentations)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, pExecutableInfo->pipeline);
   VK_OUTARRAY_MAKE(out, pInternalRepresentations,
                    pInternalRepresentationCount);
   bool incomplete_text = false;

   const struct anv_pipeline_executable *exe =
      anv_pipeline_get_executable(pipeline, pExecutableInfo->executableIndex);

   if (exe->nir) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "Final NIR");
         WRITE_STR(ir->description,
                   "Final NIR before going into the back-end compiler");

         if (!write_ir_text(ir, exe->nir))
            incomplete_text = true;
      }
   }

   if (exe->disasm) {
      vk_outarray_append(&out, ir) {
         WRITE_STR(ir->name, "GEN Assembly");
         WRITE_STR(ir->description,
                   "Final GEN assembly for the generated shader binary");

         if (!write_ir_text(ir, exe->disasm))
            incomplete_text = true;
      }
   }

   return incomplete_text ? VK_INCOMPLETE : vk_outarray_status(&out);
}

VkResult
anv_GetRayTracingShaderGroupHandlesKHR(
    VkDevice                                    device,
    VkPipeline                                  _pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
   if (pipeline->type != ANV_PIPELINE_RAY_TRACING)
      return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   for (uint32_t i = 0; i < groupCount; i++) {
      struct anv_rt_shader_group *group = &rt_pipeline->groups[firstGroup + i];
      memcpy(pData, group->handle, sizeof(group->handle));
      pData += sizeof(group->handle);
   }

   return VK_SUCCESS;
}

VkResult
anv_GetRayTracingCaptureReplayShaderGroupHandlesKHR(
    VkDevice                                    device,
    VkPipeline                                  pipeline,
    uint32_t                                    firstGroup,
    uint32_t                                    groupCount,
    size_t                                      dataSize,
    void*                                       pData)
{
   unreachable("Unimplemented");
   return vk_error(VK_ERROR_FEATURE_NOT_PRESENT);
}

VkDeviceSize
anv_GetRayTracingShaderGroupStackSizeKHR(
    VkDevice                                    device,
    VkPipeline                                  _pipeline,
    uint32_t                                    group,
    VkShaderGroupShaderKHR                      groupShader)
{
   ANV_FROM_HANDLE(anv_pipeline, pipeline, _pipeline);
   assert(pipeline->type == ANV_PIPELINE_RAY_TRACING);

   struct anv_ray_tracing_pipeline *rt_pipeline =
      anv_pipeline_to_ray_tracing(pipeline);

   assert(group < rt_pipeline->group_count);

   struct anv_shader_bin *bin;
   switch (groupShader) {
   case VK_SHADER_GROUP_SHADER_GENERAL_KHR:
      bin = rt_pipeline->groups[group].general;
      break;

   case VK_SHADER_GROUP_SHADER_CLOSEST_HIT_KHR:
      bin = rt_pipeline->groups[group].closest_hit;
      break;

   case VK_SHADER_GROUP_SHADER_ANY_HIT_KHR:
      bin = rt_pipeline->groups[group].any_hit;
      break;

   case VK_SHADER_GROUP_SHADER_INTERSECTION_KHR:
      bin = rt_pipeline->groups[group].intersection;
      break;

   default:
      unreachable("Invalid VkShaderGroupShader enum");
   }

   if (bin == NULL)
      return 0;

   return brw_bs_prog_data_const(bin->prog_data)->max_stack_size;
}