Path: blob/21.2-virgl/src/gallium/frontends/lavapipe/lvp_pipeline.c
4565 views
/*1* Copyright © 2019 Red Hat.2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "lvp_private.h"24#include "vk_util.h"25#include "glsl_types.h"26#include "spirv/nir_spirv.h"27#include "nir/nir_builder.h"28#include "lvp_lower_vulkan_resource.h"29#include "pipe/p_state.h"30#include "pipe/p_context.h"31#include "nir/nir_xfb_info.h"3233#define SPIR_V_MAGIC_NUMBER 0x072302033435#define LVP_PIPELINE_DUP(dst, src, type, count) do { \36type *temp = ralloc_array(mem_ctx, type, count); \37if (!temp) return VK_ERROR_OUT_OF_HOST_MEMORY; \38memcpy(temp, (src), sizeof(type) * count); \39dst = temp; \40} while(0)4142VKAPI_ATTR void VKAPI_CALL lvp_DestroyPipeline(43VkDevice _device,44VkPipeline _pipeline,45const VkAllocationCallbacks* pAllocator)46{47LVP_FROM_HANDLE(lvp_device, device, _device);48LVP_FROM_HANDLE(lvp_pipeline, pipeline, _pipeline);4950if (!_pipeline)51return;5253if (pipeline->shader_cso[PIPE_SHADER_VERTEX])54device->queue.ctx->delete_vs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_VERTEX]);55if (pipeline->shader_cso[PIPE_SHADER_FRAGMENT])56device->queue.ctx->delete_fs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_FRAGMENT]);57if (pipeline->shader_cso[PIPE_SHADER_GEOMETRY])58device->queue.ctx->delete_gs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_GEOMETRY]);59if (pipeline->shader_cso[PIPE_SHADER_TESS_CTRL])60device->queue.ctx->delete_tcs_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_CTRL]);61if (pipeline->shader_cso[PIPE_SHADER_TESS_EVAL])62device->queue.ctx->delete_tes_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_TESS_EVAL]);63if (pipeline->shader_cso[PIPE_SHADER_COMPUTE])64device->queue.ctx->delete_compute_state(device->queue.ctx, pipeline->shader_cso[PIPE_SHADER_COMPUTE]);6566ralloc_free(pipeline->mem_ctx);67vk_object_base_finish(&pipeline->base);68vk_free2(&device->vk.alloc, pAllocator, pipeline);69}7071static VkResult72deep_copy_shader_stage(void *mem_ctx,73struct VkPipelineShaderStageCreateInfo *dst,74const struct VkPipelineShaderStageCreateInfo *src)75{76dst->sType = src->sType;77dst->pNext = NULL;78dst->flags = src->flags;79dst->stage = src->stage;80dst->module = src->module;81dst->pName = src->pName;82dst->pSpecializationInfo = NULL;83if (src->pSpecializationInfo) {84const VkSpecializationInfo *src_spec = src->pSpecializationInfo;85VkSpecializationInfo *dst_spec = ralloc_size(mem_ctx, sizeof(VkSpecializationInfo) +86src_spec->mapEntryCount * sizeof(VkSpecializationMapEntry) +87src_spec->dataSize);88VkSpecializationMapEntry *maps = (VkSpecializationMapEntry *)(dst_spec + 1);89dst_spec->pMapEntries = maps;90void *pdata = (void *)(dst_spec->pMapEntries + src_spec->mapEntryCount);91dst_spec->pData = pdata;929394dst_spec->mapEntryCount = src_spec->mapEntryCount;95dst_spec->dataSize = src_spec->dataSize;96memcpy(pdata, src_spec->pData, src->pSpecializationInfo->dataSize);97memcpy(maps, src_spec->pMapEntries, src_spec->mapEntryCount * sizeof(VkSpecializationMapEntry));98dst->pSpecializationInfo = dst_spec;99}100return VK_SUCCESS;101}102103static VkResult104deep_copy_vertex_input_state(void *mem_ctx,105struct VkPipelineVertexInputStateCreateInfo *dst,106const struct VkPipelineVertexInputStateCreateInfo *src)107{108dst->sType = src->sType;109dst->pNext = NULL;110dst->flags = src->flags;111dst->vertexBindingDescriptionCount = src->vertexBindingDescriptionCount;112113LVP_PIPELINE_DUP(dst->pVertexBindingDescriptions,114src->pVertexBindingDescriptions,115VkVertexInputBindingDescription,116src->vertexBindingDescriptionCount);117118dst->vertexAttributeDescriptionCount = src->vertexAttributeDescriptionCount;119120LVP_PIPELINE_DUP(dst->pVertexAttributeDescriptions,121src->pVertexAttributeDescriptions,122VkVertexInputAttributeDescription,123src->vertexAttributeDescriptionCount);124125if (src->pNext) {126vk_foreach_struct(ext, src->pNext) {127switch (ext->sType) {128case VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_DIVISOR_STATE_CREATE_INFO_EXT: {129VkPipelineVertexInputDivisorStateCreateInfoEXT *ext_src = (VkPipelineVertexInputDivisorStateCreateInfoEXT *)ext;;130VkPipelineVertexInputDivisorStateCreateInfoEXT *ext_dst = ralloc(mem_ctx, VkPipelineVertexInputDivisorStateCreateInfoEXT);131132ext_dst->sType = ext_src->sType;133ext_dst->vertexBindingDivisorCount = ext_src->vertexBindingDivisorCount;134135LVP_PIPELINE_DUP(ext_dst->pVertexBindingDivisors,136ext_src->pVertexBindingDivisors,137VkVertexInputBindingDivisorDescriptionEXT,138ext_src->vertexBindingDivisorCount);139140dst->pNext = ext_dst;141break;142}143default:144break;145}146}147}148return VK_SUCCESS;149}150151static bool152dynamic_state_contains(const VkPipelineDynamicStateCreateInfo *src, VkDynamicState state)153{154if (!src)155return false;156157for (unsigned i = 0; i < src->dynamicStateCount; i++)158if (src->pDynamicStates[i] == state)159return true;160return false;161}162163static VkResult164deep_copy_viewport_state(void *mem_ctx,165const VkPipelineDynamicStateCreateInfo *dyn_state,166VkPipelineViewportStateCreateInfo *dst,167const VkPipelineViewportStateCreateInfo *src)168{169dst->sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO;170dst->pNext = NULL;171dst->pViewports = NULL;172dst->pScissors = NULL;173174if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT) &&175!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT)) {176LVP_PIPELINE_DUP(dst->pViewports,177src->pViewports,178VkViewport,179src->viewportCount);180}181if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_VIEWPORT_WITH_COUNT_EXT))182dst->viewportCount = src->viewportCount;183else184dst->viewportCount = 0;185186if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR) &&187!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT)) {188if (src->pScissors)189LVP_PIPELINE_DUP(dst->pScissors,190src->pScissors,191VkRect2D,192src->scissorCount);193}194if (!dynamic_state_contains(dyn_state, VK_DYNAMIC_STATE_SCISSOR_WITH_COUNT_EXT))195dst->scissorCount = src->scissorCount;196else197dst->scissorCount = 0;198199return VK_SUCCESS;200}201202static VkResult203deep_copy_color_blend_state(void *mem_ctx,204VkPipelineColorBlendStateCreateInfo *dst,205const VkPipelineColorBlendStateCreateInfo *src)206{207dst->sType = src->sType;208dst->pNext = NULL;209dst->flags = src->flags;210dst->logicOpEnable = src->logicOpEnable;211dst->logicOp = src->logicOp;212213LVP_PIPELINE_DUP(dst->pAttachments,214src->pAttachments,215VkPipelineColorBlendAttachmentState,216src->attachmentCount);217dst->attachmentCount = src->attachmentCount;218219memcpy(&dst->blendConstants, &src->blendConstants, sizeof(float) * 4);220221return VK_SUCCESS;222}223224static VkResult225deep_copy_dynamic_state(void *mem_ctx,226VkPipelineDynamicStateCreateInfo *dst,227const VkPipelineDynamicStateCreateInfo *src)228{229dst->sType = src->sType;230dst->pNext = NULL;231dst->flags = src->flags;232233LVP_PIPELINE_DUP(dst->pDynamicStates,234src->pDynamicStates,235VkDynamicState,236src->dynamicStateCount);237dst->dynamicStateCount = src->dynamicStateCount;238return VK_SUCCESS;239}240241static VkResult242deep_copy_graphics_create_info(void *mem_ctx,243VkGraphicsPipelineCreateInfo *dst,244const VkGraphicsPipelineCreateInfo *src)245{246int i;247VkResult result;248VkPipelineShaderStageCreateInfo *stages;249VkPipelineVertexInputStateCreateInfo *vertex_input;250LVP_FROM_HANDLE(lvp_render_pass, pass, src->renderPass);251252dst->sType = src->sType;253dst->pNext = NULL;254dst->flags = src->flags;255dst->layout = src->layout;256dst->renderPass = src->renderPass;257dst->subpass = src->subpass;258dst->basePipelineHandle = src->basePipelineHandle;259dst->basePipelineIndex = src->basePipelineIndex;260261/* pStages */262VkShaderStageFlags stages_present = 0;263dst->stageCount = src->stageCount;264stages = ralloc_array(mem_ctx, VkPipelineShaderStageCreateInfo, dst->stageCount);265for (i = 0 ; i < dst->stageCount; i++) {266result = deep_copy_shader_stage(mem_ctx, &stages[i], &src->pStages[i]);267if (result != VK_SUCCESS)268return result;269stages_present |= src->pStages[i].stage;270}271dst->pStages = stages;272273/* pVertexInputState */274if (!dynamic_state_contains(src->pDynamicState, VK_DYNAMIC_STATE_VERTEX_INPUT_EXT)) {275vertex_input = ralloc(mem_ctx, VkPipelineVertexInputStateCreateInfo);276result = deep_copy_vertex_input_state(mem_ctx, vertex_input,277src->pVertexInputState);278if (result != VK_SUCCESS)279return result;280dst->pVertexInputState = vertex_input;281} else282dst->pVertexInputState = NULL;283284/* pInputAssemblyState */285LVP_PIPELINE_DUP(dst->pInputAssemblyState,286src->pInputAssemblyState,287VkPipelineInputAssemblyStateCreateInfo,2881);289290/* pTessellationState */291if (src->pTessellationState &&292(stages_present & (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) ==293(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)) {294LVP_PIPELINE_DUP(dst->pTessellationState,295src->pTessellationState,296VkPipelineTessellationStateCreateInfo,2971);298}299300/* pViewportState */301bool rasterization_disabled = !dynamic_state_contains(src->pDynamicState, VK_DYNAMIC_STATE_RASTERIZER_DISCARD_ENABLE_EXT) &&302src->pRasterizationState->rasterizerDiscardEnable;303if (src->pViewportState && !rasterization_disabled) {304VkPipelineViewportStateCreateInfo *viewport_state;305viewport_state = ralloc(mem_ctx, VkPipelineViewportStateCreateInfo);306if (!viewport_state)307return VK_ERROR_OUT_OF_HOST_MEMORY;308deep_copy_viewport_state(mem_ctx, src->pDynamicState,309viewport_state, src->pViewportState);310dst->pViewportState = viewport_state;311} else312dst->pViewportState = NULL;313314/* pRasterizationState */315LVP_PIPELINE_DUP(dst->pRasterizationState,316src->pRasterizationState,317VkPipelineRasterizationStateCreateInfo,3181);319320/* pMultisampleState */321if (src->pMultisampleState && !rasterization_disabled) {322VkPipelineMultisampleStateCreateInfo* ms_state;323ms_state = ralloc_size(mem_ctx, sizeof(VkPipelineMultisampleStateCreateInfo) + sizeof(VkSampleMask));324if (!ms_state)325return VK_ERROR_OUT_OF_HOST_MEMORY;326/* does samplemask need deep copy? */327memcpy(ms_state, src->pMultisampleState, sizeof(VkPipelineMultisampleStateCreateInfo));328if (src->pMultisampleState->pSampleMask) {329VkSampleMask *sample_mask = (VkSampleMask *)(ms_state + 1);330sample_mask[0] = src->pMultisampleState->pSampleMask[0];331ms_state->pSampleMask = sample_mask;332}333dst->pMultisampleState = ms_state;334} else335dst->pMultisampleState = NULL;336337/* pDepthStencilState */338if (src->pDepthStencilState && !rasterization_disabled && pass->has_zs_attachment) {339LVP_PIPELINE_DUP(dst->pDepthStencilState,340src->pDepthStencilState,341VkPipelineDepthStencilStateCreateInfo,3421);343} else344dst->pDepthStencilState = NULL;345346/* pColorBlendState */347if (src->pColorBlendState && !rasterization_disabled && pass->has_color_attachment) {348VkPipelineColorBlendStateCreateInfo* cb_state;349350cb_state = ralloc(mem_ctx, VkPipelineColorBlendStateCreateInfo);351if (!cb_state)352return VK_ERROR_OUT_OF_HOST_MEMORY;353deep_copy_color_blend_state(mem_ctx, cb_state, src->pColorBlendState);354dst->pColorBlendState = cb_state;355} else356dst->pColorBlendState = NULL;357358if (src->pDynamicState) {359VkPipelineDynamicStateCreateInfo* dyn_state;360361/* pDynamicState */362dyn_state = ralloc(mem_ctx, VkPipelineDynamicStateCreateInfo);363if (!dyn_state)364return VK_ERROR_OUT_OF_HOST_MEMORY;365deep_copy_dynamic_state(mem_ctx, dyn_state, src->pDynamicState);366dst->pDynamicState = dyn_state;367} else368dst->pDynamicState = NULL;369370return VK_SUCCESS;371}372373static VkResult374deep_copy_compute_create_info(void *mem_ctx,375VkComputePipelineCreateInfo *dst,376const VkComputePipelineCreateInfo *src)377{378VkResult result;379dst->sType = src->sType;380dst->pNext = NULL;381dst->flags = src->flags;382dst->layout = src->layout;383dst->basePipelineHandle = src->basePipelineHandle;384dst->basePipelineIndex = src->basePipelineIndex;385386result = deep_copy_shader_stage(mem_ctx, &dst->stage, &src->stage);387if (result != VK_SUCCESS)388return result;389return VK_SUCCESS;390}391392static inline unsigned393st_shader_stage_to_ptarget(gl_shader_stage stage)394{395switch (stage) {396case MESA_SHADER_VERTEX:397return PIPE_SHADER_VERTEX;398case MESA_SHADER_FRAGMENT:399return PIPE_SHADER_FRAGMENT;400case MESA_SHADER_GEOMETRY:401return PIPE_SHADER_GEOMETRY;402case MESA_SHADER_TESS_CTRL:403return PIPE_SHADER_TESS_CTRL;404case MESA_SHADER_TESS_EVAL:405return PIPE_SHADER_TESS_EVAL;406case MESA_SHADER_COMPUTE:407return PIPE_SHADER_COMPUTE;408default:409break;410}411412assert(!"should not be reached");413return PIPE_SHADER_VERTEX;414}415416static void417shared_var_info(const struct glsl_type *type, unsigned *size, unsigned *align)418{419assert(glsl_type_is_vector_or_scalar(type));420421uint32_t comp_size = glsl_type_is_boolean(type)422? 4 : glsl_get_bit_size(type) / 8;423unsigned length = glsl_get_vector_elements(type);424*size = comp_size * length,425*align = comp_size;426}427428static void429lvp_shader_compile_to_ir(struct lvp_pipeline *pipeline,430struct vk_shader_module *module,431const char *entrypoint_name,432gl_shader_stage stage,433const VkSpecializationInfo *spec_info)434{435nir_shader *nir;436const nir_shader_compiler_options *drv_options = pipeline->device->pscreen->get_compiler_options(pipeline->device->pscreen, PIPE_SHADER_IR_NIR, st_shader_stage_to_ptarget(stage));437bool progress;438uint32_t *spirv = (uint32_t *) module->data;439assert(spirv[0] == SPIR_V_MAGIC_NUMBER);440assert(module->size % 4 == 0);441442uint32_t num_spec_entries = 0;443struct nir_spirv_specialization *spec_entries = NULL;444if (spec_info && spec_info->mapEntryCount > 0) {445num_spec_entries = spec_info->mapEntryCount;446spec_entries = calloc(num_spec_entries, sizeof(*spec_entries));447for (uint32_t i = 0; i < num_spec_entries; i++) {448VkSpecializationMapEntry entry = spec_info->pMapEntries[i];449const void *data =450(char *)spec_info->pData + entry.offset;451assert((const char *)((char *)data + entry.size) <=452(char *)spec_info->pData + spec_info->dataSize);453454spec_entries[i].id = entry.constantID;455switch (entry.size) {456case 8:457spec_entries[i].value.u64 = *(const uint64_t *)data;458break;459case 4:460spec_entries[i].value.u32 = *(const uint32_t *)data;461break;462case 2:463spec_entries[i].value.u16 = *(const uint16_t *)data;464break;465case 1:466spec_entries[i].value.u8 = *(const uint8_t *)data;467break;468default:469assert(!"Invalid spec constant size");470break;471}472}473}474struct lvp_device *pdevice = pipeline->device;475const struct spirv_to_nir_options spirv_options = {476.environment = NIR_SPIRV_VULKAN,477.caps = {478.float64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_DOUBLES) == 1),479.int16 = true,480.int64 = (pdevice->pscreen->get_param(pdevice->pscreen, PIPE_CAP_INT64) == 1),481.tessellation = true,482.image_ms_array = true,483.image_read_without_format = true,484.image_write_without_format = true,485.storage_image_ms = true,486.geometry_streams = true,487.storage_8bit = true,488.storage_16bit = true,489.variable_pointers = true,490.stencil_export = true,491.post_depth_coverage = true,492.transform_feedback = true,493.device_group = true,494.draw_parameters = true,495.shader_viewport_index_layer = true,496.multiview = true,497.physical_storage_buffer_address = true,498.int64_atomics = true,499.subgroup_arithmetic = true,500.subgroup_basic = true,501.subgroup_ballot = true,502.subgroup_quad = true,503.subgroup_vote = true,504},505.ubo_addr_format = nir_address_format_32bit_index_offset,506.ssbo_addr_format = nir_address_format_32bit_index_offset,507.phys_ssbo_addr_format = nir_address_format_64bit_global,508.push_const_addr_format = nir_address_format_logical,509.shared_addr_format = nir_address_format_32bit_offset,510.frag_coord_is_sysval = false,511};512513nir = spirv_to_nir(spirv, module->size / 4,514spec_entries, num_spec_entries,515stage, entrypoint_name, &spirv_options, drv_options);516517if (!nir) {518free(spec_entries);519return;520}521nir_validate_shader(nir, NULL);522523free(spec_entries);524525NIR_PASS_V(nir, nir_lower_variable_initializers, nir_var_function_temp);526NIR_PASS_V(nir, nir_lower_returns);527NIR_PASS_V(nir, nir_inline_functions);528NIR_PASS_V(nir, nir_copy_prop);529NIR_PASS_V(nir, nir_opt_deref);530531/* Pick off the single entrypoint that we want */532foreach_list_typed_safe(nir_function, func, node, &nir->functions) {533if (!func->is_entrypoint)534exec_node_remove(&func->node);535}536assert(exec_list_length(&nir->functions) == 1);537538NIR_PASS_V(nir, nir_lower_variable_initializers, ~0);539NIR_PASS_V(nir, nir_split_var_copies);540NIR_PASS_V(nir, nir_split_per_member_structs);541542NIR_PASS_V(nir, nir_remove_dead_variables,543nir_var_shader_in | nir_var_shader_out | nir_var_system_value, NULL);544545if (stage == MESA_SHADER_FRAGMENT)546lvp_lower_input_attachments(nir, false);547NIR_PASS_V(nir, nir_lower_system_values);548NIR_PASS_V(nir, nir_lower_compute_system_values, NULL);549550NIR_PASS_V(nir, nir_lower_clip_cull_distance_arrays);551NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_uniform, NULL);552553lvp_lower_pipeline_layout(pipeline->device, pipeline->layout, nir);554555NIR_PASS_V(nir, nir_lower_io_to_temporaries, nir_shader_get_entrypoint(nir), true, true);556NIR_PASS_V(nir, nir_split_var_copies);557NIR_PASS_V(nir, nir_lower_global_vars_to_local);558559NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_push_const,560nir_address_format_32bit_offset);561562NIR_PASS_V(nir, nir_lower_explicit_io,563nir_var_mem_ubo | nir_var_mem_ssbo,564nir_address_format_32bit_index_offset);565566NIR_PASS_V(nir, nir_lower_explicit_io,567nir_var_mem_global,568nir_address_format_64bit_global);569570if (nir->info.stage == MESA_SHADER_COMPUTE) {571NIR_PASS_V(nir, nir_lower_vars_to_explicit_types, nir_var_mem_shared, shared_var_info);572NIR_PASS_V(nir, nir_lower_explicit_io, nir_var_mem_shared, nir_address_format_32bit_offset);573}574575NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_shader_temp, NULL);576577if (nir->info.stage == MESA_SHADER_VERTEX ||578nir->info.stage == MESA_SHADER_GEOMETRY) {579NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, false);580} else if (nir->info.stage == MESA_SHADER_FRAGMENT) {581NIR_PASS_V(nir, nir_lower_io_arrays_to_elements_no_indirects, true);582}583584do {585progress = false;586587NIR_PASS(progress, nir, nir_lower_flrp, 32|64, true);588NIR_PASS(progress, nir, nir_split_array_vars, nir_var_function_temp);589NIR_PASS(progress, nir, nir_shrink_vec_array_vars, nir_var_function_temp);590NIR_PASS(progress, nir, nir_opt_deref);591NIR_PASS(progress, nir, nir_lower_vars_to_ssa);592593NIR_PASS(progress, nir, nir_copy_prop);594NIR_PASS(progress, nir, nir_opt_dce);595NIR_PASS(progress, nir, nir_opt_peephole_select, 8, true, true);596597NIR_PASS(progress, nir, nir_opt_algebraic);598NIR_PASS(progress, nir, nir_opt_constant_folding);599600NIR_PASS(progress, nir, nir_opt_remove_phis);601bool trivial_continues = false;602NIR_PASS(trivial_continues, nir, nir_opt_trivial_continues);603progress |= trivial_continues;604if (trivial_continues) {605/* If nir_opt_trivial_continues makes progress, then we need to clean606* things up if we want any hope of nir_opt_if or nir_opt_loop_unroll607* to make progress.608*/609NIR_PASS(progress, nir, nir_copy_prop);610NIR_PASS(progress, nir, nir_opt_dce);611NIR_PASS(progress, nir, nir_opt_remove_phis);612}613NIR_PASS(progress, nir, nir_opt_if, true);614NIR_PASS(progress, nir, nir_opt_dead_cf);615NIR_PASS(progress, nir, nir_opt_conditional_discard);616NIR_PASS(progress, nir, nir_opt_remove_phis);617NIR_PASS(progress, nir, nir_opt_cse);618NIR_PASS(progress, nir, nir_opt_undef);619620NIR_PASS(progress, nir, nir_opt_deref);621NIR_PASS(progress, nir, nir_lower_alu_to_scalar, NULL, NULL);622} while (progress);623624NIR_PASS_V(nir, nir_lower_var_copies);625NIR_PASS_V(nir, nir_remove_dead_variables, nir_var_function_temp, NULL);626NIR_PASS_V(nir, nir_opt_dce);627nir_sweep(nir);628629nir_shader_gather_info(nir, nir_shader_get_entrypoint(nir));630631if (nir->info.stage != MESA_SHADER_VERTEX)632nir_assign_io_var_locations(nir, nir_var_shader_in, &nir->num_inputs, nir->info.stage);633else {634nir->num_inputs = util_last_bit64(nir->info.inputs_read);635nir_foreach_shader_in_variable(var, nir) {636var->data.driver_location = var->data.location - VERT_ATTRIB_GENERIC0;637}638}639nir_assign_io_var_locations(nir, nir_var_shader_out, &nir->num_outputs,640nir->info.stage);641pipeline->pipeline_nir[stage] = nir;642}643644static void fill_shader_prog(struct pipe_shader_state *state, gl_shader_stage stage, struct lvp_pipeline *pipeline)645{646state->type = PIPE_SHADER_IR_NIR;647state->ir.nir = pipeline->pipeline_nir[stage];648}649650static void651merge_tess_info(struct shader_info *tes_info,652const struct shader_info *tcs_info)653{654/* The Vulkan 1.0.38 spec, section 21.1 Tessellator says:655*656* "PointMode. Controls generation of points rather than triangles657* or lines. This functionality defaults to disabled, and is658* enabled if either shader stage includes the execution mode.659*660* and about Triangles, Quads, IsoLines, VertexOrderCw, VertexOrderCcw,661* PointMode, SpacingEqual, SpacingFractionalEven, SpacingFractionalOdd,662* and OutputVertices, it says:663*664* "One mode must be set in at least one of the tessellation665* shader stages."666*667* So, the fields can be set in either the TCS or TES, but they must668* agree if set in both. Our backend looks at TES, so bitwise-or in669* the values from the TCS.670*/671assert(tcs_info->tess.tcs_vertices_out == 0 ||672tes_info->tess.tcs_vertices_out == 0 ||673tcs_info->tess.tcs_vertices_out == tes_info->tess.tcs_vertices_out);674tes_info->tess.tcs_vertices_out |= tcs_info->tess.tcs_vertices_out;675676assert(tcs_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||677tes_info->tess.spacing == TESS_SPACING_UNSPECIFIED ||678tcs_info->tess.spacing == tes_info->tess.spacing);679tes_info->tess.spacing |= tcs_info->tess.spacing;680681assert(tcs_info->tess.primitive_mode == 0 ||682tes_info->tess.primitive_mode == 0 ||683tcs_info->tess.primitive_mode == tes_info->tess.primitive_mode);684tes_info->tess.primitive_mode |= tcs_info->tess.primitive_mode;685tes_info->tess.ccw |= tcs_info->tess.ccw;686tes_info->tess.point_mode |= tcs_info->tess.point_mode;687}688689static gl_shader_stage690lvp_shader_stage(VkShaderStageFlagBits stage)691{692switch (stage) {693case VK_SHADER_STAGE_VERTEX_BIT:694return MESA_SHADER_VERTEX;695case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:696return MESA_SHADER_TESS_CTRL;697case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:698return MESA_SHADER_TESS_EVAL;699case VK_SHADER_STAGE_GEOMETRY_BIT:700return MESA_SHADER_GEOMETRY;701case VK_SHADER_STAGE_FRAGMENT_BIT:702return MESA_SHADER_FRAGMENT;703case VK_SHADER_STAGE_COMPUTE_BIT:704return MESA_SHADER_COMPUTE;705default:706unreachable("invalid VkShaderStageFlagBits");707return MESA_SHADER_NONE;708}709}710711static VkResult712lvp_pipeline_compile(struct lvp_pipeline *pipeline,713gl_shader_stage stage)714{715struct lvp_device *device = pipeline->device;716device->physical_device->pscreen->finalize_nir(device->physical_device->pscreen, pipeline->pipeline_nir[stage], true);717if (stage == MESA_SHADER_COMPUTE) {718struct pipe_compute_state shstate = {0};719shstate.prog = (void *)pipeline->pipeline_nir[MESA_SHADER_COMPUTE];720shstate.ir_type = PIPE_SHADER_IR_NIR;721shstate.req_local_mem = pipeline->pipeline_nir[MESA_SHADER_COMPUTE]->info.shared_size;722pipeline->shader_cso[PIPE_SHADER_COMPUTE] = device->queue.ctx->create_compute_state(device->queue.ctx, &shstate);723} else {724struct pipe_shader_state shstate = {0};725fill_shader_prog(&shstate, stage, pipeline);726727if (stage == MESA_SHADER_VERTEX ||728stage == MESA_SHADER_GEOMETRY ||729stage == MESA_SHADER_TESS_EVAL) {730nir_xfb_info *xfb_info = nir_gather_xfb_info(pipeline->pipeline_nir[stage], NULL);731if (xfb_info) {732uint8_t output_mapping[VARYING_SLOT_TESS_MAX];733memset(output_mapping, 0, sizeof(output_mapping));734735nir_foreach_shader_out_variable(var, pipeline->pipeline_nir[stage]) {736unsigned slots = var->data.compact ? DIV_ROUND_UP(glsl_get_length(var->type), 4)737: glsl_count_attribute_slots(var->type, false);738for (unsigned i = 0; i < slots; i++)739output_mapping[var->data.location + i] = var->data.driver_location + i;740}741742shstate.stream_output.num_outputs = xfb_info->output_count;743for (unsigned i = 0; i < PIPE_MAX_SO_BUFFERS; i++) {744if (xfb_info->buffers_written & (1 << i)) {745shstate.stream_output.stride[i] = xfb_info->buffers[i].stride / 4;746}747}748for (unsigned i = 0; i < xfb_info->output_count; i++) {749shstate.stream_output.output[i].output_buffer = xfb_info->outputs[i].buffer;750shstate.stream_output.output[i].dst_offset = xfb_info->outputs[i].offset / 4;751shstate.stream_output.output[i].register_index = output_mapping[xfb_info->outputs[i].location];752shstate.stream_output.output[i].num_components = util_bitcount(xfb_info->outputs[i].component_mask);753shstate.stream_output.output[i].start_component = ffs(xfb_info->outputs[i].component_mask) - 1;754shstate.stream_output.output[i].stream = xfb_info->buffer_to_stream[xfb_info->outputs[i].buffer];755}756757ralloc_free(xfb_info);758}759}760761switch (stage) {762case MESA_SHADER_FRAGMENT:763pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);764break;765case MESA_SHADER_VERTEX:766pipeline->shader_cso[PIPE_SHADER_VERTEX] = device->queue.ctx->create_vs_state(device->queue.ctx, &shstate);767break;768case MESA_SHADER_GEOMETRY:769pipeline->shader_cso[PIPE_SHADER_GEOMETRY] = device->queue.ctx->create_gs_state(device->queue.ctx, &shstate);770break;771case MESA_SHADER_TESS_CTRL:772pipeline->shader_cso[PIPE_SHADER_TESS_CTRL] = device->queue.ctx->create_tcs_state(device->queue.ctx, &shstate);773break;774case MESA_SHADER_TESS_EVAL:775pipeline->shader_cso[PIPE_SHADER_TESS_EVAL] = device->queue.ctx->create_tes_state(device->queue.ctx, &shstate);776break;777default:778unreachable("illegal shader");779break;780}781}782return VK_SUCCESS;783}784785static VkResult786lvp_graphics_pipeline_init(struct lvp_pipeline *pipeline,787struct lvp_device *device,788struct lvp_pipeline_cache *cache,789const VkGraphicsPipelineCreateInfo *pCreateInfo,790const VkAllocationCallbacks *alloc)791{792if (alloc == NULL)793alloc = &device->vk.alloc;794pipeline->device = device;795pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);796pipeline->force_min_sample = false;797798pipeline->mem_ctx = ralloc_context(NULL);799/* recreate createinfo */800deep_copy_graphics_create_info(pipeline->mem_ctx, &pipeline->graphics_create_info, pCreateInfo);801pipeline->is_compute_pipeline = false;802803const VkPipelineRasterizationProvokingVertexStateCreateInfoEXT *pv_state =804vk_find_struct_const(pCreateInfo->pRasterizationState,805PIPELINE_RASTERIZATION_PROVOKING_VERTEX_STATE_CREATE_INFO_EXT);806pipeline->provoking_vertex_last = pv_state && pv_state->provokingVertexMode == VK_PROVOKING_VERTEX_MODE_LAST_VERTEX_EXT;807808const VkPipelineRasterizationLineStateCreateInfoEXT *line_state =809vk_find_struct_const(pCreateInfo->pRasterizationState,810PIPELINE_RASTERIZATION_LINE_STATE_CREATE_INFO_EXT);811if (line_state) {812/* always draw bresenham if !smooth */813pipeline->line_stipple_enable = line_state->stippledLineEnable;814pipeline->line_smooth = line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;815pipeline->disable_multisample = line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT ||816line_state->lineRasterizationMode == VK_LINE_RASTERIZATION_MODE_RECTANGULAR_SMOOTH_EXT;817pipeline->line_rectangular = line_state->lineRasterizationMode != VK_LINE_RASTERIZATION_MODE_BRESENHAM_EXT;818if (!dynamic_state_contains(pipeline->graphics_create_info.pDynamicState, VK_DYNAMIC_STATE_LINE_STIPPLE_EXT)) {819pipeline->line_stipple_factor = line_state->lineStippleFactor - 1;820pipeline->line_stipple_pattern = line_state->lineStipplePattern;821}822} else823pipeline->line_rectangular = true;824825826for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {827VK_FROM_HANDLE(vk_shader_module, module,828pCreateInfo->pStages[i].module);829gl_shader_stage stage = lvp_shader_stage(pCreateInfo->pStages[i].stage);830lvp_shader_compile_to_ir(pipeline, module,831pCreateInfo->pStages[i].pName,832stage,833pCreateInfo->pStages[i].pSpecializationInfo);834if (!pipeline->pipeline_nir[stage])835return VK_ERROR_FEATURE_NOT_PRESENT;836}837838if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]) {839if (pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.fs.uses_sample_qualifier ||840BITSET_TEST(pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_ID) ||841BITSET_TEST(pipeline->pipeline_nir[MESA_SHADER_FRAGMENT]->info.system_values_read, SYSTEM_VALUE_SAMPLE_POS))842pipeline->force_min_sample = true;843}844if (pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]) {845nir_lower_patch_vertices(pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL], pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info.tess.tcs_vertices_out, NULL);846merge_tess_info(&pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info, &pipeline->pipeline_nir[MESA_SHADER_TESS_CTRL]->info);847const VkPipelineTessellationDomainOriginStateCreateInfo *domain_origin_state =848vk_find_struct_const(pCreateInfo->pTessellationState,849PIPELINE_TESSELLATION_DOMAIN_ORIGIN_STATE_CREATE_INFO);850if (!domain_origin_state || domain_origin_state->domainOrigin == VK_TESSELLATION_DOMAIN_ORIGIN_UPPER_LEFT)851pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw = !pipeline->pipeline_nir[MESA_SHADER_TESS_EVAL]->info.tess.ccw;852}853854pipeline->gs_output_lines = pipeline->pipeline_nir[MESA_SHADER_GEOMETRY] &&855pipeline->pipeline_nir[MESA_SHADER_GEOMETRY]->info.gs.output_primitive == GL_LINES;856857858bool has_fragment_shader = false;859for (uint32_t i = 0; i < pCreateInfo->stageCount; i++) {860gl_shader_stage stage = lvp_shader_stage(pCreateInfo->pStages[i].stage);861lvp_pipeline_compile(pipeline, stage);862if (stage == MESA_SHADER_FRAGMENT)863has_fragment_shader = true;864}865866if (has_fragment_shader == false) {867/* create a dummy fragment shader for this pipeline. */868nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL,869"dummy_frag");870871pipeline->pipeline_nir[MESA_SHADER_FRAGMENT] = b.shader;872struct pipe_shader_state shstate = {0};873shstate.type = PIPE_SHADER_IR_NIR;874shstate.ir.nir = pipeline->pipeline_nir[MESA_SHADER_FRAGMENT];875pipeline->shader_cso[PIPE_SHADER_FRAGMENT] = device->queue.ctx->create_fs_state(device->queue.ctx, &shstate);876}877return VK_SUCCESS;878}879880static VkResult881lvp_graphics_pipeline_create(882VkDevice _device,883VkPipelineCache _cache,884const VkGraphicsPipelineCreateInfo *pCreateInfo,885const VkAllocationCallbacks *pAllocator,886VkPipeline *pPipeline)887{888LVP_FROM_HANDLE(lvp_device, device, _device);889LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);890struct lvp_pipeline *pipeline;891VkResult result;892893assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO);894895pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,896VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);897if (pipeline == NULL)898return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);899900vk_object_base_init(&device->vk, &pipeline->base,901VK_OBJECT_TYPE_PIPELINE);902result = lvp_graphics_pipeline_init(pipeline, device, cache, pCreateInfo,903pAllocator);904if (result != VK_SUCCESS) {905vk_free2(&device->vk.alloc, pAllocator, pipeline);906return result;907}908909*pPipeline = lvp_pipeline_to_handle(pipeline);910911return VK_SUCCESS;912}913914VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateGraphicsPipelines(915VkDevice _device,916VkPipelineCache pipelineCache,917uint32_t count,918const VkGraphicsPipelineCreateInfo* pCreateInfos,919const VkAllocationCallbacks* pAllocator,920VkPipeline* pPipelines)921{922VkResult result = VK_SUCCESS;923unsigned i = 0;924925for (; i < count; i++) {926VkResult r;927r = lvp_graphics_pipeline_create(_device,928pipelineCache,929&pCreateInfos[i],930pAllocator, &pPipelines[i]);931if (r != VK_SUCCESS) {932result = r;933pPipelines[i] = VK_NULL_HANDLE;934}935}936937return result;938}939940static VkResult941lvp_compute_pipeline_init(struct lvp_pipeline *pipeline,942struct lvp_device *device,943struct lvp_pipeline_cache *cache,944const VkComputePipelineCreateInfo *pCreateInfo,945const VkAllocationCallbacks *alloc)946{947VK_FROM_HANDLE(vk_shader_module, module,948pCreateInfo->stage.module);949if (alloc == NULL)950alloc = &device->vk.alloc;951pipeline->device = device;952pipeline->layout = lvp_pipeline_layout_from_handle(pCreateInfo->layout);953pipeline->force_min_sample = false;954955pipeline->mem_ctx = ralloc_context(NULL);956deep_copy_compute_create_info(pipeline->mem_ctx,957&pipeline->compute_create_info, pCreateInfo);958pipeline->is_compute_pipeline = true;959960lvp_shader_compile_to_ir(pipeline, module,961pCreateInfo->stage.pName,962MESA_SHADER_COMPUTE,963pCreateInfo->stage.pSpecializationInfo);964if (!pipeline->pipeline_nir[MESA_SHADER_COMPUTE])965return VK_ERROR_FEATURE_NOT_PRESENT;966lvp_pipeline_compile(pipeline, MESA_SHADER_COMPUTE);967return VK_SUCCESS;968}969970static VkResult971lvp_compute_pipeline_create(972VkDevice _device,973VkPipelineCache _cache,974const VkComputePipelineCreateInfo *pCreateInfo,975const VkAllocationCallbacks *pAllocator,976VkPipeline *pPipeline)977{978LVP_FROM_HANDLE(lvp_device, device, _device);979LVP_FROM_HANDLE(lvp_pipeline_cache, cache, _cache);980struct lvp_pipeline *pipeline;981VkResult result;982983assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO);984985pipeline = vk_zalloc2(&device->vk.alloc, pAllocator, sizeof(*pipeline), 8,986VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);987if (pipeline == NULL)988return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);989990vk_object_base_init(&device->vk, &pipeline->base,991VK_OBJECT_TYPE_PIPELINE);992result = lvp_compute_pipeline_init(pipeline, device, cache, pCreateInfo,993pAllocator);994if (result != VK_SUCCESS) {995vk_free2(&device->vk.alloc, pAllocator, pipeline);996return result;997}998999*pPipeline = lvp_pipeline_to_handle(pipeline);10001001return VK_SUCCESS;1002}10031004VKAPI_ATTR VkResult VKAPI_CALL lvp_CreateComputePipelines(1005VkDevice _device,1006VkPipelineCache pipelineCache,1007uint32_t count,1008const VkComputePipelineCreateInfo* pCreateInfos,1009const VkAllocationCallbacks* pAllocator,1010VkPipeline* pPipelines)1011{1012VkResult result = VK_SUCCESS;1013unsigned i = 0;10141015for (; i < count; i++) {1016VkResult r;1017r = lvp_compute_pipeline_create(_device,1018pipelineCache,1019&pCreateInfos[i],1020pAllocator, &pPipelines[i]);1021if (r != VK_SUCCESS) {1022result = r;1023pPipelines[i] = VK_NULL_HANDLE;1024}1025}10261027return result;1028}102910301031