Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_meta_clear.c
4560 views
/*1* Copyright © 2020 Raspberry Pi2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "v3dv_private.h"2425#include "compiler/nir/nir_builder.h"26#include "vk_format_info.h"27#include "util/u_pack_color.h"2829static void30destroy_color_clear_pipeline(VkDevice _device,31uint64_t pipeline,32VkAllocationCallbacks *alloc)33{34struct v3dv_meta_color_clear_pipeline *p =35(struct v3dv_meta_color_clear_pipeline *) (uintptr_t) pipeline;36v3dv_DestroyPipeline(_device, p->pipeline, alloc);37if (p->cached)38v3dv_DestroyRenderPass(_device, p->pass, alloc);39vk_free(alloc, p);40}4142static void43destroy_depth_clear_pipeline(VkDevice _device,44struct v3dv_meta_depth_clear_pipeline *p,45VkAllocationCallbacks *alloc)46{47v3dv_DestroyPipeline(_device, p->pipeline, alloc);48vk_free(alloc, p);49}5051static VkResult52create_color_clear_pipeline_layout(struct v3dv_device *device,53VkPipelineLayout *pipeline_layout)54{55/* FIXME: this is abusing a bit the API, since not all of our clear56* pipelines have a geometry shader. We could create 2 different pipeline57* layouts, but this works for us for now.58*/59VkPushConstantRange ranges[2] = {60{ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16 },61{ VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4 },62};6364VkPipelineLayoutCreateInfo info = {65.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,66.setLayoutCount = 0,67.pushConstantRangeCount = 2,68.pPushConstantRanges = ranges,69};7071return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),72&info, &device->vk.alloc, pipeline_layout);73}7475static VkResult76create_depth_clear_pipeline_layout(struct v3dv_device *device,77VkPipelineLayout *pipeline_layout)78{79/* FIXME: this is abusing a bit the API, since not all of our clear80* pipelines have a geometry shader. We could create 2 different pipeline81* layouts, but this works for us for now.82*/83VkPushConstantRange ranges[2] = {84{ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4 },85{ VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4 },86};8788VkPipelineLayoutCreateInfo info = {89.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,90.setLayoutCount = 0,91.pushConstantRangeCount = 2,92.pPushConstantRanges = ranges93};9495return v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),96&info, &device->vk.alloc, pipeline_layout);97}9899void100v3dv_meta_clear_init(struct v3dv_device *device)101{102device->meta.color_clear.cache =103_mesa_hash_table_create(NULL, u64_hash, u64_compare);104105create_color_clear_pipeline_layout(device,106&device->meta.color_clear.p_layout);107108device->meta.depth_clear.cache =109_mesa_hash_table_create(NULL, u64_hash, u64_compare);110111create_depth_clear_pipeline_layout(device,112&device->meta.depth_clear.p_layout);113}114115void116v3dv_meta_clear_finish(struct v3dv_device *device)117{118VkDevice _device = v3dv_device_to_handle(device);119120hash_table_foreach(device->meta.color_clear.cache, entry) {121struct v3dv_meta_color_clear_pipeline *item = entry->data;122destroy_color_clear_pipeline(_device, (uintptr_t)item, &device->vk.alloc);123}124_mesa_hash_table_destroy(device->meta.color_clear.cache, NULL);125126if (device->meta.color_clear.p_layout) {127v3dv_DestroyPipelineLayout(_device, device->meta.color_clear.p_layout,128&device->vk.alloc);129}130131hash_table_foreach(device->meta.depth_clear.cache, entry) {132struct v3dv_meta_depth_clear_pipeline *item = entry->data;133destroy_depth_clear_pipeline(_device, item, &device->vk.alloc);134}135_mesa_hash_table_destroy(device->meta.depth_clear.cache, NULL);136137if (device->meta.depth_clear.p_layout) {138v3dv_DestroyPipelineLayout(_device, device->meta.depth_clear.p_layout,139&device->vk.alloc);140}141}142143static nir_ssa_def *144gen_rect_vertices(nir_builder *b)145{146nir_ssa_def *vertex_id = nir_load_vertex_id(b);147148/* vertex 0: -1.0, -1.0149* vertex 1: -1.0, 1.0150* vertex 2: 1.0, -1.0151* vertex 3: 1.0, 1.0152*153* so:154*155* channel 0 is vertex_id < 2 ? -1.0 : 1.0156* channel 1 is vertex id & 1 ? 1.0 : -1.0157*/158159nir_ssa_def *one = nir_imm_int(b, 1);160nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));161nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);162163nir_ssa_def *comp[4];164comp[0] = nir_bcsel(b, c0cmp,165nir_imm_float(b, -1.0f),166nir_imm_float(b, 1.0f));167168comp[1] = nir_bcsel(b, c1cmp,169nir_imm_float(b, 1.0f),170nir_imm_float(b, -1.0f));171comp[2] = nir_imm_float(b, 0.0f);172comp[3] = nir_imm_float(b, 1.0f);173return nir_vec(b, comp, 4);174}175176static nir_shader *177get_clear_rect_vs()178{179const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();180nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,181"meta clear vs");182183const struct glsl_type *vec4 = glsl_vec4_type();184nir_variable *vs_out_pos =185nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");186vs_out_pos->data.location = VARYING_SLOT_POS;187188nir_ssa_def *pos = gen_rect_vertices(&b);189nir_store_var(&b, vs_out_pos, pos, 0xf);190191return b.shader;192}193194static nir_shader *195get_clear_rect_gs(uint32_t push_constant_layer_base)196{197/* FIXME: this creates a geometry shader that takes the index of a single198* layer to clear from push constants, so we need to emit a draw call for199* each layer that we want to clear. We could actually do better and have it200* take a range of layers and then emit one triangle per layer to clear,201* however, if we were to do this we would need to be careful not to exceed202* the maximum number of output vertices allowed in a geometry shader.203*/204const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();205nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,206"meta clear gs");207nir_shader *nir = b.shader;208nir->info.inputs_read = 1ull << VARYING_SLOT_POS;209nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |210(1ull << VARYING_SLOT_LAYER);211nir->info.gs.input_primitive = GL_TRIANGLES;212nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;213nir->info.gs.vertices_in = 3;214nir->info.gs.vertices_out = 3;215nir->info.gs.invocations = 1;216nir->info.gs.active_stream_mask = 0x1;217218/* in vec4 gl_Position[3] */219nir_variable *gs_in_pos =220nir_variable_create(b.shader, nir_var_shader_in,221glsl_array_type(glsl_vec4_type(), 3, 0),222"in_gl_Position");223gs_in_pos->data.location = VARYING_SLOT_POS;224225/* out vec4 gl_Position */226nir_variable *gs_out_pos =227nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),228"out_gl_Position");229gs_out_pos->data.location = VARYING_SLOT_POS;230231/* out float gl_Layer */232nir_variable *gs_out_layer =233nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),234"out_gl_Layer");235gs_out_layer->data.location = VARYING_SLOT_LAYER;236237/* Emit output triangle */238for (uint32_t i = 0; i < 3; i++) {239/* gl_Position from shader input */240nir_deref_instr *in_pos_i =241nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);242nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);243244/* gl_Layer from push constants */245nir_ssa_def *layer =246nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),247.base = push_constant_layer_base, .range = 4);248nir_store_var(&b, gs_out_layer, layer, 0x1);249250nir_emit_vertex(&b, 0);251}252253nir_end_primitive(&b, 0);254255return nir;256}257258static nir_shader *259get_color_clear_rect_fs(uint32_t rt_idx, VkFormat format)260{261const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();262nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,263"meta clear fs");264265enum pipe_format pformat = vk_format_to_pipe_format(format);266const struct glsl_type *fs_out_type =267util_format_is_float(pformat) ? glsl_vec4_type() : glsl_uvec4_type();268269nir_variable *fs_out_color =270nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");271fs_out_color->data.location = FRAG_RESULT_DATA0 + rt_idx;272273nir_ssa_def *color_load = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .base = 0, .range = 16);274nir_store_var(&b, fs_out_color, color_load, 0xf);275276return b.shader;277}278279static nir_shader *280get_depth_clear_rect_fs()281{282const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();283nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,284"meta depth clear fs");285286nir_variable *fs_out_depth =287nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),288"out_depth");289fs_out_depth->data.location = FRAG_RESULT_DEPTH;290291nir_ssa_def *depth_load =292nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0), .base = 0, .range = 4);293294nir_store_var(&b, fs_out_depth, depth_load, 0x1);295296return b.shader;297}298299static VkResult300create_pipeline(struct v3dv_device *device,301struct v3dv_render_pass *pass,302uint32_t subpass_idx,303uint32_t samples,304struct nir_shader *vs_nir,305struct nir_shader *gs_nir,306struct nir_shader *fs_nir,307const VkPipelineVertexInputStateCreateInfo *vi_state,308const VkPipelineDepthStencilStateCreateInfo *ds_state,309const VkPipelineColorBlendStateCreateInfo *cb_state,310const VkPipelineLayout layout,311VkPipeline *pipeline)312{313VkPipelineShaderStageCreateInfo stages[3] = { 0 };314struct vk_shader_module vs_m;315struct vk_shader_module gs_m;316struct vk_shader_module fs_m;317318uint32_t stage_count = 0;319v3dv_shader_module_internal_init(device, &vs_m, vs_nir);320stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;321stages[stage_count].stage = VK_SHADER_STAGE_VERTEX_BIT;322stages[stage_count].module = vk_shader_module_to_handle(&vs_m);323stages[stage_count].pName = "main";324stage_count++;325326if (gs_nir) {327v3dv_shader_module_internal_init(device, &gs_m, gs_nir);328stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;329stages[stage_count].stage = VK_SHADER_STAGE_GEOMETRY_BIT;330stages[stage_count].module = vk_shader_module_to_handle(&gs_m);331stages[stage_count].pName = "main";332stage_count++;333}334335if (fs_nir) {336v3dv_shader_module_internal_init(device, &fs_m, fs_nir);337stages[stage_count].sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO;338stages[stage_count].stage = VK_SHADER_STAGE_FRAGMENT_BIT;339stages[stage_count].module = vk_shader_module_to_handle(&fs_m);340stages[stage_count].pName = "main";341stage_count++;342}343344VkGraphicsPipelineCreateInfo info = {345.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,346347.stageCount = stage_count,348.pStages = stages,349350.pVertexInputState = vi_state,351352.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {353.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,354.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,355.primitiveRestartEnable = false,356},357358.pViewportState = &(VkPipelineViewportStateCreateInfo) {359.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,360.viewportCount = 1,361.scissorCount = 1,362},363364.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {365.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,366.rasterizerDiscardEnable = false,367.polygonMode = VK_POLYGON_MODE_FILL,368.cullMode = VK_CULL_MODE_NONE,369.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,370.depthBiasEnable = false,371},372373.pMultisampleState = &(VkPipelineMultisampleStateCreateInfo) {374.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,375.rasterizationSamples = samples,376.sampleShadingEnable = false,377.pSampleMask = NULL,378.alphaToCoverageEnable = false,379.alphaToOneEnable = false,380},381382.pDepthStencilState = ds_state,383384.pColorBlendState = cb_state,385386/* The meta clear pipeline declares all state as dynamic.387* As a consequence, vkCmdBindPipeline writes no dynamic state388* to the cmd buffer. Therefore, at the end of the meta clear,389* we need only restore dynamic state that was vkCmdSet.390*/391.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {392.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,393.dynamicStateCount = 6,394.pDynamicStates = (VkDynamicState[]) {395VK_DYNAMIC_STATE_VIEWPORT,396VK_DYNAMIC_STATE_SCISSOR,397VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,398VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,399VK_DYNAMIC_STATE_STENCIL_REFERENCE,400VK_DYNAMIC_STATE_BLEND_CONSTANTS,401VK_DYNAMIC_STATE_DEPTH_BIAS,402VK_DYNAMIC_STATE_LINE_WIDTH,403},404},405406.flags = 0,407.layout = layout,408.renderPass = v3dv_render_pass_to_handle(pass),409.subpass = subpass_idx,410};411412VkResult result =413v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),414VK_NULL_HANDLE,4151, &info,416&device->vk.alloc,417pipeline);418419ralloc_free(vs_nir);420ralloc_free(fs_nir);421422return result;423}424425static VkResult426create_color_clear_pipeline(struct v3dv_device *device,427struct v3dv_render_pass *pass,428uint32_t subpass_idx,429uint32_t rt_idx,430VkFormat format,431uint32_t samples,432uint32_t components,433bool is_layered,434VkPipelineLayout pipeline_layout,435VkPipeline *pipeline)436{437nir_shader *vs_nir = get_clear_rect_vs();438nir_shader *fs_nir = get_color_clear_rect_fs(rt_idx, format);439nir_shader *gs_nir = is_layered ? get_clear_rect_gs(16) : NULL;440441const VkPipelineVertexInputStateCreateInfo vi_state = {442.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,443.vertexBindingDescriptionCount = 0,444.vertexAttributeDescriptionCount = 0,445};446447const VkPipelineDepthStencilStateCreateInfo ds_state = {448.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,449.depthTestEnable = false,450.depthWriteEnable = false,451.depthBoundsTestEnable = false,452.stencilTestEnable = false,453};454455assert(subpass_idx < pass->subpass_count);456const uint32_t color_count = pass->subpasses[subpass_idx].color_count;457assert(rt_idx < color_count);458459VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS];460for (uint32_t i = 0; i < color_count; i++) {461blend_att_state[i] = (VkPipelineColorBlendAttachmentState) {462.blendEnable = false,463.colorWriteMask = i == rt_idx ? components : 0,464};465}466467const VkPipelineColorBlendStateCreateInfo cb_state = {468.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,469.logicOpEnable = false,470.attachmentCount = color_count,471.pAttachments = blend_att_state472};473474return create_pipeline(device,475pass, subpass_idx,476samples,477vs_nir, gs_nir, fs_nir,478&vi_state,479&ds_state,480&cb_state,481pipeline_layout,482pipeline);483}484485static VkResult486create_depth_clear_pipeline(struct v3dv_device *device,487VkImageAspectFlags aspects,488struct v3dv_render_pass *pass,489uint32_t subpass_idx,490uint32_t samples,491bool is_layered,492VkPipelineLayout pipeline_layout,493VkPipeline *pipeline)494{495const bool has_depth = aspects & VK_IMAGE_ASPECT_DEPTH_BIT;496const bool has_stencil = aspects & VK_IMAGE_ASPECT_STENCIL_BIT;497assert(has_depth || has_stencil);498499nir_shader *vs_nir = get_clear_rect_vs();500nir_shader *fs_nir = has_depth ? get_depth_clear_rect_fs() : NULL;501nir_shader *gs_nir = is_layered ? get_clear_rect_gs(4) : NULL;502503const VkPipelineVertexInputStateCreateInfo vi_state = {504.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,505.vertexBindingDescriptionCount = 0,506.vertexAttributeDescriptionCount = 0,507};508509const VkPipelineDepthStencilStateCreateInfo ds_state = {510.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,511.depthTestEnable = has_depth,512.depthWriteEnable = has_depth,513.depthCompareOp = VK_COMPARE_OP_ALWAYS,514.depthBoundsTestEnable = false,515.stencilTestEnable = has_stencil,516.front = {517.passOp = VK_STENCIL_OP_REPLACE,518.compareOp = VK_COMPARE_OP_ALWAYS,519/* compareMask, writeMask and reference are dynamic state */520},521.back = { 0 },522};523524assert(subpass_idx < pass->subpass_count);525VkPipelineColorBlendAttachmentState blend_att_state[V3D_MAX_DRAW_BUFFERS] = { 0 };526const VkPipelineColorBlendStateCreateInfo cb_state = {527.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,528.logicOpEnable = false,529.attachmentCount = pass->subpasses[subpass_idx].color_count,530.pAttachments = blend_att_state,531};532533return create_pipeline(device,534pass, subpass_idx,535samples,536vs_nir, gs_nir, fs_nir,537&vi_state,538&ds_state,539&cb_state,540pipeline_layout,541pipeline);542}543544static VkResult545create_color_clear_render_pass(struct v3dv_device *device,546uint32_t rt_idx,547VkFormat format,548uint32_t samples,549VkRenderPass *pass)550{551VkAttachmentDescription att = {552.format = format,553.samples = samples,554.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,555.storeOp = VK_ATTACHMENT_STORE_OP_STORE,556.initialLayout = VK_IMAGE_LAYOUT_GENERAL,557.finalLayout = VK_IMAGE_LAYOUT_GENERAL,558};559560VkAttachmentReference att_ref = {561.attachment = rt_idx,562.layout = VK_IMAGE_LAYOUT_GENERAL,563};564565VkSubpassDescription subpass = {566.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,567.inputAttachmentCount = 0,568.colorAttachmentCount = 1,569.pColorAttachments = &att_ref,570.pResolveAttachments = NULL,571.pDepthStencilAttachment = NULL,572.preserveAttachmentCount = 0,573.pPreserveAttachments = NULL,574};575576VkRenderPassCreateInfo info = {577.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,578.attachmentCount = 1,579.pAttachments = &att,580.subpassCount = 1,581.pSubpasses = &subpass,582.dependencyCount = 0,583.pDependencies = NULL,584};585586return v3dv_CreateRenderPass(v3dv_device_to_handle(device),587&info, &device->vk.alloc, pass);588}589590static inline uint64_t591get_color_clear_pipeline_cache_key(uint32_t rt_idx,592VkFormat format,593uint32_t samples,594uint32_t components,595bool is_layered)596{597assert(rt_idx < V3D_MAX_DRAW_BUFFERS);598599uint64_t key = 0;600uint32_t bit_offset = 0;601602key |= rt_idx;603bit_offset += 2;604605key |= ((uint64_t) format) << bit_offset;606bit_offset += 32;607608key |= ((uint64_t) samples) << bit_offset;609bit_offset += 4;610611key |= ((uint64_t) components) << bit_offset;612bit_offset += 4;613614key |= (is_layered ? 1ull : 0ull) << bit_offset;615bit_offset += 1;616617assert(bit_offset <= 64);618return key;619}620621static inline uint64_t622get_depth_clear_pipeline_cache_key(VkImageAspectFlags aspects,623VkFormat format,624uint32_t samples,625bool is_layered)626{627uint64_t key = 0;628uint32_t bit_offset = 0;629630key |= format;631bit_offset += 32;632633key |= ((uint64_t) samples) << bit_offset;634bit_offset += 4;635636const bool has_depth = (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) ? 1 : 0;637key |= ((uint64_t) has_depth) << bit_offset;638bit_offset++;639640const bool has_stencil = (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) ? 1 : 0;641key |= ((uint64_t) has_stencil) << bit_offset;642bit_offset++;;643644key |= (is_layered ? 1ull : 0ull) << bit_offset;645bit_offset += 1;646647assert(bit_offset <= 64);648return key;649}650651static VkResult652get_color_clear_pipeline(struct v3dv_device *device,653struct v3dv_render_pass *pass,654uint32_t subpass_idx,655uint32_t rt_idx,656uint32_t attachment_idx,657VkFormat format,658uint32_t samples,659uint32_t components,660bool is_layered,661struct v3dv_meta_color_clear_pipeline **pipeline)662{663assert(vk_format_is_color(format));664665VkResult result = VK_SUCCESS;666667/* If pass != NULL it means that we are emitting the clear as a draw call668* in the current pass bound by the application. In that case, we can't669* cache the pipeline, since it will be referencing that pass and the670* application could be destroying it at any point. Hopefully, the perf671* impact is not too big since we still have the device pipeline cache672* around and we won't end up re-compiling the clear shader.673*674* FIXME: alternatively, we could refcount (or maybe clone) the render pass675* provided by the application and include it in the pipeline key setup676* to make caching safe in this scenario, however, based on tests with677* vkQuake3, the fact that we are not caching here doesn't seem to have678* any significant impact in performance, so it might not be worth it.679*/680const bool can_cache_pipeline = (pass == NULL);681682uint64_t key;683if (can_cache_pipeline) {684key = get_color_clear_pipeline_cache_key(rt_idx, format, samples,685components, is_layered);686mtx_lock(&device->meta.mtx);687struct hash_entry *entry =688_mesa_hash_table_search(device->meta.color_clear.cache, &key);689if (entry) {690mtx_unlock(&device->meta.mtx);691*pipeline = entry->data;692return VK_SUCCESS;693}694}695696*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,697VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);698699if (*pipeline == NULL) {700result = VK_ERROR_OUT_OF_HOST_MEMORY;701goto fail;702}703704if (!pass) {705result = create_color_clear_render_pass(device,706rt_idx,707format,708samples,709&(*pipeline)->pass);710if (result != VK_SUCCESS)711goto fail;712713pass = v3dv_render_pass_from_handle((*pipeline)->pass);714} else {715(*pipeline)->pass = v3dv_render_pass_to_handle(pass);716}717718result = create_color_clear_pipeline(device,719pass,720subpass_idx,721rt_idx,722format,723samples,724components,725is_layered,726device->meta.color_clear.p_layout,727&(*pipeline)->pipeline);728if (result != VK_SUCCESS)729goto fail;730731if (can_cache_pipeline) {732(*pipeline)->key = key;733(*pipeline)->cached = true;734_mesa_hash_table_insert(device->meta.color_clear.cache,735&(*pipeline)->key, *pipeline);736737mtx_unlock(&device->meta.mtx);738}739740return VK_SUCCESS;741742fail:743if (can_cache_pipeline)744mtx_unlock(&device->meta.mtx);745746VkDevice _device = v3dv_device_to_handle(device);747if (*pipeline) {748if ((*pipeline)->cached)749v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);750if ((*pipeline)->pipeline)751v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);752vk_free(&device->vk.alloc, *pipeline);753*pipeline = NULL;754}755756return result;757}758759static VkResult760get_depth_clear_pipeline(struct v3dv_device *device,761VkImageAspectFlags aspects,762struct v3dv_render_pass *pass,763uint32_t subpass_idx,764uint32_t attachment_idx,765bool is_layered,766struct v3dv_meta_depth_clear_pipeline **pipeline)767{768assert(subpass_idx < pass->subpass_count);769assert(attachment_idx != VK_ATTACHMENT_UNUSED);770assert(attachment_idx < pass->attachment_count);771772VkResult result = VK_SUCCESS;773774const uint32_t samples = pass->attachments[attachment_idx].desc.samples;775const VkFormat format = pass->attachments[attachment_idx].desc.format;776assert(vk_format_is_depth_or_stencil(format));777778const uint64_t key =779get_depth_clear_pipeline_cache_key(aspects, format, samples, is_layered);780mtx_lock(&device->meta.mtx);781struct hash_entry *entry =782_mesa_hash_table_search(device->meta.depth_clear.cache, &key);783if (entry) {784mtx_unlock(&device->meta.mtx);785*pipeline = entry->data;786return VK_SUCCESS;787}788789*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,790VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);791792if (*pipeline == NULL) {793result = VK_ERROR_OUT_OF_HOST_MEMORY;794goto fail;795}796797result = create_depth_clear_pipeline(device,798aspects,799pass,800subpass_idx,801samples,802is_layered,803device->meta.depth_clear.p_layout,804&(*pipeline)->pipeline);805if (result != VK_SUCCESS)806goto fail;807808(*pipeline)->key = key;809_mesa_hash_table_insert(device->meta.depth_clear.cache,810&(*pipeline)->key, *pipeline);811812mtx_unlock(&device->meta.mtx);813return VK_SUCCESS;814815fail:816mtx_unlock(&device->meta.mtx);817818VkDevice _device = v3dv_device_to_handle(device);819if (*pipeline) {820if ((*pipeline)->pipeline)821v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);822vk_free(&device->vk.alloc, *pipeline);823*pipeline = NULL;824}825826return result;827}828829/* Emits a scissored quad in the clear color */830static void831emit_subpass_color_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,832struct v3dv_render_pass *pass,833struct v3dv_subpass *subpass,834uint32_t rt_idx,835const VkClearColorValue *clear_color,836bool is_layered,837bool all_rects_same_layers,838uint32_t rect_count,839const VkClearRect *rects)840{841/* Skip if attachment is unused in the current subpass */842assert(rt_idx < subpass->color_count);843const uint32_t attachment_idx = subpass->color_attachments[rt_idx].attachment;844if (attachment_idx == VK_ATTACHMENT_UNUSED)845return;846847/* Obtain a pipeline for this clear */848assert(attachment_idx < cmd_buffer->state.pass->attachment_count);849const VkFormat format =850cmd_buffer->state.pass->attachments[attachment_idx].desc.format;851const VkFormat samples =852cmd_buffer->state.pass->attachments[attachment_idx].desc.samples;853const uint32_t components = VK_COLOR_COMPONENT_R_BIT |854VK_COLOR_COMPONENT_G_BIT |855VK_COLOR_COMPONENT_B_BIT |856VK_COLOR_COMPONENT_A_BIT;857struct v3dv_meta_color_clear_pipeline *pipeline = NULL;858VkResult result = get_color_clear_pipeline(cmd_buffer->device,859pass,860cmd_buffer->state.subpass_idx,861rt_idx,862attachment_idx,863format,864samples,865components,866is_layered,867&pipeline);868if (result != VK_SUCCESS) {869if (result == VK_ERROR_OUT_OF_HOST_MEMORY)870v3dv_flag_oom(cmd_buffer, NULL);871return;872}873assert(pipeline && pipeline->pipeline);874875/* Emit clear rects */876v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);877878VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);879v3dv_CmdPushConstants(cmd_buffer_handle,880cmd_buffer->device->meta.depth_clear.p_layout,881VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16,882clear_color->float32);883884v3dv_CmdBindPipeline(cmd_buffer_handle,885VK_PIPELINE_BIND_POINT_GRAPHICS,886pipeline->pipeline);887888uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;889890for (uint32_t i = 0; i < rect_count; i++) {891const VkViewport viewport = {892.x = rects[i].rect.offset.x,893.y = rects[i].rect.offset.y,894.width = rects[i].rect.extent.width,895.height = rects[i].rect.extent.height,896.minDepth = 0.0f,897.maxDepth = 1.0f898};899v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);900v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);901902if (is_layered) {903for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;904layer_offset++) {905uint32_t layer = rects[i].baseArrayLayer + layer_offset;906v3dv_CmdPushConstants(cmd_buffer_handle,907cmd_buffer->device->meta.depth_clear.p_layout,908VK_SHADER_STAGE_GEOMETRY_BIT, 16, 4, &layer);909v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);910}911} else {912assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);913v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);914}915}916917/* Subpass pipelines can't be cached because they include a reference to the918* render pass currently bound by the application, which means that we need919* to destroy them manually here.920*/921assert(!pipeline->cached);922v3dv_cmd_buffer_add_private_obj(923cmd_buffer, (uintptr_t)pipeline,924(v3dv_cmd_buffer_private_obj_destroy_cb) destroy_color_clear_pipeline);925926v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);927}928929/* Emits a scissored quad, clearing the depth aspect by writing to gl_FragDepth930* and the stencil aspect by using stencil testing.931*/932static void933emit_subpass_ds_clear_rects(struct v3dv_cmd_buffer *cmd_buffer,934struct v3dv_render_pass *pass,935struct v3dv_subpass *subpass,936VkImageAspectFlags aspects,937const VkClearDepthStencilValue *clear_ds,938bool is_layered,939bool all_rects_same_layers,940uint32_t rect_count,941const VkClearRect *rects)942{943/* Skip if attachment is unused in the current subpass */944const uint32_t attachment_idx = subpass->ds_attachment.attachment;945if (attachment_idx == VK_ATTACHMENT_UNUSED)946return;947948/* Obtain a pipeline for this clear */949assert(attachment_idx < cmd_buffer->state.pass->attachment_count);950struct v3dv_meta_depth_clear_pipeline *pipeline = NULL;951VkResult result = get_depth_clear_pipeline(cmd_buffer->device,952aspects,953pass,954cmd_buffer->state.subpass_idx,955attachment_idx,956is_layered,957&pipeline);958if (result != VK_SUCCESS) {959if (result == VK_ERROR_OUT_OF_HOST_MEMORY)960v3dv_flag_oom(cmd_buffer, NULL);961return;962}963assert(pipeline && pipeline->pipeline);964965/* Emit clear rects */966v3dv_cmd_buffer_meta_state_push(cmd_buffer, false);967968VkCommandBuffer cmd_buffer_handle = v3dv_cmd_buffer_to_handle(cmd_buffer);969v3dv_CmdPushConstants(cmd_buffer_handle,970cmd_buffer->device->meta.depth_clear.p_layout,971VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4,972&clear_ds->depth);973974v3dv_CmdBindPipeline(cmd_buffer_handle,975VK_PIPELINE_BIND_POINT_GRAPHICS,976pipeline->pipeline);977978uint32_t dynamic_states = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;979if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {980v3dv_CmdSetStencilReference(cmd_buffer_handle,981VK_STENCIL_FACE_FRONT_AND_BACK,982clear_ds->stencil);983v3dv_CmdSetStencilWriteMask(cmd_buffer_handle,984VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);985v3dv_CmdSetStencilCompareMask(cmd_buffer_handle,986VK_STENCIL_FACE_FRONT_AND_BACK, 0xff);987dynamic_states |= VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK |988VK_DYNAMIC_STATE_STENCIL_WRITE_MASK |989VK_DYNAMIC_STATE_STENCIL_REFERENCE;990}991992for (uint32_t i = 0; i < rect_count; i++) {993const VkViewport viewport = {994.x = rects[i].rect.offset.x,995.y = rects[i].rect.offset.y,996.width = rects[i].rect.extent.width,997.height = rects[i].rect.extent.height,998.minDepth = 0.0f,999.maxDepth = 1.0f1000};1001v3dv_CmdSetViewport(cmd_buffer_handle, 0, 1, &viewport);1002v3dv_CmdSetScissor(cmd_buffer_handle, 0, 1, &rects[i].rect);1003if (is_layered) {1004for (uint32_t layer_offset = 0; layer_offset < rects[i].layerCount;1005layer_offset++) {1006uint32_t layer = rects[i].baseArrayLayer + layer_offset;1007v3dv_CmdPushConstants(cmd_buffer_handle,1008cmd_buffer->device->meta.depth_clear.p_layout,1009VK_SHADER_STAGE_GEOMETRY_BIT, 4, 4, &layer);1010v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);1011}1012} else {1013assert(rects[i].baseArrayLayer == 0 && rects[i].layerCount == 1);1014v3dv_CmdDraw(cmd_buffer_handle, 4, 1, 0, 0);1015}1016}10171018v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dynamic_states, false);1019}10201021static void1022gather_layering_info(uint32_t rect_count, const VkClearRect *rects,1023bool *is_layered, bool *all_rects_same_layers)1024{1025*all_rects_same_layers = true;10261027uint32_t min_layer = rects[0].baseArrayLayer;1028uint32_t max_layer = rects[0].baseArrayLayer + rects[0].layerCount - 1;1029for (uint32_t i = 1; i < rect_count; i++) {1030if (rects[i].baseArrayLayer != rects[i - 1].baseArrayLayer ||1031rects[i].layerCount != rects[i - 1].layerCount) {1032*all_rects_same_layers = false;1033min_layer = MIN2(min_layer, rects[i].baseArrayLayer);1034max_layer = MAX2(max_layer, rects[i].baseArrayLayer +1035rects[i].layerCount - 1);1036}1037}10381039*is_layered = !(min_layer == 0 && max_layer == 0);1040}10411042VKAPI_ATTR void VKAPI_CALL1043v3dv_CmdClearAttachments(VkCommandBuffer commandBuffer,1044uint32_t attachmentCount,1045const VkClearAttachment *pAttachments,1046uint32_t rectCount,1047const VkClearRect *pRects)1048{1049V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);10501051/* We can only clear attachments in the current subpass */1052assert(attachmentCount <= 5); /* 4 color + D/S */10531054struct v3dv_render_pass *pass = cmd_buffer->state.pass;10551056assert(cmd_buffer->state.subpass_idx < pass->subpass_count);1057struct v3dv_subpass *subpass =1058&cmd_buffer->state.pass->subpasses[cmd_buffer->state.subpass_idx];10591060/* Emit a clear rect inside the current job for this subpass. For layered1061* framebuffers, we use a geometry shader to redirect clears to the1062* appropriate layers.1063*/1064bool is_layered, all_rects_same_layers;1065gather_layering_info(rectCount, pRects, &is_layered, &all_rects_same_layers);1066for (uint32_t i = 0; i < attachmentCount; i++) {1067if (pAttachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1068emit_subpass_color_clear_rects(cmd_buffer, pass, subpass,1069pAttachments[i].colorAttachment,1070&pAttachments[i].clearValue.color,1071is_layered, all_rects_same_layers,1072rectCount, pRects);1073} else {1074emit_subpass_ds_clear_rects(cmd_buffer, pass, subpass,1075pAttachments[i].aspectMask,1076&pAttachments[i].clearValue.depthStencil,1077is_layered, all_rects_same_layers,1078rectCount, pRects);1079}1080}1081}108210831084