Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_clear.c
7233 views
/*1* Copyright © 2015 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "nir/nir_builder.h"24#include "radv_debug.h"25#include "radv_meta.h"26#include "radv_private.h"2728#include "util/format_rgb9e5.h"29#include "vk_format.h"3031enum { DEPTH_CLEAR_SLOW, DEPTH_CLEAR_FAST_EXPCLEAR, DEPTH_CLEAR_FAST_NO_EXPCLEAR };3233static void34build_color_shaders(struct nir_shader **out_vs, struct nir_shader **out_fs, uint32_t frag_output)35{36nir_builder vs_b =37nir_builder_init_simple_shader(MESA_SHADER_VERTEX, NULL, "meta_clear_color_vs");38nir_builder fs_b =39nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_clear_color_fs");4041const struct glsl_type *position_type = glsl_vec4_type();42const struct glsl_type *color_type = glsl_vec4_type();4344nir_variable *vs_out_pos =45nir_variable_create(vs_b.shader, nir_var_shader_out, position_type, "gl_Position");46vs_out_pos->data.location = VARYING_SLOT_POS;4748nir_ssa_def *in_color_load =49nir_load_push_constant(&fs_b, 4, 32, nir_imm_int(&fs_b, 0), .range = 16);5051nir_variable *fs_out_color =52nir_variable_create(fs_b.shader, nir_var_shader_out, color_type, "f_color");53fs_out_color->data.location = FRAG_RESULT_DATA0 + frag_output;5455nir_store_var(&fs_b, fs_out_color, in_color_load, 0xf);5657nir_ssa_def *outvec = radv_meta_gen_rect_vertices(&vs_b);58nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);5960const struct glsl_type *layer_type = glsl_int_type();61nir_variable *vs_out_layer =62nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");63vs_out_layer->data.location = VARYING_SLOT_LAYER;64vs_out_layer->data.interpolation = INTERP_MODE_FLAT;65nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);66nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);6768nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);69nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);7071*out_vs = vs_b.shader;72*out_fs = fs_b.shader;73}7475static VkResult76create_pipeline(struct radv_device *device, struct radv_render_pass *render_pass, uint32_t samples,77struct nir_shader *vs_nir, struct nir_shader *fs_nir,78const VkPipelineVertexInputStateCreateInfo *vi_state,79const VkPipelineDepthStencilStateCreateInfo *ds_state,80const VkPipelineColorBlendStateCreateInfo *cb_state, const VkPipelineLayout layout,81const struct radv_graphics_pipeline_create_info *extra,82const VkAllocationCallbacks *alloc, VkPipeline *pipeline)83{84VkDevice device_h = radv_device_to_handle(device);85VkResult result;8687result = radv_graphics_pipeline_create(88device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),89&(VkGraphicsPipelineCreateInfo){90.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,91.stageCount = fs_nir ? 2 : 1,92.pStages =93(VkPipelineShaderStageCreateInfo[]){94{95.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,96.stage = VK_SHADER_STAGE_VERTEX_BIT,97.module = vk_shader_module_handle_from_nir(vs_nir),98.pName = "main",99},100{101.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,102.stage = VK_SHADER_STAGE_FRAGMENT_BIT,103.module = vk_shader_module_handle_from_nir(fs_nir),104.pName = "main",105},106},107.pVertexInputState = vi_state,108.pInputAssemblyState =109&(VkPipelineInputAssemblyStateCreateInfo){110.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,111.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,112.primitiveRestartEnable = false,113},114.pViewportState =115&(VkPipelineViewportStateCreateInfo){116.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,117.viewportCount = 1,118.scissorCount = 1,119},120.pRasterizationState =121&(VkPipelineRasterizationStateCreateInfo){122.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,123.rasterizerDiscardEnable = false,124.polygonMode = VK_POLYGON_MODE_FILL,125.cullMode = VK_CULL_MODE_NONE,126.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,127.depthBiasEnable = false,128},129.pMultisampleState =130&(VkPipelineMultisampleStateCreateInfo){131.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,132.rasterizationSamples = samples,133.sampleShadingEnable = false,134.pSampleMask = NULL,135.alphaToCoverageEnable = false,136.alphaToOneEnable = false,137},138.pDepthStencilState = ds_state,139.pColorBlendState = cb_state,140.pDynamicState =141&(VkPipelineDynamicStateCreateInfo){142/* The meta clear pipeline declares all state as dynamic.143* As a consequence, vkCmdBindPipeline writes no dynamic state144* to the cmd buffer. Therefore, at the end of the meta clear,145* we need only restore dynamic state was vkCmdSet.146*/147.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,148.dynamicStateCount = 8,149.pDynamicStates =150(VkDynamicState[]){151/* Everything except stencil write mask */152VK_DYNAMIC_STATE_VIEWPORT,153VK_DYNAMIC_STATE_SCISSOR,154VK_DYNAMIC_STATE_LINE_WIDTH,155VK_DYNAMIC_STATE_DEPTH_BIAS,156VK_DYNAMIC_STATE_BLEND_CONSTANTS,157VK_DYNAMIC_STATE_DEPTH_BOUNDS,158VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,159VK_DYNAMIC_STATE_STENCIL_REFERENCE,160},161},162.layout = layout,163.flags = 0,164.renderPass = radv_render_pass_to_handle(render_pass),165.subpass = 0,166},167extra, alloc, pipeline);168169ralloc_free(vs_nir);170ralloc_free(fs_nir);171172return result;173}174175static VkResult176create_color_renderpass(struct radv_device *device, VkFormat vk_format, uint32_t samples,177VkRenderPass *pass)178{179mtx_lock(&device->meta_state.mtx);180if (*pass) {181mtx_unlock(&device->meta_state.mtx);182return VK_SUCCESS;183}184185VkResult result = radv_CreateRenderPass2(186radv_device_to_handle(device),187&(VkRenderPassCreateInfo2){188.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,189.attachmentCount = 1,190.pAttachments =191&(VkAttachmentDescription2){192.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,193.format = vk_format,194.samples = samples,195.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,196.storeOp = VK_ATTACHMENT_STORE_OP_STORE,197.initialLayout = VK_IMAGE_LAYOUT_GENERAL,198.finalLayout = VK_IMAGE_LAYOUT_GENERAL,199},200.subpassCount = 1,201.pSubpasses =202&(VkSubpassDescription2){203.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,204.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,205.inputAttachmentCount = 0,206.colorAttachmentCount = 1,207.pColorAttachments =208&(VkAttachmentReference2){209.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,210.attachment = 0,211.layout = VK_IMAGE_LAYOUT_GENERAL,212},213.pResolveAttachments = NULL,214.pDepthStencilAttachment =215&(VkAttachmentReference2){216.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,217.attachment = VK_ATTACHMENT_UNUSED,218.layout = VK_IMAGE_LAYOUT_GENERAL,219},220.preserveAttachmentCount = 0,221.pPreserveAttachments = NULL,222},223.dependencyCount = 2,224.pDependencies =225(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,226.srcSubpass = VK_SUBPASS_EXTERNAL,227.dstSubpass = 0,228.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,229.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,230.srcAccessMask = 0,231.dstAccessMask = 0,232.dependencyFlags = 0},233{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,234.srcSubpass = 0,235.dstSubpass = VK_SUBPASS_EXTERNAL,236.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,237.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,238.srcAccessMask = 0,239.dstAccessMask = 0,240.dependencyFlags = 0}},241},242&device->meta_state.alloc, pass);243mtx_unlock(&device->meta_state.mtx);244return result;245}246247static VkResult248create_color_pipeline(struct radv_device *device, uint32_t samples, uint32_t frag_output,249VkPipeline *pipeline, VkRenderPass pass)250{251struct nir_shader *vs_nir;252struct nir_shader *fs_nir;253VkResult result;254255mtx_lock(&device->meta_state.mtx);256if (*pipeline) {257mtx_unlock(&device->meta_state.mtx);258return VK_SUCCESS;259}260261build_color_shaders(&vs_nir, &fs_nir, frag_output);262263const VkPipelineVertexInputStateCreateInfo vi_state = {264.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,265.vertexBindingDescriptionCount = 0,266.vertexAttributeDescriptionCount = 0,267};268269const VkPipelineDepthStencilStateCreateInfo ds_state = {270.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,271.depthTestEnable = false,272.depthWriteEnable = false,273.depthBoundsTestEnable = false,274.stencilTestEnable = false,275};276277VkPipelineColorBlendAttachmentState blend_attachment_state[MAX_RTS] = {0};278blend_attachment_state[frag_output] = (VkPipelineColorBlendAttachmentState){279.blendEnable = false,280.colorWriteMask = VK_COLOR_COMPONENT_A_BIT | VK_COLOR_COMPONENT_R_BIT |281VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT,282};283284const VkPipelineColorBlendStateCreateInfo cb_state = {285.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,286.logicOpEnable = false,287.attachmentCount = MAX_RTS,288.pAttachments = blend_attachment_state};289290struct radv_graphics_pipeline_create_info extra = {291.use_rectlist = true,292};293result =294create_pipeline(device, radv_render_pass_from_handle(pass), samples, vs_nir, fs_nir,295&vi_state, &ds_state, &cb_state, device->meta_state.clear_color_p_layout,296&extra, &device->meta_state.alloc, pipeline);297298mtx_unlock(&device->meta_state.mtx);299return result;300}301302static void303finish_meta_clear_htile_mask_state(struct radv_device *device)304{305struct radv_meta_state *state = &device->meta_state;306307radv_DestroyPipeline(radv_device_to_handle(device), state->clear_htile_mask_pipeline,308&state->alloc);309radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_htile_mask_p_layout,310&state->alloc);311radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->clear_htile_mask_ds_layout,312&state->alloc);313}314315void316radv_device_finish_meta_clear_state(struct radv_device *device)317{318struct radv_meta_state *state = &device->meta_state;319320for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {321for (uint32_t j = 0; j < ARRAY_SIZE(state->clear[i].color_pipelines); ++j) {322radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].color_pipelines[j],323&state->alloc);324radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].render_pass[j],325&state->alloc);326}327328for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {329radv_DestroyPipeline(radv_device_to_handle(device), state->clear[i].depth_only_pipeline[j],330&state->alloc);331radv_DestroyPipeline(radv_device_to_handle(device),332state->clear[i].stencil_only_pipeline[j], &state->alloc);333radv_DestroyPipeline(radv_device_to_handle(device),334state->clear[i].depthstencil_pipeline[j], &state->alloc);335336radv_DestroyPipeline(radv_device_to_handle(device),337state->clear[i].depth_only_unrestricted_pipeline[j], &state->alloc);338radv_DestroyPipeline(radv_device_to_handle(device),339state->clear[i].stencil_only_unrestricted_pipeline[j], &state->alloc);340radv_DestroyPipeline(radv_device_to_handle(device),341state->clear[i].depthstencil_unrestricted_pipeline[j], &state->alloc);342}343radv_DestroyRenderPass(radv_device_to_handle(device), state->clear[i].depthstencil_rp,344&state->alloc);345}346radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_color_p_layout,347&state->alloc);348radv_DestroyPipelineLayout(radv_device_to_handle(device), state->clear_depth_p_layout,349&state->alloc);350radv_DestroyPipelineLayout(radv_device_to_handle(device),351state->clear_depth_unrestricted_p_layout, &state->alloc);352353finish_meta_clear_htile_mask_state(device);354}355356static void357emit_color_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,358const VkClearRect *clear_rect, uint32_t view_mask)359{360struct radv_device *device = cmd_buffer->device;361const struct radv_subpass *subpass = cmd_buffer->state.subpass;362const uint32_t subpass_att = clear_att->colorAttachment;363const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;364const struct radv_image_view *iview =365cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;366uint32_t samples, samples_log2;367VkFormat format;368unsigned fs_key;369VkClearColorValue clear_value = clear_att->clearValue.color;370VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);371VkPipeline pipeline;372373/* When a framebuffer is bound to the current command buffer, get the374* number of samples from it. Otherwise, get the number of samples from375* the render pass because it's likely a secondary command buffer.376*/377if (iview) {378samples = iview->image->info.samples;379format = iview->vk_format;380} else {381samples = cmd_buffer->state.pass->attachments[pass_att].samples;382format = cmd_buffer->state.pass->attachments[pass_att].format;383}384385samples_log2 = ffs(samples) - 1;386fs_key = radv_format_meta_fs_key(device, format);387388if (fs_key == -1) {389radv_finishme("color clears incomplete");390return;391}392393if (device->meta_state.clear[samples_log2].render_pass[fs_key] == VK_NULL_HANDLE) {394VkResult ret =395create_color_renderpass(device, radv_fs_key_format_exemplars[fs_key], samples,396&device->meta_state.clear[samples_log2].render_pass[fs_key]);397if (ret != VK_SUCCESS) {398cmd_buffer->record_result = ret;399return;400}401}402403if (device->meta_state.clear[samples_log2].color_pipelines[fs_key] == VK_NULL_HANDLE) {404VkResult ret = create_color_pipeline(405device, samples, 0, &device->meta_state.clear[samples_log2].color_pipelines[fs_key],406device->meta_state.clear[samples_log2].render_pass[fs_key]);407if (ret != VK_SUCCESS) {408cmd_buffer->record_result = ret;409return;410}411}412413pipeline = device->meta_state.clear[samples_log2].color_pipelines[fs_key];414if (!pipeline) {415radv_finishme("color clears incomplete");416return;417}418assert(samples_log2 < ARRAY_SIZE(device->meta_state.clear));419assert(pipeline);420assert(clear_att->aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);421assert(clear_att->colorAttachment < subpass->color_count);422423radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),424device->meta_state.clear_color_p_layout, VK_SHADER_STAGE_FRAGMENT_BIT, 0,42516, &clear_value);426427struct radv_subpass clear_subpass = {428.color_count = 1,429.color_attachments =430(struct radv_subpass_attachment[]){subpass->color_attachments[clear_att->colorAttachment]},431.depth_stencil_attachment = NULL,432};433434radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);435436radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);437438radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,439&(VkViewport){.x = clear_rect->rect.offset.x,440.y = clear_rect->rect.offset.y,441.width = clear_rect->rect.extent.width,442.height = clear_rect->rect.extent.height,443.minDepth = 0.0f,444.maxDepth = 1.0f});445446radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);447448if (view_mask) {449u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);450} else {451radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);452}453454radv_cmd_buffer_set_subpass(cmd_buffer, subpass);455}456457static void458build_depthstencil_shader(struct nir_shader **out_vs, struct nir_shader **out_fs, bool unrestricted)459{460nir_builder vs_b = nir_builder_init_simple_shader(461MESA_SHADER_VERTEX, NULL,462unrestricted ? "meta_clear_depthstencil_unrestricted_vs" : "meta_clear_depthstencil_vs");463nir_builder fs_b = nir_builder_init_simple_shader(464MESA_SHADER_FRAGMENT, NULL,465unrestricted ? "meta_clear_depthstencil_unrestricted_fs" : "meta_clear_depthstencil_fs");466467const struct glsl_type *position_out_type = glsl_vec4_type();468469nir_variable *vs_out_pos =470nir_variable_create(vs_b.shader, nir_var_shader_out, position_out_type, "gl_Position");471vs_out_pos->data.location = VARYING_SLOT_POS;472473nir_ssa_def *z;474if (unrestricted) {475nir_ssa_def *in_color_load =476nir_load_push_constant(&fs_b, 1, 32, nir_imm_int(&fs_b, 0), .range = 4);477478nir_variable *fs_out_depth =479nir_variable_create(fs_b.shader, nir_var_shader_out, glsl_int_type(), "f_depth");480fs_out_depth->data.location = FRAG_RESULT_DEPTH;481nir_store_var(&fs_b, fs_out_depth, in_color_load, 0x1);482483z = nir_imm_float(&vs_b, 0.0);484} else {485z = nir_load_push_constant(&vs_b, 1, 32, nir_imm_int(&vs_b, 0), .range = 4);486}487488nir_ssa_def *outvec = radv_meta_gen_rect_vertices_comp2(&vs_b, z);489nir_store_var(&vs_b, vs_out_pos, outvec, 0xf);490491const struct glsl_type *layer_type = glsl_int_type();492nir_variable *vs_out_layer =493nir_variable_create(vs_b.shader, nir_var_shader_out, layer_type, "v_layer");494vs_out_layer->data.location = VARYING_SLOT_LAYER;495vs_out_layer->data.interpolation = INTERP_MODE_FLAT;496nir_ssa_def *inst_id = nir_load_instance_id(&vs_b);497nir_ssa_def *base_instance = nir_load_base_instance(&vs_b);498499nir_ssa_def *layer_id = nir_iadd(&vs_b, inst_id, base_instance);500nir_store_var(&vs_b, vs_out_layer, layer_id, 0x1);501502*out_vs = vs_b.shader;503*out_fs = fs_b.shader;504}505506static VkResult507create_depthstencil_renderpass(struct radv_device *device, uint32_t samples,508VkRenderPass *render_pass)509{510mtx_lock(&device->meta_state.mtx);511if (*render_pass) {512mtx_unlock(&device->meta_state.mtx);513return VK_SUCCESS;514}515516VkResult result = radv_CreateRenderPass2(517radv_device_to_handle(device),518&(VkRenderPassCreateInfo2){519.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,520.attachmentCount = 1,521.pAttachments =522&(VkAttachmentDescription2){523.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,524.format = VK_FORMAT_D32_SFLOAT_S8_UINT,525.samples = samples,526.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,527.storeOp = VK_ATTACHMENT_STORE_OP_STORE,528.initialLayout = VK_IMAGE_LAYOUT_GENERAL,529.finalLayout = VK_IMAGE_LAYOUT_GENERAL,530},531.subpassCount = 1,532.pSubpasses =533&(VkSubpassDescription2){534.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,535.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,536.inputAttachmentCount = 0,537.colorAttachmentCount = 0,538.pColorAttachments = NULL,539.pResolveAttachments = NULL,540.pDepthStencilAttachment =541&(VkAttachmentReference2){542.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,543.attachment = 0,544.layout = VK_IMAGE_LAYOUT_GENERAL,545},546.preserveAttachmentCount = 0,547.pPreserveAttachments = NULL,548},549.dependencyCount = 2,550.pDependencies =551(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,552.srcSubpass = VK_SUBPASS_EXTERNAL,553.dstSubpass = 0,554.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,555.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,556.srcAccessMask = 0,557.dstAccessMask = 0,558.dependencyFlags = 0},559{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,560.srcSubpass = 0,561.dstSubpass = VK_SUBPASS_EXTERNAL,562.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,563.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,564.srcAccessMask = 0,565.dstAccessMask = 0,566.dependencyFlags = 0}}},567&device->meta_state.alloc, render_pass);568mtx_unlock(&device->meta_state.mtx);569return result;570}571572static VkResult573create_depthstencil_pipeline(struct radv_device *device, VkImageAspectFlags aspects,574uint32_t samples, int index, bool unrestricted, VkPipeline *pipeline,575VkRenderPass render_pass)576{577struct nir_shader *vs_nir, *fs_nir;578VkResult result;579580mtx_lock(&device->meta_state.mtx);581if (*pipeline) {582mtx_unlock(&device->meta_state.mtx);583return VK_SUCCESS;584}585586build_depthstencil_shader(&vs_nir, &fs_nir, unrestricted);587588const VkPipelineVertexInputStateCreateInfo vi_state = {589.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,590.vertexBindingDescriptionCount = 0,591.vertexAttributeDescriptionCount = 0,592};593594const VkPipelineDepthStencilStateCreateInfo ds_state = {595.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,596.depthTestEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),597.depthCompareOp = VK_COMPARE_OP_ALWAYS,598.depthWriteEnable = !!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT),599.depthBoundsTestEnable = false,600.stencilTestEnable = !!(aspects & VK_IMAGE_ASPECT_STENCIL_BIT),601.front =602{603.passOp = VK_STENCIL_OP_REPLACE,604.compareOp = VK_COMPARE_OP_ALWAYS,605.writeMask = UINT32_MAX,606.reference = 0, /* dynamic */607},608.back = {0 /* dont care */},609};610611const VkPipelineColorBlendStateCreateInfo cb_state = {612.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,613.logicOpEnable = false,614.attachmentCount = 0,615.pAttachments = NULL,616};617618struct radv_graphics_pipeline_create_info extra = {619.use_rectlist = true,620};621622if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT) {623extra.db_depth_clear = index == DEPTH_CLEAR_SLOW ? false : true;624extra.db_depth_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;625}626if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {627extra.db_stencil_clear = index == DEPTH_CLEAR_SLOW ? false : true;628extra.db_stencil_disable_expclear = index == DEPTH_CLEAR_FAST_NO_EXPCLEAR ? true : false;629}630result =631create_pipeline(device, radv_render_pass_from_handle(render_pass), samples, vs_nir, fs_nir,632&vi_state, &ds_state, &cb_state, device->meta_state.clear_depth_p_layout,633&extra, &device->meta_state.alloc, pipeline);634635mtx_unlock(&device->meta_state.mtx);636return result;637}638639static bool640depth_view_can_fast_clear(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,641VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,642const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)643{644if (!iview)645return false;646647uint32_t queue_mask = radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,648cmd_buffer->queue_family_index);649if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||650clear_rect->rect.extent.width != iview->extent.width ||651clear_rect->rect.extent.height != iview->extent.height)652return false;653if (radv_image_is_tc_compat_htile(iview->image) &&654(((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && clear_value.depth != 0.0 &&655clear_value.depth != 1.0) ||656((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) && clear_value.stencil != 0)))657return false;658if (radv_htile_enabled(iview->image, iview->base_mip) && iview->base_mip == 0 &&659iview->base_layer == 0 && iview->layer_count == iview->image->info.array_size &&660radv_layout_is_htile_compressed(cmd_buffer->device, iview->image, layout, in_render_loop,661queue_mask) &&662radv_image_extent_compare(iview->image, &iview->extent))663return true;664return false;665}666667static VkPipeline668pick_depthstencil_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_state *meta_state,669const struct radv_image_view *iview, int samples_log2,670VkImageAspectFlags aspects, VkImageLayout layout, bool in_render_loop,671const VkClearRect *clear_rect, VkClearDepthStencilValue clear_value)672{673bool fast = depth_view_can_fast_clear(cmd_buffer, iview, aspects, layout, in_render_loop,674clear_rect, clear_value);675bool unrestricted = cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted;676int index = DEPTH_CLEAR_SLOW;677VkPipeline *pipeline;678679if (fast) {680/* we don't know the previous clear values, so we always have681* the NO_EXPCLEAR path */682index = DEPTH_CLEAR_FAST_NO_EXPCLEAR;683}684685switch (aspects) {686case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:687pipeline = unrestricted688? &meta_state->clear[samples_log2].depthstencil_unrestricted_pipeline[index]689: &meta_state->clear[samples_log2].depthstencil_pipeline[index];690break;691case VK_IMAGE_ASPECT_DEPTH_BIT:692pipeline = unrestricted693? &meta_state->clear[samples_log2].depth_only_unrestricted_pipeline[index]694: &meta_state->clear[samples_log2].depth_only_pipeline[index];695break;696case VK_IMAGE_ASPECT_STENCIL_BIT:697pipeline = unrestricted698? &meta_state->clear[samples_log2].stencil_only_unrestricted_pipeline[index]699: &meta_state->clear[samples_log2].stencil_only_pipeline[index];700break;701default:702unreachable("expected depth or stencil aspect");703}704705if (cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp == VK_NULL_HANDLE) {706VkResult ret = create_depthstencil_renderpass(707cmd_buffer->device, 1u << samples_log2,708&cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);709if (ret != VK_SUCCESS) {710cmd_buffer->record_result = ret;711return VK_NULL_HANDLE;712}713}714715if (*pipeline == VK_NULL_HANDLE) {716VkResult ret = create_depthstencil_pipeline(717cmd_buffer->device, aspects, 1u << samples_log2, index, unrestricted, pipeline,718cmd_buffer->device->meta_state.clear[samples_log2].depthstencil_rp);719if (ret != VK_SUCCESS) {720cmd_buffer->record_result = ret;721return VK_NULL_HANDLE;722}723}724return *pipeline;725}726727static void728emit_depthstencil_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,729const VkClearRect *clear_rect, struct radv_subpass_attachment *ds_att,730uint32_t view_mask)731{732struct radv_device *device = cmd_buffer->device;733struct radv_meta_state *meta_state = &device->meta_state;734const struct radv_subpass *subpass = cmd_buffer->state.subpass;735const uint32_t pass_att = ds_att->attachment;736VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;737VkImageAspectFlags aspects = clear_att->aspectMask;738const struct radv_image_view *iview =739cmd_buffer->state.attachments ? cmd_buffer->state.attachments[pass_att].iview : NULL;740uint32_t samples, samples_log2;741VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);742743/* When a framebuffer is bound to the current command buffer, get the744* number of samples from it. Otherwise, get the number of samples from745* the render pass because it's likely a secondary command buffer.746*/747if (iview) {748samples = iview->image->info.samples;749} else {750samples = cmd_buffer->state.pass->attachments[pass_att].samples;751}752753samples_log2 = ffs(samples) - 1;754755assert(pass_att != VK_ATTACHMENT_UNUSED);756757if (!(aspects & VK_IMAGE_ASPECT_DEPTH_BIT))758clear_value.depth = 1.0f;759760if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted) {761radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),762device->meta_state.clear_depth_unrestricted_p_layout,763VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4, &clear_value.depth);764} else {765radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),766device->meta_state.clear_depth_p_layout, VK_SHADER_STAGE_VERTEX_BIT, 0,7674, &clear_value.depth);768}769770uint32_t prev_reference = cmd_buffer->state.dynamic.stencil_reference.front;771if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {772radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, clear_value.stencil);773}774775VkPipeline pipeline =776pick_depthstencil_pipeline(cmd_buffer, meta_state, iview, samples_log2, aspects,777ds_att->layout, ds_att->in_render_loop, clear_rect, clear_value);778if (!pipeline)779return;780781struct radv_subpass clear_subpass = {782.color_count = 0,783.color_attachments = NULL,784.depth_stencil_attachment = ds_att,785};786787radv_cmd_buffer_set_subpass(cmd_buffer, &clear_subpass);788789radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS, pipeline);790791if (depth_view_can_fast_clear(cmd_buffer, iview, aspects, ds_att->layout, ds_att->in_render_loop,792clear_rect, clear_value))793radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);794795radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,796&(VkViewport){.x = clear_rect->rect.offset.x,797.y = clear_rect->rect.offset.y,798.width = clear_rect->rect.extent.width,799.height = clear_rect->rect.extent.height,800.minDepth = 0.0f,801.maxDepth = 1.0f});802803radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1, &clear_rect->rect);804805if (view_mask) {806u_foreach_bit(i, view_mask) radv_CmdDraw(cmd_buffer_h, 3, 1, 0, i);807} else {808radv_CmdDraw(cmd_buffer_h, 3, clear_rect->layerCount, 0, clear_rect->baseArrayLayer);809}810811if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT) {812radv_CmdSetStencilReference(cmd_buffer_h, VK_STENCIL_FACE_FRONT_BIT, prev_reference);813}814815radv_cmd_buffer_set_subpass(cmd_buffer, subpass);816}817818static uint32_t819clear_htile_mask(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,820struct radeon_winsys_bo *bo, uint64_t offset, uint64_t size, uint32_t htile_value,821uint32_t htile_mask)822{823struct radv_device *device = cmd_buffer->device;824struct radv_meta_state *state = &device->meta_state;825uint64_t block_count = round_up_u64(size, 1024);826struct radv_meta_saved_state saved_state;827828radv_meta_save(829&saved_state, cmd_buffer,830RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);831832struct radv_buffer dst_buffer = {.bo = bo, .offset = offset, .size = size};833834radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,835state->clear_htile_mask_pipeline);836837radv_meta_push_descriptor_set(838cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, state->clear_htile_mask_p_layout, 0, /* set */8391, /* descriptorWriteCount */840(VkWriteDescriptorSet[]){841{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,842.dstBinding = 0,843.dstArrayElement = 0,844.descriptorCount = 1,845.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,846.pBufferInfo = &(VkDescriptorBufferInfo){.buffer = radv_buffer_to_handle(&dst_buffer),847.offset = 0,848.range = size}}});849850const unsigned constants[2] = {851htile_value & htile_mask,852~htile_mask,853};854855radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer), state->clear_htile_mask_p_layout,856VK_SHADER_STAGE_COMPUTE_BIT, 0, 8, constants);857858radv_CmdDispatch(radv_cmd_buffer_to_handle(cmd_buffer), block_count, 1, 1);859860radv_meta_restore(&saved_state, cmd_buffer);861862return RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |863radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);864}865866static uint32_t867radv_get_htile_fast_clear_value(const struct radv_device *device, const struct radv_image *image,868VkClearDepthStencilValue value)869{870uint32_t max_zval = 0x3fff; /* maximum 14-bit value. */871uint32_t zmask = 0, smem = 0;872uint32_t htile_value;873uint32_t zmin, zmax;874875/* Convert the depth value to 14-bit zmin/zmax values. */876zmin = ((value.depth * max_zval) + 0.5f);877zmax = zmin;878879if (radv_image_tile_stencil_disabled(device, image)) {880/* Z only (no stencil):881*882* |31 18|17 4|3 0|883* +---------+---------+-------+884* | Max Z | Min Z | ZMask |885*/886htile_value = (((zmax & 0x3fff) << 18) |887((zmin & 0x3fff) << 4) |888((zmask & 0xf) << 0));889} else {890891/* Z and stencil:892*893* |31 12|11 10|9 8|7 6|5 4|3 0|894* +-----------+-----+------+-----+-----+-------+895* | Z Range | | SMem | SR1 | SR0 | ZMask |896*897* Z, stencil, 4 bit VRS encoding:898* |31 12| 11 10 |9 8|7 6 |5 4|3 0|899* +-----------+------------+------+------------+-----+-------+900* | Z Range | VRS Y-rate | SMem | VRS X-rate | SR0 | ZMask |901*/902uint32_t delta = 0;903uint32_t zrange = ((zmax << 6) | delta);904uint32_t sresults = 0xf; /* SR0/SR1 both as 0x3. */905906if (radv_image_has_vrs_htile(device, image))907sresults = 0x3;908909htile_value = (((zrange & 0xfffff) << 12) |910((smem & 0x3) << 8) |911((sresults & 0xf) << 4) |912((zmask & 0xf) << 0));913}914915return htile_value;916}917918static uint32_t919radv_get_htile_mask(const struct radv_device *device, const struct radv_image *image,920VkImageAspectFlags aspects)921{922uint32_t mask = 0;923924if (radv_image_tile_stencil_disabled(device, image)) {925/* All the HTILE buffer is used when there is no stencil. */926mask = UINT32_MAX;927} else {928if (aspects & VK_IMAGE_ASPECT_DEPTH_BIT)929mask |= 0xfffffc0f;930if (aspects & VK_IMAGE_ASPECT_STENCIL_BIT)931mask |= 0x000003f0;932}933934return mask;935}936937static bool938radv_is_fast_clear_depth_allowed(VkClearDepthStencilValue value)939{940return value.depth == 1.0f || value.depth == 0.0f;941}942943static bool944radv_is_fast_clear_stencil_allowed(VkClearDepthStencilValue value)945{946return value.stencil == 0;947}948949static bool950radv_can_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,951VkImageLayout image_layout, bool in_render_loop,952VkImageAspectFlags aspects, const VkClearRect *clear_rect,953const VkClearDepthStencilValue clear_value, uint32_t view_mask)954{955if (!iview || !iview->support_fast_clear)956return false;957958if (!radv_layout_is_htile_compressed(959cmd_buffer->device, iview->image, image_layout, in_render_loop,960radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,961cmd_buffer->queue_family_index)))962return false;963964if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||965clear_rect->rect.extent.width != iview->image->info.width ||966clear_rect->rect.extent.height != iview->image->info.height)967return false;968969if (view_mask && (iview->image->info.array_size >= 32 ||970(1u << iview->image->info.array_size) - 1u != view_mask))971return false;972if (!view_mask && clear_rect->baseArrayLayer != 0)973return false;974if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)975return false;976977if (cmd_buffer->device->vk.enabled_extensions.EXT_depth_range_unrestricted &&978(aspects & VK_IMAGE_ASPECT_DEPTH_BIT) &&979(clear_value.depth < 0.0 || clear_value.depth > 1.0))980return false;981982if (radv_image_is_tc_compat_htile(iview->image) &&983(((aspects & VK_IMAGE_ASPECT_DEPTH_BIT) && !radv_is_fast_clear_depth_allowed(clear_value)) ||984((aspects & VK_IMAGE_ASPECT_STENCIL_BIT) &&985!radv_is_fast_clear_stencil_allowed(clear_value))))986return false;987988return true;989}990991static void992radv_fast_clear_depth(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,993const VkClearAttachment *clear_att, enum radv_cmd_flush_bits *pre_flush,994enum radv_cmd_flush_bits *post_flush)995{996VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;997VkImageAspectFlags aspects = clear_att->aspectMask;998uint32_t clear_word, flush_bits;9991000clear_word = radv_get_htile_fast_clear_value(cmd_buffer->device, iview->image, clear_value);10011002if (pre_flush) {1003enum radv_cmd_flush_bits bits =1004radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,1005iview->image) |1006radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT |1007VK_ACCESS_SHADER_READ_BIT, iview->image);1008cmd_buffer->state.flush_bits |= bits & ~*pre_flush;1009*pre_flush |= cmd_buffer->state.flush_bits;1010}10111012VkImageSubresourceRange range = {1013.aspectMask = aspects,1014.baseMipLevel = iview->base_mip,1015.levelCount = iview->level_count,1016.baseArrayLayer = iview->base_layer,1017.layerCount = iview->layer_count,1018};10191020flush_bits = radv_clear_htile(cmd_buffer, iview->image, &range, clear_word);10211022if (iview->image->planes[0].surface.has_stencil &&1023!(aspects == (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT))) {1024/* Synchronize after performing a depth-only or a stencil-only1025* fast clear because the driver uses an optimized path which1026* performs a read-modify-write operation, and the two separate1027* aspects might use the same HTILE memory.1028*/1029cmd_buffer->state.flush_bits |= flush_bits;1030}10311032radv_update_ds_clear_metadata(cmd_buffer, iview, clear_value, aspects);1033if (post_flush) {1034*post_flush |= flush_bits;1035}1036}10371038static nir_shader *1039build_clear_htile_mask_shader()1040{1041nir_builder b =1042nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_clear_htile_mask");1043b.shader->info.workgroup_size[0] = 64;1044b.shader->info.workgroup_size[1] = 1;1045b.shader->info.workgroup_size[2] = 1;10461047nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);1048nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);1049nir_ssa_def *block_size =1050nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],1051b.shader->info.workgroup_size[2], 0);10521053nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);10541055nir_ssa_def *offset = nir_imul(&b, global_id, nir_imm_int(&b, 16));1056offset = nir_channel(&b, offset, 0);10571058nir_ssa_def *buf = radv_meta_load_descriptor(&b, 0, 0);10591060nir_ssa_def *constants = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);10611062nir_ssa_def *load = nir_load_ssbo(&b, 4, 32, buf, offset, .align_mul = 16);10631064/* data = (data & ~htile_mask) | (htile_value & htile_mask) */1065nir_ssa_def *data = nir_iand(&b, load, nir_channel(&b, constants, 1));1066data = nir_ior(&b, data, nir_channel(&b, constants, 0));10671068nir_store_ssbo(&b, data, buf, offset, .write_mask = 0xf, .access = ACCESS_NON_READABLE,1069.align_mul = 16);10701071return b.shader;1072}10731074static VkResult1075init_meta_clear_htile_mask_state(struct radv_device *device)1076{1077struct radv_meta_state *state = &device->meta_state;1078VkResult result;1079nir_shader *cs = build_clear_htile_mask_shader();10801081VkDescriptorSetLayoutCreateInfo ds_layout_info = {1082.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,1083.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,1084.bindingCount = 1,1085.pBindings = (VkDescriptorSetLayoutBinding[]){1086{.binding = 0,1087.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_BUFFER,1088.descriptorCount = 1,1089.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,1090.pImmutableSamplers = NULL},1091}};10921093result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_layout_info,1094&state->alloc, &state->clear_htile_mask_ds_layout);1095if (result != VK_SUCCESS)1096goto fail;10971098VkPipelineLayoutCreateInfo p_layout_info = {1099.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,1100.setLayoutCount = 1,1101.pSetLayouts = &state->clear_htile_mask_ds_layout,1102.pushConstantRangeCount = 1,1103.pPushConstantRanges =1104&(VkPushConstantRange){1105VK_SHADER_STAGE_COMPUTE_BIT,11060,11078,1108},1109};11101111result = radv_CreatePipelineLayout(radv_device_to_handle(device), &p_layout_info, &state->alloc,1112&state->clear_htile_mask_p_layout);1113if (result != VK_SUCCESS)1114goto fail;11151116VkPipelineShaderStageCreateInfo shader_stage = {1117.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,1118.stage = VK_SHADER_STAGE_COMPUTE_BIT,1119.module = vk_shader_module_handle_from_nir(cs),1120.pName = "main",1121.pSpecializationInfo = NULL,1122};11231124VkComputePipelineCreateInfo pipeline_info = {1125.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,1126.stage = shader_stage,1127.flags = 0,1128.layout = state->clear_htile_mask_p_layout,1129};11301131result = radv_CreateComputePipelines(radv_device_to_handle(device),1132radv_pipeline_cache_to_handle(&state->cache), 1,1133&pipeline_info, NULL, &state->clear_htile_mask_pipeline);11341135ralloc_free(cs);1136return result;1137fail:1138ralloc_free(cs);1139return result;1140}11411142VkResult1143radv_device_init_meta_clear_state(struct radv_device *device, bool on_demand)1144{1145VkResult res;1146struct radv_meta_state *state = &device->meta_state;11471148VkPipelineLayoutCreateInfo pl_color_create_info = {1149.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,1150.setLayoutCount = 0,1151.pushConstantRangeCount = 1,1152.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 16},1153};11541155res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_color_create_info,1156&device->meta_state.alloc,1157&device->meta_state.clear_color_p_layout);1158if (res != VK_SUCCESS)1159goto fail;11601161VkPipelineLayoutCreateInfo pl_depth_create_info = {1162.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,1163.setLayoutCount = 0,1164.pushConstantRangeCount = 1,1165.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_VERTEX_BIT, 0, 4},1166};11671168res = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_depth_create_info,1169&device->meta_state.alloc,1170&device->meta_state.clear_depth_p_layout);1171if (res != VK_SUCCESS)1172goto fail;11731174VkPipelineLayoutCreateInfo pl_depth_unrestricted_create_info = {1175.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,1176.setLayoutCount = 0,1177.pushConstantRangeCount = 1,1178.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_FRAGMENT_BIT, 0, 4},1179};11801181res = radv_CreatePipelineLayout(radv_device_to_handle(device),1182&pl_depth_unrestricted_create_info, &device->meta_state.alloc,1183&device->meta_state.clear_depth_unrestricted_p_layout);1184if (res != VK_SUCCESS)1185goto fail;11861187res = init_meta_clear_htile_mask_state(device);1188if (res != VK_SUCCESS)1189goto fail;11901191if (on_demand)1192return VK_SUCCESS;11931194for (uint32_t i = 0; i < ARRAY_SIZE(state->clear); ++i) {1195uint32_t samples = 1 << i;1196for (uint32_t j = 0; j < NUM_META_FS_KEYS; ++j) {1197VkFormat format = radv_fs_key_format_exemplars[j];1198unsigned fs_key = radv_format_meta_fs_key(device, format);1199assert(!state->clear[i].color_pipelines[fs_key]);12001201res =1202create_color_renderpass(device, format, samples, &state->clear[i].render_pass[fs_key]);1203if (res != VK_SUCCESS)1204goto fail;12051206res = create_color_pipeline(device, samples, 0, &state->clear[i].color_pipelines[fs_key],1207state->clear[i].render_pass[fs_key]);1208if (res != VK_SUCCESS)1209goto fail;1210}12111212res = create_depthstencil_renderpass(device, samples, &state->clear[i].depthstencil_rp);1213if (res != VK_SUCCESS)1214goto fail;12151216for (uint32_t j = 0; j < NUM_DEPTH_CLEAR_PIPELINES; j++) {1217res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, false,1218&state->clear[i].depth_only_pipeline[j],1219state->clear[i].depthstencil_rp);1220if (res != VK_SUCCESS)1221goto fail;12221223res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,1224&state->clear[i].stencil_only_pipeline[j],1225state->clear[i].depthstencil_rp);1226if (res != VK_SUCCESS)1227goto fail;12281229res = create_depthstencil_pipeline(1230device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, false,1231&state->clear[i].depthstencil_pipeline[j], state->clear[i].depthstencil_rp);1232if (res != VK_SUCCESS)1233goto fail;12341235res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_DEPTH_BIT, samples, j, true,1236&state->clear[i].depth_only_unrestricted_pipeline[j],1237state->clear[i].depthstencil_rp);1238if (res != VK_SUCCESS)1239goto fail;12401241res = create_depthstencil_pipeline(device, VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,1242&state->clear[i].stencil_only_unrestricted_pipeline[j],1243state->clear[i].depthstencil_rp);1244if (res != VK_SUCCESS)1245goto fail;12461247res = create_depthstencil_pipeline(1248device, VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT, samples, j, true,1249&state->clear[i].depthstencil_unrestricted_pipeline[j],1250state->clear[i].depthstencil_rp);1251if (res != VK_SUCCESS)1252goto fail;1253}1254}1255return VK_SUCCESS;12561257fail:1258radv_device_finish_meta_clear_state(device);1259return res;1260}12611262static uint32_t1263radv_get_cmask_fast_clear_value(const struct radv_image *image)1264{1265uint32_t value = 0; /* Default value when no DCC. */12661267/* The fast-clear value is different for images that have both DCC and1268* CMASK metadata.1269*/1270if (radv_image_has_dcc(image)) {1271/* DCC fast clear with MSAA should clear CMASK to 0xC. */1272return image->info.samples > 1 ? 0xcccccccc : 0xffffffff;1273}12741275return value;1276}12771278uint32_t1279radv_clear_cmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,1280const VkImageSubresourceRange *range, uint32_t value)1281{1282uint64_t offset = image->offset + image->planes[0].surface.cmask_offset;1283uint64_t size;12841285if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {1286/* TODO: clear layers. */1287size = image->planes[0].surface.cmask_size;1288} else {1289unsigned slice_size = image->planes[0].surface.cmask_slice_size;12901291offset += slice_size * range->baseArrayLayer;1292size = slice_size * radv_get_layerCount(image, range);1293}12941295return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);1296}12971298uint32_t1299radv_clear_fmask(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,1300const VkImageSubresourceRange *range, uint32_t value)1301{1302uint64_t offset = image->offset + image->planes[0].surface.fmask_offset;1303unsigned slice_size = image->planes[0].surface.fmask_slice_size;1304uint64_t size;13051306/* MSAA images do not support mipmap levels. */1307assert(range->baseMipLevel == 0 && radv_get_levelCount(image, range) == 1);13081309offset += slice_size * range->baseArrayLayer;1310size = slice_size * radv_get_layerCount(image, range);13111312return radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);1313}13141315uint32_t1316radv_clear_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,1317const VkImageSubresourceRange *range, uint32_t value)1318{1319uint32_t level_count = radv_get_levelCount(image, range);1320uint32_t layer_count = radv_get_layerCount(image, range);1321uint32_t flush_bits = 0;13221323/* Mark the image as being compressed. */1324radv_update_dcc_metadata(cmd_buffer, image, range, true);13251326for (uint32_t l = 0; l < level_count; l++) {1327uint64_t offset = image->offset + image->planes[0].surface.meta_offset;1328uint32_t level = range->baseMipLevel + l;1329uint64_t size;13301331if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10) {1332/* DCC for mipmaps+layers is currently disabled. */1333offset += image->planes[0].surface.meta_slice_size * range->baseArrayLayer +1334image->planes[0].surface.u.gfx9.meta_levels[level].offset;1335size = image->planes[0].surface.u.gfx9.meta_levels[level].size * layer_count;1336} else if (cmd_buffer->device->physical_device->rad_info.chip_class == GFX9) {1337/* Mipmap levels and layers aren't implemented. */1338assert(level == 0);1339size = image->planes[0].surface.meta_size;1340} else {1341const struct legacy_surf_dcc_level *dcc_level =1342&image->planes[0].surface.u.legacy.color.dcc_level[level];13431344/* If dcc_fast_clear_size is 0 (which might happens for1345* mipmaps) the fill buffer operation below is a no-op.1346* This can only happen during initialization as the1347* fast clear path fallbacks to slow clears if one1348* level can't be fast cleared.1349*/1350offset +=1351dcc_level->dcc_offset + dcc_level->dcc_slice_fast_clear_size * range->baseArrayLayer;1352size = dcc_level->dcc_slice_fast_clear_size * radv_get_layerCount(image, range);1353}13541355/* Do not clear this level if it can't be compressed. */1356if (!size)1357continue;13581359flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);1360}13611362return flush_bits;1363}13641365uint32_t1366radv_clear_htile(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *image,1367const VkImageSubresourceRange *range, uint32_t value)1368{1369uint32_t level_count = radv_get_levelCount(image, range);1370uint32_t flush_bits = 0;1371uint32_t htile_mask;13721373htile_mask = radv_get_htile_mask(cmd_buffer->device, image, range->aspectMask);13741375if (level_count != image->info.levels) {1376assert(cmd_buffer->device->physical_device->rad_info.chip_class >= GFX10);13771378/* Clear individuals levels separately. */1379for (uint32_t l = 0; l < level_count; l++) {1380uint32_t level = range->baseMipLevel + l;1381uint64_t offset = image->offset + image->planes[0].surface.meta_offset +1382image->planes[0].surface.u.gfx9.meta_levels[level].offset;1383uint32_t size = image->planes[0].surface.u.gfx9.meta_levels[level].size;13841385/* Do not clear this level if it can be compressed. */1386if (!size)1387continue;13881389if (htile_mask == UINT_MAX) {1390/* Clear the whole HTILE buffer. */1391flush_bits |= radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);1392} else {1393/* Only clear depth or stencil bytes in the HTILE buffer. */1394flush_bits |=1395clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask);1396}1397}1398} else {1399unsigned layer_count = radv_get_layerCount(image, range);1400uint64_t size = image->planes[0].surface.meta_slice_size * layer_count;1401uint64_t offset = image->offset + image->planes[0].surface.meta_offset +1402image->planes[0].surface.meta_slice_size * range->baseArrayLayer;14031404if (htile_mask == UINT_MAX) {1405/* Clear the whole HTILE buffer. */1406flush_bits = radv_fill_buffer(cmd_buffer, image, image->bo, offset, size, value);1407} else {1408/* Only clear depth or stencil bytes in the HTILE buffer. */1409flush_bits =1410clear_htile_mask(cmd_buffer, image, image->bo, offset, size, value, htile_mask);1411}1412}14131414return flush_bits;1415}14161417enum {1418RADV_DCC_CLEAR_REG = 0x20202020U,1419RADV_DCC_CLEAR_MAIN_1 = 0x80808080U,1420RADV_DCC_CLEAR_SECONDARY_1 = 0x40404040U1421};14221423static void1424vi_get_fast_clear_parameters(struct radv_device *device, VkFormat image_format,1425VkFormat view_format, const VkClearColorValue *clear_value,1426uint32_t *reset_value, bool *can_avoid_fast_clear_elim)1427{1428bool values[4] = {0};1429int extra_channel;1430bool main_value = false;1431bool extra_value = false;1432bool has_color = false;1433bool has_alpha = false;1434*can_avoid_fast_clear_elim = false;14351436*reset_value = RADV_DCC_CLEAR_REG;14371438const struct util_format_description *desc = vk_format_description(view_format);1439if (view_format == VK_FORMAT_B10G11R11_UFLOAT_PACK32 ||1440view_format == VK_FORMAT_R5G6B5_UNORM_PACK16 || view_format == VK_FORMAT_B5G6R5_UNORM_PACK16)1441extra_channel = -1;1442else if (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN) {1443if (vi_alpha_is_on_msb(device, view_format))1444extra_channel = desc->nr_channels - 1;1445else1446extra_channel = 0;1447} else1448return;14491450for (int i = 0; i < 4; i++) {1451int index = desc->swizzle[i] - PIPE_SWIZZLE_X;1452if (desc->swizzle[i] < PIPE_SWIZZLE_X || desc->swizzle[i] > PIPE_SWIZZLE_W)1453continue;14541455if (desc->channel[i].pure_integer && desc->channel[i].type == UTIL_FORMAT_TYPE_SIGNED) {1456/* Use the maximum value for clamping the clear color. */1457int max = u_bit_consecutive(0, desc->channel[i].size - 1);14581459values[i] = clear_value->int32[i] != 0;1460if (clear_value->int32[i] != 0 && MIN2(clear_value->int32[i], max) != max)1461return;1462} else if (desc->channel[i].pure_integer &&1463desc->channel[i].type == UTIL_FORMAT_TYPE_UNSIGNED) {1464/* Use the maximum value for clamping the clear color. */1465unsigned max = u_bit_consecutive(0, desc->channel[i].size);14661467values[i] = clear_value->uint32[i] != 0U;1468if (clear_value->uint32[i] != 0U && MIN2(clear_value->uint32[i], max) != max)1469return;1470} else {1471values[i] = clear_value->float32[i] != 0.0F;1472if (clear_value->float32[i] != 0.0F && clear_value->float32[i] != 1.0F)1473return;1474}14751476if (index == extra_channel) {1477extra_value = values[i];1478has_alpha = true;1479} else {1480main_value = values[i];1481has_color = true;1482}1483}14841485/* If alpha isn't present, make it the same as color, and vice versa. */1486if (!has_alpha)1487extra_value = main_value;1488else if (!has_color)1489main_value = extra_value;14901491for (int i = 0; i < 4; ++i)1492if (values[i] != main_value && desc->swizzle[i] - PIPE_SWIZZLE_X != extra_channel &&1493desc->swizzle[i] >= PIPE_SWIZZLE_X && desc->swizzle[i] <= PIPE_SWIZZLE_W)1494return;14951496*can_avoid_fast_clear_elim = true;1497*reset_value = 0;1498if (main_value)1499*reset_value |= RADV_DCC_CLEAR_MAIN_1;15001501if (extra_value)1502*reset_value |= RADV_DCC_CLEAR_SECONDARY_1;1503return;1504}15051506static bool1507radv_can_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,1508VkImageLayout image_layout, bool in_render_loop,1509const VkClearRect *clear_rect, VkClearColorValue clear_value,1510uint32_t view_mask)1511{1512uint32_t clear_color[2];15131514if (!iview || !iview->support_fast_clear)1515return false;15161517if (!radv_layout_can_fast_clear(1518cmd_buffer->device, iview->image, iview->base_mip, image_layout, in_render_loop,1519radv_image_queue_family_mask(iview->image, cmd_buffer->queue_family_index,1520cmd_buffer->queue_family_index)))1521return false;15221523if (clear_rect->rect.offset.x || clear_rect->rect.offset.y ||1524clear_rect->rect.extent.width != iview->image->info.width ||1525clear_rect->rect.extent.height != iview->image->info.height)1526return false;15271528if (view_mask && (iview->image->info.array_size >= 32 ||1529(1u << iview->image->info.array_size) - 1u != view_mask))1530return false;1531if (!view_mask && clear_rect->baseArrayLayer != 0)1532return false;1533if (!view_mask && clear_rect->layerCount != iview->image->info.array_size)1534return false;15351536/* DCC */1537if (!radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value))1538return false;15391540if (!radv_image_has_clear_value(iview->image) && (clear_color[0] != 0 || clear_color[1] != 0))1541return false;15421543if (radv_dcc_enabled(iview->image, iview->base_mip)) {1544bool can_avoid_fast_clear_elim;1545uint32_t reset_value;15461547vi_get_fast_clear_parameters(cmd_buffer->device, iview->image->vk_format, iview->vk_format,1548&clear_value, &reset_value, &can_avoid_fast_clear_elim);15491550if (iview->image->info.samples > 1) {1551/* DCC fast clear with MSAA should clear CMASK. */1552/* FIXME: This doesn't work for now. There is a1553* hardware bug with fast clears and DCC for MSAA1554* textures. AMDVLK has a workaround but it doesn't1555* seem to work here. Note that we might emit useless1556* CB flushes but that shouldn't matter.1557*/1558if (!can_avoid_fast_clear_elim)1559return false;1560}15611562if (iview->image->info.levels > 1) {1563if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {1564uint32_t last_level = iview->base_mip + iview->level_count - 1;1565if (last_level >= iview->image->planes[0].surface.num_meta_levels) {1566/* Do not fast clears if one level can't be fast cleard. */1567return false;1568}1569} else {1570for (uint32_t l = 0; l < iview->level_count; l++) {1571uint32_t level = iview->base_mip + l;1572struct legacy_surf_dcc_level *dcc_level =1573&iview->image->planes[0].surface.u.legacy.color.dcc_level[level];15741575/* Do not fast clears if one level can't be1576* fast cleared.1577*/1578if (!dcc_level->dcc_fast_clear_size)1579return false;1580}1581}1582}1583}15841585return true;1586}15871588static void1589radv_fast_clear_color(struct radv_cmd_buffer *cmd_buffer, const struct radv_image_view *iview,1590const VkClearAttachment *clear_att, uint32_t subpass_att,1591enum radv_cmd_flush_bits *pre_flush, enum radv_cmd_flush_bits *post_flush)1592{1593VkClearColorValue clear_value = clear_att->clearValue.color;1594uint32_t clear_color[2], flush_bits = 0;1595uint32_t cmask_clear_value;1596VkImageSubresourceRange range = {1597.aspectMask = iview->aspect_mask,1598.baseMipLevel = iview->base_mip,1599.levelCount = iview->level_count,1600.baseArrayLayer = iview->base_layer,1601.layerCount = iview->layer_count,1602};16031604if (pre_flush) {1605enum radv_cmd_flush_bits bits =1606radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, iview->image) |1607radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, iview->image);1608cmd_buffer->state.flush_bits |= bits & ~*pre_flush;1609*pre_flush |= cmd_buffer->state.flush_bits;1610}16111612/* DCC */1613radv_format_pack_clear_color(iview->vk_format, clear_color, &clear_value);16141615cmask_clear_value = radv_get_cmask_fast_clear_value(iview->image);16161617/* clear cmask buffer */1618bool need_decompress_pass = false;1619if (radv_dcc_enabled(iview->image, iview->base_mip)) {1620uint32_t reset_value;1621bool can_avoid_fast_clear_elim;16221623vi_get_fast_clear_parameters(cmd_buffer->device, iview->image->vk_format, iview->vk_format,1624&clear_value, &reset_value, &can_avoid_fast_clear_elim);16251626if (radv_image_has_cmask(iview->image)) {1627flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);1628}16291630if (!can_avoid_fast_clear_elim)1631need_decompress_pass = true;16321633flush_bits |= radv_clear_dcc(cmd_buffer, iview->image, &range, reset_value);1634} else {1635flush_bits = radv_clear_cmask(cmd_buffer, iview->image, &range, cmask_clear_value);16361637/* Fast clearing with CMASK should always be eliminated. */1638need_decompress_pass = true;1639}16401641if (post_flush) {1642*post_flush |= flush_bits;1643}16441645/* Update the FCE predicate to perform a fast-clear eliminate. */1646radv_update_fce_metadata(cmd_buffer, iview->image, &range, need_decompress_pass);16471648radv_update_color_clear_metadata(cmd_buffer, iview, subpass_att, clear_color);1649}16501651/**1652* The parameters mean that same as those in vkCmdClearAttachments.1653*/1654static void1655emit_clear(struct radv_cmd_buffer *cmd_buffer, const VkClearAttachment *clear_att,1656const VkClearRect *clear_rect, enum radv_cmd_flush_bits *pre_flush,1657enum radv_cmd_flush_bits *post_flush, uint32_t view_mask, bool ds_resolve_clear)1658{1659const struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;1660const struct radv_subpass *subpass = cmd_buffer->state.subpass;1661VkImageAspectFlags aspects = clear_att->aspectMask;16621663if (aspects & VK_IMAGE_ASPECT_COLOR_BIT) {1664const uint32_t subpass_att = clear_att->colorAttachment;1665assert(subpass_att < subpass->color_count);1666const uint32_t pass_att = subpass->color_attachments[subpass_att].attachment;1667if (pass_att == VK_ATTACHMENT_UNUSED)1668return;16691670VkImageLayout image_layout = subpass->color_attachments[subpass_att].layout;1671bool in_render_loop = subpass->color_attachments[subpass_att].in_render_loop;1672const struct radv_image_view *iview =1673fb ? cmd_buffer->state.attachments[pass_att].iview : NULL;1674VkClearColorValue clear_value = clear_att->clearValue.color;16751676if (radv_can_fast_clear_color(cmd_buffer, iview, image_layout, in_render_loop, clear_rect,1677clear_value, view_mask)) {1678radv_fast_clear_color(cmd_buffer, iview, clear_att, subpass_att, pre_flush, post_flush);1679} else {1680emit_color_clear(cmd_buffer, clear_att, clear_rect, view_mask);1681}1682} else {1683struct radv_subpass_attachment *ds_att = subpass->depth_stencil_attachment;16841685if (ds_resolve_clear)1686ds_att = subpass->ds_resolve_attachment;16871688if (!ds_att || ds_att->attachment == VK_ATTACHMENT_UNUSED)1689return;16901691VkImageLayout image_layout = ds_att->layout;1692bool in_render_loop = ds_att->in_render_loop;1693const struct radv_image_view *iview =1694fb ? cmd_buffer->state.attachments[ds_att->attachment].iview : NULL;1695VkClearDepthStencilValue clear_value = clear_att->clearValue.depthStencil;16961697assert(aspects & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT));16981699if (radv_can_fast_clear_depth(cmd_buffer, iview, image_layout, in_render_loop, aspects,1700clear_rect, clear_value, view_mask)) {1701radv_fast_clear_depth(cmd_buffer, iview, clear_att, pre_flush, post_flush);1702} else {1703emit_depthstencil_clear(cmd_buffer, clear_att, clear_rect, ds_att, view_mask);1704}1705}1706}17071708static inline bool1709radv_attachment_needs_clear(struct radv_cmd_state *cmd_state, uint32_t a)1710{1711uint32_t view_mask = cmd_state->subpass->view_mask;1712return (a != VK_ATTACHMENT_UNUSED && cmd_state->attachments[a].pending_clear_aspects &&1713(!view_mask || (view_mask & ~cmd_state->attachments[a].cleared_views)));1714}17151716static bool1717radv_subpass_needs_clear(struct radv_cmd_buffer *cmd_buffer)1718{1719struct radv_cmd_state *cmd_state = &cmd_buffer->state;1720uint32_t a;17211722if (!cmd_state->subpass)1723return false;17241725for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {1726a = cmd_state->subpass->color_attachments[i].attachment;1727if (radv_attachment_needs_clear(cmd_state, a))1728return true;1729}17301731if (cmd_state->subpass->depth_stencil_attachment) {1732a = cmd_state->subpass->depth_stencil_attachment->attachment;1733if (radv_attachment_needs_clear(cmd_state, a))1734return true;1735}17361737if (!cmd_state->subpass->ds_resolve_attachment)1738return false;17391740a = cmd_state->subpass->ds_resolve_attachment->attachment;1741return radv_attachment_needs_clear(cmd_state, a);1742}17431744static void1745radv_subpass_clear_attachment(struct radv_cmd_buffer *cmd_buffer,1746struct radv_attachment_state *attachment,1747const VkClearAttachment *clear_att,1748enum radv_cmd_flush_bits *pre_flush,1749enum radv_cmd_flush_bits *post_flush, bool ds_resolve_clear)1750{1751struct radv_cmd_state *cmd_state = &cmd_buffer->state;1752uint32_t view_mask = cmd_state->subpass->view_mask;17531754VkClearRect clear_rect = {1755.rect = cmd_state->render_area,1756.baseArrayLayer = 0,1757.layerCount = cmd_state->framebuffer->layers,1758};17591760radv_describe_begin_render_pass_clear(cmd_buffer, clear_att->aspectMask);17611762emit_clear(cmd_buffer, clear_att, &clear_rect, pre_flush, post_flush,1763view_mask & ~attachment->cleared_views, ds_resolve_clear);1764if (view_mask)1765attachment->cleared_views |= view_mask;1766else1767attachment->pending_clear_aspects = 0;17681769radv_describe_end_render_pass_clear(cmd_buffer);1770}17711772/**1773* Emit any pending attachment clears for the current subpass.1774*1775* @see radv_attachment_state::pending_clear_aspects1776*/1777void1778radv_cmd_buffer_clear_subpass(struct radv_cmd_buffer *cmd_buffer)1779{1780struct radv_cmd_state *cmd_state = &cmd_buffer->state;1781struct radv_meta_saved_state saved_state;1782enum radv_cmd_flush_bits pre_flush = 0;1783enum radv_cmd_flush_bits post_flush = 0;17841785if (!radv_subpass_needs_clear(cmd_buffer))1786return;17871788radv_meta_save(&saved_state, cmd_buffer,1789RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);17901791for (uint32_t i = 0; i < cmd_state->subpass->color_count; ++i) {1792uint32_t a = cmd_state->subpass->color_attachments[i].attachment;17931794if (!radv_attachment_needs_clear(cmd_state, a))1795continue;17961797assert(cmd_state->attachments[a].pending_clear_aspects == VK_IMAGE_ASPECT_COLOR_BIT);17981799VkClearAttachment clear_att = {1800.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,1801.colorAttachment = i, /* Use attachment index relative to subpass */1802.clearValue = cmd_state->attachments[a].clear_value,1803};18041805radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[a], &clear_att, &pre_flush,1806&post_flush, false);1807}18081809if (cmd_state->subpass->depth_stencil_attachment) {1810uint32_t ds = cmd_state->subpass->depth_stencil_attachment->attachment;1811if (radv_attachment_needs_clear(cmd_state, ds)) {1812VkClearAttachment clear_att = {1813.aspectMask = cmd_state->attachments[ds].pending_clear_aspects,1814.clearValue = cmd_state->attachments[ds].clear_value,1815};18161817radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds], &clear_att,1818&pre_flush, &post_flush, false);1819}1820}18211822if (cmd_state->subpass->ds_resolve_attachment) {1823uint32_t ds_resolve = cmd_state->subpass->ds_resolve_attachment->attachment;1824if (radv_attachment_needs_clear(cmd_state, ds_resolve)) {1825VkClearAttachment clear_att = {1826.aspectMask = cmd_state->attachments[ds_resolve].pending_clear_aspects,1827.clearValue = cmd_state->attachments[ds_resolve].clear_value,1828};18291830radv_subpass_clear_attachment(cmd_buffer, &cmd_state->attachments[ds_resolve], &clear_att,1831&pre_flush, &post_flush, true);1832}1833}18341835radv_meta_restore(&saved_state, cmd_buffer);1836cmd_buffer->state.flush_bits |= post_flush;1837}18381839static void1840radv_clear_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,1841VkImageLayout image_layout, const VkImageSubresourceRange *range,1842VkFormat format, int level, int layer, const VkClearValue *clear_val)1843{1844VkDevice device_h = radv_device_to_handle(cmd_buffer->device);1845struct radv_image_view iview;1846uint32_t width = radv_minify(image->info.width, range->baseMipLevel + level);1847uint32_t height = radv_minify(image->info.height, range->baseMipLevel + level);18481849radv_image_view_init(&iview, cmd_buffer->device,1850&(VkImageViewCreateInfo){1851.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,1852.image = radv_image_to_handle(image),1853.viewType = radv_meta_get_view_type(image),1854.format = format,1855.subresourceRange = {.aspectMask = range->aspectMask,1856.baseMipLevel = range->baseMipLevel + level,1857.levelCount = 1,1858.baseArrayLayer = range->baseArrayLayer + layer,1859.layerCount = 1},1860},1861NULL);18621863VkFramebuffer fb;1864radv_CreateFramebuffer(1865device_h,1866&(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,1867.attachmentCount = 1,1868.pAttachments =1869(VkImageView[]){1870radv_image_view_to_handle(&iview),1871},1872.width = width,1873.height = height,1874.layers = 1},1875&cmd_buffer->pool->alloc, &fb);18761877VkAttachmentDescription2 att_desc = {1878.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2,1879.format = iview.vk_format,1880.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD,1881.storeOp = VK_ATTACHMENT_STORE_OP_STORE,1882.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD,1883.stencilStoreOp = VK_ATTACHMENT_STORE_OP_STORE,1884.initialLayout = image_layout,1885.finalLayout = image_layout,1886};18871888VkSubpassDescription2 subpass_desc = {1889.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,1890.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,1891.inputAttachmentCount = 0,1892.colorAttachmentCount = 0,1893.pColorAttachments = NULL,1894.pResolveAttachments = NULL,1895.pDepthStencilAttachment = NULL,1896.preserveAttachmentCount = 0,1897.pPreserveAttachments = NULL,1898};18991900const VkAttachmentReference2 att_ref = {1901.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,1902.attachment = 0,1903.layout = image_layout,1904};19051906if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1907subpass_desc.colorAttachmentCount = 1;1908subpass_desc.pColorAttachments = &att_ref;1909} else {1910subpass_desc.pDepthStencilAttachment = &att_ref;1911}19121913VkRenderPass pass;1914radv_CreateRenderPass2(1915device_h,1916&(VkRenderPassCreateInfo2){1917.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,1918.attachmentCount = 1,1919.pAttachments = &att_desc,1920.subpassCount = 1,1921.pSubpasses = &subpass_desc,1922.dependencyCount = 2,1923.pDependencies =1924(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,1925.srcSubpass = VK_SUBPASS_EXTERNAL,1926.dstSubpass = 0,1927.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,1928.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,1929.srcAccessMask = 0,1930.dstAccessMask = 0,1931.dependencyFlags = 0},1932{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,1933.srcSubpass = 0,1934.dstSubpass = VK_SUBPASS_EXTERNAL,1935.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,1936.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,1937.srcAccessMask = 0,1938.dstAccessMask = 0,1939.dependencyFlags = 0}}},1940&cmd_buffer->pool->alloc, &pass);19411942radv_cmd_buffer_begin_render_pass(cmd_buffer,1943&(VkRenderPassBeginInfo){1944.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,1945.renderArea =1946{1947.offset =1948{19490,19500,1951},1952.extent =1953{1954.width = width,1955.height = height,1956},1957},1958.renderPass = pass,1959.framebuffer = fb,1960.clearValueCount = 0,1961.pClearValues = NULL,1962},1963NULL);19641965radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);19661967VkClearAttachment clear_att = {1968.aspectMask = range->aspectMask,1969.colorAttachment = 0,1970.clearValue = *clear_val,1971};19721973VkClearRect clear_rect = {1974.rect =1975{1976.offset = {0, 0},1977.extent = {width, height},1978},1979.baseArrayLayer = range->baseArrayLayer,1980.layerCount = 1, /* FINISHME: clear multi-layer framebuffer */1981};19821983emit_clear(cmd_buffer, &clear_att, &clear_rect, NULL, NULL, 0, false);19841985radv_cmd_buffer_end_render_pass(cmd_buffer);1986radv_DestroyRenderPass(device_h, pass, &cmd_buffer->pool->alloc);1987radv_DestroyFramebuffer(device_h, fb, &cmd_buffer->pool->alloc);1988}19891990/**1991* Return TRUE if a fast color or depth clear has been performed.1992*/1993static bool1994radv_fast_clear_range(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image, VkFormat format,1995VkImageLayout image_layout, bool in_render_loop,1996const VkImageSubresourceRange *range, const VkClearValue *clear_val)1997{1998struct radv_image_view iview;19992000radv_image_view_init(&iview, cmd_buffer->device,2001&(VkImageViewCreateInfo){2002.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,2003.image = radv_image_to_handle(image),2004.viewType = radv_meta_get_view_type(image),2005.format = image->vk_format,2006.subresourceRange =2007{2008.aspectMask = range->aspectMask,2009.baseMipLevel = range->baseMipLevel,2010.levelCount = range->levelCount,2011.baseArrayLayer = range->baseArrayLayer,2012.layerCount = range->layerCount,2013},2014},2015NULL);20162017VkClearRect clear_rect = {2018.rect =2019{2020.offset = {0, 0},2021.extent =2022{2023radv_minify(image->info.width, range->baseMipLevel),2024radv_minify(image->info.height, range->baseMipLevel),2025},2026},2027.baseArrayLayer = range->baseArrayLayer,2028.layerCount = range->layerCount,2029};20302031VkClearAttachment clear_att = {2032.aspectMask = range->aspectMask,2033.colorAttachment = 0,2034.clearValue = *clear_val,2035};20362037if (vk_format_is_color(format)) {2038if (radv_can_fast_clear_color(cmd_buffer, &iview, image_layout, in_render_loop, &clear_rect,2039clear_att.clearValue.color, 0)) {2040radv_fast_clear_color(cmd_buffer, &iview, &clear_att, clear_att.colorAttachment, NULL,2041NULL);2042return true;2043}2044} else {2045if (radv_can_fast_clear_depth(cmd_buffer, &iview, image_layout, in_render_loop,2046range->aspectMask, &clear_rect,2047clear_att.clearValue.depthStencil, 0)) {2048radv_fast_clear_depth(cmd_buffer, &iview, &clear_att, NULL, NULL);2049return true;2050}2051}20522053return false;2054}20552056static void2057radv_cmd_clear_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,2058VkImageLayout image_layout, const VkClearValue *clear_value,2059uint32_t range_count, const VkImageSubresourceRange *ranges, bool cs)2060{2061VkFormat format = image->vk_format;2062VkClearValue internal_clear_value;20632064if (ranges->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)2065internal_clear_value.color = clear_value->color;2066else2067internal_clear_value.depthStencil = clear_value->depthStencil;20682069bool disable_compression = false;20702071if (format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32) {2072bool blendable;2073if (cs ? !radv_is_storage_image_format_supported(cmd_buffer->device->physical_device, format)2074: !radv_is_colorbuffer_format_supported(cmd_buffer->device->physical_device, format,2075&blendable)) {2076format = VK_FORMAT_R32_UINT;2077internal_clear_value.color.uint32[0] = float3_to_rgb9e5(clear_value->color.float32);20782079uint32_t queue_mask = radv_image_queue_family_mask(image, cmd_buffer->queue_family_index,2080cmd_buffer->queue_family_index);20812082for (uint32_t r = 0; r < range_count; r++) {2083const VkImageSubresourceRange *range = &ranges[r];20842085/* Don't use compressed image stores because they will use an incompatible format. */2086if (radv_layout_dcc_compressed(cmd_buffer->device, image, range->baseMipLevel,2087image_layout, false, queue_mask)) {2088disable_compression = cs;2089break;2090}2091}2092}2093}20942095if (format == VK_FORMAT_R4G4_UNORM_PACK8) {2096uint8_t r, g;2097format = VK_FORMAT_R8_UINT;2098r = float_to_ubyte(clear_value->color.float32[0]) >> 4;2099g = float_to_ubyte(clear_value->color.float32[1]) >> 4;2100internal_clear_value.color.uint32[0] = (r << 4) | (g & 0xf);2101}21022103for (uint32_t r = 0; r < range_count; r++) {2104const VkImageSubresourceRange *range = &ranges[r];21052106/* Try to perform a fast clear first, otherwise fallback to2107* the legacy path.2108*/2109if (!cs && radv_fast_clear_range(cmd_buffer, image, format, image_layout, false, range,2110&internal_clear_value)) {2111continue;2112}21132114for (uint32_t l = 0; l < radv_get_levelCount(image, range); ++l) {2115const uint32_t layer_count = image->type == VK_IMAGE_TYPE_3D2116? radv_minify(image->info.depth, range->baseMipLevel + l)2117: radv_get_layerCount(image, range);2118for (uint32_t s = 0; s < layer_count; ++s) {21192120if (cs) {2121struct radv_meta_blit2d_surf surf;2122surf.format = format;2123surf.image = image;2124surf.level = range->baseMipLevel + l;2125surf.layer = range->baseArrayLayer + s;2126surf.aspect_mask = range->aspectMask;2127surf.disable_compression = disable_compression;2128radv_meta_clear_image_cs(cmd_buffer, &surf, &internal_clear_value.color);2129} else {2130assert(!disable_compression);2131radv_clear_image_layer(cmd_buffer, image, image_layout, range, format, l, s,2132&internal_clear_value);2133}2134}2135}2136}21372138if (disable_compression) {2139enum radv_cmd_flush_bits flush_bits = 0;2140for (unsigned i = 0; i < range_count; i++) {2141if (radv_dcc_enabled(image, ranges[i].baseMipLevel))2142flush_bits |= radv_clear_dcc(cmd_buffer, image, &ranges[i], 0xffffffffu);2143}2144cmd_buffer->state.flush_bits |= flush_bits;2145}2146}21472148void2149radv_CmdClearColorImage(VkCommandBuffer commandBuffer, VkImage image_h, VkImageLayout imageLayout,2150const VkClearColorValue *pColor, uint32_t rangeCount,2151const VkImageSubresourceRange *pRanges)2152{2153RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);2154RADV_FROM_HANDLE(radv_image, image, image_h);2155struct radv_meta_saved_state saved_state;2156bool cs;21572158cs = cmd_buffer->queue_family_index == RADV_QUEUE_COMPUTE ||2159!radv_image_is_renderable(cmd_buffer->device, image);21602161if (cs) {2162radv_meta_save(2163&saved_state, cmd_buffer,2164RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);2165} else {2166radv_meta_save(&saved_state, cmd_buffer,2167RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);2168}21692170radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pColor, rangeCount,2171pRanges, cs);21722173radv_meta_restore(&saved_state, cmd_buffer);2174}21752176void2177radv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer, VkImage image_h,2178VkImageLayout imageLayout,2179const VkClearDepthStencilValue *pDepthStencil, uint32_t rangeCount,2180const VkImageSubresourceRange *pRanges)2181{2182RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);2183RADV_FROM_HANDLE(radv_image, image, image_h);2184struct radv_meta_saved_state saved_state;21852186radv_meta_save(&saved_state, cmd_buffer,2187RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);21882189radv_cmd_clear_image(cmd_buffer, image, imageLayout, (const VkClearValue *)pDepthStencil,2190rangeCount, pRanges, false);21912192radv_meta_restore(&saved_state, cmd_buffer);2193}21942195void2196radv_CmdClearAttachments(VkCommandBuffer commandBuffer, uint32_t attachmentCount,2197const VkClearAttachment *pAttachments, uint32_t rectCount,2198const VkClearRect *pRects)2199{2200RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);2201struct radv_meta_saved_state saved_state;2202enum radv_cmd_flush_bits pre_flush = 0;2203enum radv_cmd_flush_bits post_flush = 0;22042205if (!cmd_buffer->state.subpass)2206return;22072208radv_meta_save(&saved_state, cmd_buffer,2209RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_CONSTANTS);22102211/* FINISHME: We can do better than this dumb loop. It thrashes too much2212* state.2213*/2214for (uint32_t a = 0; a < attachmentCount; ++a) {2215for (uint32_t r = 0; r < rectCount; ++r) {2216emit_clear(cmd_buffer, &pAttachments[a], &pRects[r], &pre_flush, &post_flush,2217cmd_buffer->state.subpass->view_mask, false);2218}2219}22202221radv_meta_restore(&saved_state, cmd_buffer);2222cmd_buffer->state.flush_bits |= post_flush;2223}222422252226