Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_fast_clear.c
7237 views
/*1* Copyright © 2016 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>2526#include "radv_meta.h"27#include "radv_private.h"28#include "sid.h"2930static nir_shader *31build_dcc_decompress_compute_shader(struct radv_device *dev)32{33const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);3435nir_builder b =36nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");3738/* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */39b.shader->info.workgroup_size[0] = 16;40b.shader->info.workgroup_size[1] = 16;41b.shader->info.workgroup_size[2] = 1;42nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img");43input_img->data.descriptor_set = 0;44input_img->data.binding = 0;4546nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");47output_img->data.descriptor_set = 0;48output_img->data.binding = 1;4950nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);51nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);52nir_ssa_def *block_size =53nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],54b.shader->info.workgroup_size[2], 0);5556nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);5758nir_ssa_def *data =59nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,60nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0));6162/* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid63* creating a vmcnt(0) because it expects the L1 cache to keep memory64* operations in-order for the same workgroup. The vmcnt(0) seems65* necessary however. */66nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,67.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);6869nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,70nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0));71return b.shader;72}7374static VkResult75create_dcc_compress_compute(struct radv_device *device)76{77VkResult result = VK_SUCCESS;78nir_shader *cs = build_dcc_decompress_compute_shader(device);7980VkDescriptorSetLayoutCreateInfo ds_create_info = {81.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,82.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,83.bindingCount = 2,84.pBindings = (VkDescriptorSetLayoutBinding[]){85{.binding = 0,86.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,87.descriptorCount = 1,88.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,89.pImmutableSamplers = NULL},90{.binding = 1,91.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,92.descriptorCount = 1,93.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,94.pImmutableSamplers = NULL},95}};9697result = radv_CreateDescriptorSetLayout(98radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,99&device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);100if (result != VK_SUCCESS)101goto cleanup;102103VkPipelineLayoutCreateInfo pl_create_info = {104.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,105.setLayoutCount = 1,106.pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,107.pushConstantRangeCount = 0,108.pPushConstantRanges = NULL,109};110111result = radv_CreatePipelineLayout(112radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,113&device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);114if (result != VK_SUCCESS)115goto cleanup;116117/* compute shader */118119VkPipelineShaderStageCreateInfo pipeline_shader_stage = {120.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,121.stage = VK_SHADER_STAGE_COMPUTE_BIT,122.module = vk_shader_module_handle_from_nir(cs),123.pName = "main",124.pSpecializationInfo = NULL,125};126127VkComputePipelineCreateInfo vk_pipeline_info = {128.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,129.stage = pipeline_shader_stage,130.flags = 0,131.layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,132};133134result = radv_CreateComputePipelines(135radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,136&vk_pipeline_info, NULL,137&device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);138if (result != VK_SUCCESS)139goto cleanup;140141cleanup:142ralloc_free(cs);143return result;144}145146static VkResult147create_pass(struct radv_device *device)148{149VkResult result;150VkDevice device_h = radv_device_to_handle(device);151const VkAllocationCallbacks *alloc = &device->meta_state.alloc;152VkAttachmentDescription2 attachment;153154attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;155attachment.pNext = NULL;156attachment.format = VK_FORMAT_UNDEFINED;157attachment.samples = 1;158attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;159attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;160attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;161attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;162163result = radv_CreateRenderPass2(164device_h,165&(VkRenderPassCreateInfo2){166.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,167.attachmentCount = 1,168.pAttachments = &attachment,169.subpassCount = 1,170.pSubpasses =171&(VkSubpassDescription2){172.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,173.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,174.inputAttachmentCount = 0,175.colorAttachmentCount = 1,176.pColorAttachments =177(VkAttachmentReference2[]){178{179.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,180.attachment = 0,181.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,182},183},184.pResolveAttachments = NULL,185.pDepthStencilAttachment =186&(VkAttachmentReference2){187.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,188.attachment = VK_ATTACHMENT_UNUSED,189},190.preserveAttachmentCount = 0,191.pPreserveAttachments = NULL,192},193.dependencyCount = 2,194.pDependencies =195(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,196.srcSubpass = VK_SUBPASS_EXTERNAL,197.dstSubpass = 0,198.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,199.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,200.srcAccessMask = 0,201.dstAccessMask = 0,202.dependencyFlags = 0},203{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,204.srcSubpass = 0,205.dstSubpass = VK_SUBPASS_EXTERNAL,206.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,207.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,208.srcAccessMask = 0,209.dstAccessMask = 0,210.dependencyFlags = 0}},211},212alloc, &device->meta_state.fast_clear_flush.pass);213214return result;215}216217static VkResult218create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)219{220VkPipelineLayoutCreateInfo pl_create_info = {221.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,222.setLayoutCount = 0,223.pSetLayouts = NULL,224.pushConstantRangeCount = 0,225.pPushConstantRanges = NULL,226};227228return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,229&device->meta_state.alloc, layout);230}231232static VkResult233create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)234{235VkResult result;236VkDevice device_h = radv_device_to_handle(device);237238nir_shader *fs_module = radv_meta_build_nir_fs_noop();239240if (!fs_module) {241/* XXX: Need more accurate error */242result = VK_ERROR_OUT_OF_HOST_MEMORY;243goto cleanup;244}245246const VkPipelineShaderStageCreateInfo stages[2] = {247{248.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,249.stage = VK_SHADER_STAGE_VERTEX_BIT,250.module = vs_module_h,251.pName = "main",252},253{254.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,255.stage = VK_SHADER_STAGE_FRAGMENT_BIT,256.module = vk_shader_module_handle_from_nir(fs_module),257.pName = "main",258},259};260261const VkPipelineVertexInputStateCreateInfo vi_state = {262.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,263.vertexBindingDescriptionCount = 0,264.vertexAttributeDescriptionCount = 0,265};266267const VkPipelineInputAssemblyStateCreateInfo ia_state = {268.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,269.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,270.primitiveRestartEnable = false,271};272273const VkPipelineColorBlendStateCreateInfo blend_state = {274.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,275.logicOpEnable = false,276.attachmentCount = 1,277.pAttachments = (VkPipelineColorBlendAttachmentState[]){278{279.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |280VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,281},282}};283const VkPipelineRasterizationStateCreateInfo rs_state = {284.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,285.depthClampEnable = false,286.rasterizerDiscardEnable = false,287.polygonMode = VK_POLYGON_MODE_FILL,288.cullMode = VK_CULL_MODE_NONE,289.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,290};291292result = radv_graphics_pipeline_create(293device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),294&(VkGraphicsPipelineCreateInfo){295.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,296.stageCount = 2,297.pStages = stages,298299.pVertexInputState = &vi_state,300.pInputAssemblyState = &ia_state,301302.pViewportState =303&(VkPipelineViewportStateCreateInfo){304.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,305.viewportCount = 1,306.scissorCount = 1,307},308.pRasterizationState = &rs_state,309.pMultisampleState =310&(VkPipelineMultisampleStateCreateInfo){311.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,312.rasterizationSamples = 1,313.sampleShadingEnable = false,314.pSampleMask = NULL,315.alphaToCoverageEnable = false,316.alphaToOneEnable = false,317},318.pColorBlendState = &blend_state,319.pDynamicState =320&(VkPipelineDynamicStateCreateInfo){321.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,322.dynamicStateCount = 2,323.pDynamicStates =324(VkDynamicState[]){325VK_DYNAMIC_STATE_VIEWPORT,326VK_DYNAMIC_STATE_SCISSOR,327},328},329.layout = layout,330.renderPass = device->meta_state.fast_clear_flush.pass,331.subpass = 0,332},333&(struct radv_graphics_pipeline_create_info){334.use_rectlist = true,335.custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,336},337&device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);338if (result != VK_SUCCESS)339goto cleanup;340341result = radv_graphics_pipeline_create(342device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),343&(VkGraphicsPipelineCreateInfo){344.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,345.stageCount = 2,346.pStages = stages,347348.pVertexInputState = &vi_state,349.pInputAssemblyState = &ia_state,350351.pViewportState =352&(VkPipelineViewportStateCreateInfo){353.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,354.viewportCount = 1,355.scissorCount = 1,356},357.pRasterizationState = &rs_state,358.pMultisampleState =359&(VkPipelineMultisampleStateCreateInfo){360.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,361.rasterizationSamples = 1,362.sampleShadingEnable = false,363.pSampleMask = NULL,364.alphaToCoverageEnable = false,365.alphaToOneEnable = false,366},367.pColorBlendState = &blend_state,368.pDynamicState =369&(VkPipelineDynamicStateCreateInfo){370.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,371.dynamicStateCount = 2,372.pDynamicStates =373(VkDynamicState[]){374VK_DYNAMIC_STATE_VIEWPORT,375VK_DYNAMIC_STATE_SCISSOR,376},377},378.layout = layout,379.renderPass = device->meta_state.fast_clear_flush.pass,380.subpass = 0,381},382&(struct radv_graphics_pipeline_create_info){383.use_rectlist = true,384.custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,385},386&device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);387if (result != VK_SUCCESS)388goto cleanup;389390result = radv_graphics_pipeline_create(391device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),392&(VkGraphicsPipelineCreateInfo){393.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,394.stageCount = 2,395.pStages = stages,396397.pVertexInputState = &vi_state,398.pInputAssemblyState = &ia_state,399400.pViewportState =401&(VkPipelineViewportStateCreateInfo){402.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,403.viewportCount = 1,404.scissorCount = 1,405},406.pRasterizationState = &rs_state,407.pMultisampleState =408&(VkPipelineMultisampleStateCreateInfo){409.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,410.rasterizationSamples = 1,411.sampleShadingEnable = false,412.pSampleMask = NULL,413.alphaToCoverageEnable = false,414.alphaToOneEnable = false,415},416.pColorBlendState = &blend_state,417.pDynamicState =418&(VkPipelineDynamicStateCreateInfo){419.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,420.dynamicStateCount = 2,421.pDynamicStates =422(VkDynamicState[]){423VK_DYNAMIC_STATE_VIEWPORT,424VK_DYNAMIC_STATE_SCISSOR,425},426},427.layout = layout,428.renderPass = device->meta_state.fast_clear_flush.pass,429.subpass = 0,430},431&(struct radv_graphics_pipeline_create_info){432.use_rectlist = true,433.custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,434},435&device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);436if (result != VK_SUCCESS)437goto cleanup;438439goto cleanup;440441cleanup:442ralloc_free(fs_module);443return result;444}445446void447radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)448{449struct radv_meta_state *state = &device->meta_state;450451radv_DestroyPipeline(radv_device_to_handle(device),452state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);453radv_DestroyPipeline(radv_device_to_handle(device),454state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);455radv_DestroyPipeline(radv_device_to_handle(device),456state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);457radv_DestroyRenderPass(radv_device_to_handle(device), state->fast_clear_flush.pass,458&state->alloc);459radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,460&state->alloc);461462radv_DestroyPipeline(radv_device_to_handle(device),463state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);464radv_DestroyPipelineLayout(radv_device_to_handle(device),465state->fast_clear_flush.dcc_decompress_compute_p_layout,466&state->alloc);467radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),468state->fast_clear_flush.dcc_decompress_compute_ds_layout,469&state->alloc);470}471472static VkResult473radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)474{475VkResult res = VK_SUCCESS;476477mtx_lock(&device->meta_state.mtx);478if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {479mtx_unlock(&device->meta_state.mtx);480return VK_SUCCESS;481}482483nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();484if (!vs_module) {485/* XXX: Need more accurate error */486res = VK_ERROR_OUT_OF_HOST_MEMORY;487goto fail;488}489490res = create_pass(device);491if (res != VK_SUCCESS)492goto fail;493494res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);495if (res != VK_SUCCESS)496goto fail;497498VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);499res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);500if (res != VK_SUCCESS)501goto fail;502503res = create_dcc_compress_compute(device);504if (res != VK_SUCCESS)505goto fail;506507goto cleanup;508509fail:510radv_device_finish_meta_fast_clear_flush_state(device);511512cleanup:513ralloc_free(vs_module);514mtx_unlock(&device->meta_state.mtx);515516return res;517}518519VkResult520radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)521{522if (on_demand)523return VK_SUCCESS;524525return radv_device_init_meta_fast_clear_flush_state_internal(device);526}527528static void529radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,530struct radv_image *image, uint64_t pred_offset,531bool value)532{533uint64_t va = 0;534535if (value) {536va = radv_buffer_get_va(image->bo) + image->offset;537va += pred_offset;538}539540si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);541}542543static void544radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,545const VkImageSubresourceRange *range, int level, int layer,546bool flush_cb)547{548struct radv_device *device = cmd_buffer->device;549struct radv_image_view iview;550uint32_t width, height;551552width = radv_minify(image->info.width, range->baseMipLevel + level);553height = radv_minify(image->info.height, range->baseMipLevel + level);554555radv_image_view_init(&iview, device,556&(VkImageViewCreateInfo){557.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,558.image = radv_image_to_handle(image),559.viewType = radv_meta_get_view_type(image),560.format = image->vk_format,561.subresourceRange =562{563.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,564.baseMipLevel = range->baseMipLevel + level,565.levelCount = 1,566.baseArrayLayer = range->baseArrayLayer + layer,567.layerCount = 1,568},569},570NULL);571572VkFramebuffer fb_h;573radv_CreateFramebuffer(574radv_device_to_handle(device),575&(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,576.attachmentCount = 1,577.pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},578.width = width,579.height = height,580.layers = 1},581&cmd_buffer->pool->alloc, &fb_h);582583radv_cmd_buffer_begin_render_pass(cmd_buffer,584&(VkRenderPassBeginInfo){585.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,586.renderPass = device->meta_state.fast_clear_flush.pass,587.framebuffer = fb_h,588.renderArea = {.offset =589{5900,5910,592},593.extent =594{595width,596height,597}},598.clearValueCount = 0,599.pClearValues = NULL,600},601NULL);602603radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);604605if (flush_cb)606cmd_buffer->state.flush_bits |=607radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);608609radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);610611if (flush_cb)612cmd_buffer->state.flush_bits |=613radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);614615radv_cmd_buffer_end_render_pass(cmd_buffer);616617radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);618}619620static void621radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,622const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)623{624struct radv_device *device = cmd_buffer->device;625struct radv_meta_saved_state saved_state;626bool flush_cb = false;627VkPipeline *pipeline;628629if (decompress_dcc) {630pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;631} else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {632pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;633} else {634pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;635}636637if (!*pipeline) {638VkResult ret;639640ret = radv_device_init_meta_fast_clear_flush_state_internal(device);641if (ret != VK_SUCCESS) {642cmd_buffer->record_result = ret;643return;644}645}646647if (pipeline == &device->meta_state.fast_clear_flush.dcc_decompress_pipeline ||648pipeline == &device->meta_state.fast_clear_flush.fmask_decompress_pipeline) {649/* Flushing CB is required before and after DCC_DECOMPRESS or650* FMASK_DECOMPRESS.651*/652flush_cb = true;653}654655radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);656657radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,658*pipeline);659660for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {661uint32_t width, height;662663/* Do not decompress levels without DCC. */664if (decompress_dcc && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))665continue;666667width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);668height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);669670radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,671&(VkViewport){.x = 0,672.y = 0,673.width = width,674.height = height,675.minDepth = 0.0f,676.maxDepth = 1.0f});677678radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,679&(VkRect2D){680.offset = {0, 0},681.extent = {width, height},682});683684for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {685radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);686}687}688689cmd_buffer->state.flush_bits |=690RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;691692radv_meta_restore(&saved_state, cmd_buffer);693}694695static void696radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,697const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)698{699bool use_predication = false;700bool old_predicating = false;701702assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);703704if (decompress_dcc ||705(!(radv_image_has_fmask(image) && !image->tc_compatible_cmask) && image->fce_pred_offset)) {706use_predication = true;707}708709/* If we are asked for DCC decompression without DCC predicates we cannot710* use the FCE predicate. */711if (decompress_dcc && image->dcc_pred_offset == 0)712use_predication = false;713714if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&715(image->info.array_size != radv_get_layerCount(image, subresourceRange) ||716subresourceRange->baseArrayLayer != 0)) {717/* Only use predication if the image has DCC with mipmaps or718* if the range of layers covers the whole image because the719* predication is based on mip level.720*/721use_predication = false;722}723724if (use_predication) {725uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;726pred_offset += 8 * subresourceRange->baseMipLevel;727728old_predicating = cmd_buffer->state.predicating;729730radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);731cmd_buffer->state.predicating = true;732}733734radv_process_color_image(cmd_buffer, image, subresourceRange, decompress_dcc);735736if (use_predication) {737uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;738pred_offset += 8 * subresourceRange->baseMipLevel;739740cmd_buffer->state.predicating = old_predicating;741742radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);743744if (cmd_buffer->state.predication_type != -1) {745/* Restore previous conditional rendering user state. */746si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,747cmd_buffer->state.predication_op,748cmd_buffer->state.predication_va);749}750}751752if (image->fce_pred_offset != 0) {753/* Clear the image's fast-clear eliminate predicate because754* FMASK and DCC also imply a fast-clear eliminate.755*/756radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);757}758759/* Mark the image as being decompressed. */760if (decompress_dcc)761radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);762}763764void765radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,766const VkImageSubresourceRange *subresourceRange)767{768struct radv_barrier_data barrier = {0};769770if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {771barrier.layout_transitions.fmask_decompress = 1;772} else {773barrier.layout_transitions.fast_clear_eliminate = 1;774}775radv_describe_layout_transition(cmd_buffer, &barrier);776777assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);778radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);779}780781static void782radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,783const VkImageSubresourceRange *subresourceRange)784{785assert(radv_dcc_enabled(image, subresourceRange->baseMipLevel));786radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);787}788789static void790radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,791const VkImageSubresourceRange *subresourceRange)792{793struct radv_meta_saved_state saved_state;794struct radv_image_view load_iview = {0};795struct radv_image_view store_iview = {0};796struct radv_device *device = cmd_buffer->device;797798cmd_buffer->state.flush_bits |=799radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);800801if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {802VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);803if (ret != VK_SUCCESS) {804cmd_buffer->record_result = ret;805return;806}807}808809radv_meta_save(&saved_state, cmd_buffer,810RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);811812radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,813device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);814815for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {816uint32_t width, height;817818/* Do not decompress levels without DCC. */819if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))820continue;821822width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);823height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);824825for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {826radv_image_view_init(827&load_iview, cmd_buffer->device,828&(VkImageViewCreateInfo){829.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,830.image = radv_image_to_handle(image),831.viewType = VK_IMAGE_VIEW_TYPE_2D,832.format = image->vk_format,833.subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,834.baseMipLevel = subresourceRange->baseMipLevel + l,835.levelCount = 1,836.baseArrayLayer = subresourceRange->baseArrayLayer + s,837.layerCount = 1},838},839&(struct radv_image_view_extra_create_info){.enable_compression = true});840radv_image_view_init(841&store_iview, cmd_buffer->device,842&(VkImageViewCreateInfo){843.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,844.image = radv_image_to_handle(image),845.viewType = VK_IMAGE_VIEW_TYPE_2D,846.format = image->vk_format,847.subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,848.baseMipLevel = subresourceRange->baseMipLevel + l,849.levelCount = 1,850.baseArrayLayer = subresourceRange->baseArrayLayer + s,851.layerCount = 1},852},853&(struct radv_image_view_extra_create_info){.disable_compression = true});854855radv_meta_push_descriptor_set(856cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,857device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */8582, /* descriptorWriteCount */859(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,860.dstBinding = 0,861.dstArrayElement = 0,862.descriptorCount = 1,863.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,864.pImageInfo =865(VkDescriptorImageInfo[]){866{867.sampler = VK_NULL_HANDLE,868.imageView = radv_image_view_to_handle(&load_iview),869.imageLayout = VK_IMAGE_LAYOUT_GENERAL,870},871}},872{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,873.dstBinding = 1,874.dstArrayElement = 0,875.descriptorCount = 1,876.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,877.pImageInfo = (VkDescriptorImageInfo[]){878{879.sampler = VK_NULL_HANDLE,880.imageView = radv_image_view_to_handle(&store_iview),881.imageLayout = VK_IMAGE_LAYOUT_GENERAL,882},883}}});884885radv_unaligned_dispatch(cmd_buffer, width, height, 1);886}887}888889/* Mark this image as actually being decompressed. */890radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);891892radv_meta_restore(&saved_state, cmd_buffer);893894cmd_buffer->state.flush_bits |=895RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |896radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);897898/* Initialize the DCC metadata as "fully expanded". */899cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);900}901902void903radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,904const VkImageSubresourceRange *subresourceRange)905{906struct radv_barrier_data barrier = {0};907908barrier.layout_transitions.dcc_decompress = 1;909radv_describe_layout_transition(cmd_buffer, &barrier);910911if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)912radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);913else914radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);915}916917918