Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_resolve.c
7204 views
/*1* Copyright © 2016 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>2526#include "nir/nir_builder.h"27#include "radv_meta.h"28#include "radv_private.h"29#include "sid.h"30#include "vk_format.h"3132/* emit 0, 0, 0, 1 */33static nir_shader *34build_nir_fs(void)35{36const struct glsl_type *vec4 = glsl_vec4_type();37nir_variable *f_color; /* vec4, fragment output color */3839nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, NULL, "meta_resolve_fs");4041f_color = nir_variable_create(b.shader, nir_var_shader_out, vec4, "f_color");42f_color->data.location = FRAG_RESULT_DATA0;43nir_store_var(&b, f_color, nir_imm_vec4(&b, 0.0, 0.0, 0.0, 1.0), 0xf);4445return b.shader;46}4748static VkResult49create_pass(struct radv_device *device, VkFormat vk_format, VkRenderPass *pass)50{51VkResult result;52VkDevice device_h = radv_device_to_handle(device);53const VkAllocationCallbacks *alloc = &device->meta_state.alloc;54VkAttachmentDescription2 attachments[2];55int i;5657for (i = 0; i < 2; i++) {58attachments[i].sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;59attachments[i].pNext = NULL;60attachments[i].format = vk_format;61attachments[i].samples = 1;62attachments[i].loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;63attachments[i].storeOp = VK_ATTACHMENT_STORE_OP_STORE;64}65attachments[0].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;66attachments[0].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;67attachments[1].initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;68attachments[1].finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;6970result = radv_CreateRenderPass2(71device_h,72&(VkRenderPassCreateInfo2){73.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,74.attachmentCount = 2,75.pAttachments = attachments,76.subpassCount = 1,77.pSubpasses =78&(VkSubpassDescription2){79.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,80.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,81.inputAttachmentCount = 0,82.colorAttachmentCount = 2,83.pColorAttachments =84(VkAttachmentReference2[]){85{86.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,87.attachment = 0,88.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,89},90{91.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,92.attachment = 1,93.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,94},95},96.pResolveAttachments = NULL,97.pDepthStencilAttachment =98&(VkAttachmentReference2){99.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,100.attachment = VK_ATTACHMENT_UNUSED,101},102.preserveAttachmentCount = 0,103.pPreserveAttachments = NULL,104},105.dependencyCount = 2,106.pDependencies =107(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,108.srcSubpass = VK_SUBPASS_EXTERNAL,109.dstSubpass = 0,110.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,111.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,112.srcAccessMask = 0,113.dstAccessMask = 0,114.dependencyFlags = 0},115{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,116.srcSubpass = 0,117.dstSubpass = VK_SUBPASS_EXTERNAL,118.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,119.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,120.srcAccessMask = 0,121.dstAccessMask = 0,122.dependencyFlags = 0}},123},124alloc, pass);125126return result;127}128129static VkResult130create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipeline *pipeline,131VkRenderPass pass)132{133VkResult result;134VkDevice device_h = radv_device_to_handle(device);135136nir_shader *fs_module = build_nir_fs();137if (!fs_module) {138/* XXX: Need more accurate error */139result = VK_ERROR_OUT_OF_HOST_MEMORY;140goto cleanup;141}142143VkPipelineLayoutCreateInfo pl_create_info = {144.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,145.setLayoutCount = 0,146.pSetLayouts = NULL,147.pushConstantRangeCount = 0,148.pPushConstantRanges = NULL,149};150151if (!device->meta_state.resolve.p_layout) {152result =153radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,154&device->meta_state.alloc, &device->meta_state.resolve.p_layout);155if (result != VK_SUCCESS)156goto cleanup;157}158159result = radv_graphics_pipeline_create(160device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),161&(VkGraphicsPipelineCreateInfo){162.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,163.stageCount = 2,164.pStages =165(VkPipelineShaderStageCreateInfo[]){166{167.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,168.stage = VK_SHADER_STAGE_VERTEX_BIT,169.module = vs_module_h,170.pName = "main",171},172{173.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,174.stage = VK_SHADER_STAGE_FRAGMENT_BIT,175.module = vk_shader_module_handle_from_nir(fs_module),176.pName = "main",177},178},179.pVertexInputState =180&(VkPipelineVertexInputStateCreateInfo){181.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,182.vertexBindingDescriptionCount = 0,183.vertexAttributeDescriptionCount = 0,184},185.pInputAssemblyState =186&(VkPipelineInputAssemblyStateCreateInfo){187.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,188.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,189.primitiveRestartEnable = false,190},191.pViewportState =192&(VkPipelineViewportStateCreateInfo){193.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,194.viewportCount = 1,195.scissorCount = 1,196},197.pRasterizationState =198&(VkPipelineRasterizationStateCreateInfo){199.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,200.depthClampEnable = false,201.rasterizerDiscardEnable = false,202.polygonMode = VK_POLYGON_MODE_FILL,203.cullMode = VK_CULL_MODE_NONE,204.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,205},206.pMultisampleState =207&(VkPipelineMultisampleStateCreateInfo){208.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,209.rasterizationSamples = 1,210.sampleShadingEnable = false,211.pSampleMask = NULL,212.alphaToCoverageEnable = false,213.alphaToOneEnable = false,214},215.pColorBlendState =216&(VkPipelineColorBlendStateCreateInfo){217.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,218.logicOpEnable = false,219.attachmentCount = 2,220.pAttachments =221(VkPipelineColorBlendAttachmentState[]){222{223.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |224VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,225},226{227.colorWriteMask = 0,228229}},230},231.pDynamicState =232&(VkPipelineDynamicStateCreateInfo){233.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,234.dynamicStateCount = 2,235.pDynamicStates =236(VkDynamicState[]){237VK_DYNAMIC_STATE_VIEWPORT,238VK_DYNAMIC_STATE_SCISSOR,239},240},241.layout = device->meta_state.resolve.p_layout,242.renderPass = pass,243.subpass = 0,244},245&(struct radv_graphics_pipeline_create_info){246.use_rectlist = true,247.custom_blend_mode = V_028808_CB_RESOLVE,248},249&device->meta_state.alloc, pipeline);250if (result != VK_SUCCESS)251goto cleanup;252253goto cleanup;254255cleanup:256ralloc_free(fs_module);257return result;258}259260void261radv_device_finish_meta_resolve_state(struct radv_device *device)262{263struct radv_meta_state *state = &device->meta_state;264265for (uint32_t j = 0; j < NUM_META_FS_KEYS; j++) {266radv_DestroyRenderPass(radv_device_to_handle(device), state->resolve.pass[j], &state->alloc);267radv_DestroyPipeline(radv_device_to_handle(device), state->resolve.pipeline[j],268&state->alloc);269}270radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve.p_layout,271&state->alloc);272}273274VkResult275radv_device_init_meta_resolve_state(struct radv_device *device, bool on_demand)276{277if (on_demand)278return VK_SUCCESS;279280VkResult res = VK_SUCCESS;281struct radv_meta_state *state = &device->meta_state;282nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();283if (!vs_module) {284/* XXX: Need more accurate error */285res = VK_ERROR_OUT_OF_HOST_MEMORY;286goto fail;287}288289for (uint32_t i = 0; i < NUM_META_FS_KEYS; ++i) {290VkFormat format = radv_fs_key_format_exemplars[i];291unsigned fs_key = radv_format_meta_fs_key(device, format);292res = create_pass(device, format, &state->resolve.pass[fs_key]);293if (res != VK_SUCCESS)294goto fail;295296VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);297res = create_pipeline(device, vs_module_h, &state->resolve.pipeline[fs_key],298state->resolve.pass[fs_key]);299if (res != VK_SUCCESS)300goto fail;301}302303goto cleanup;304305fail:306radv_device_finish_meta_resolve_state(device);307308cleanup:309ralloc_free(vs_module);310311return res;312}313314static void315emit_resolve(struct radv_cmd_buffer *cmd_buffer, const struct radv_image *src_image,316const struct radv_image *dst_image, VkFormat vk_format, const VkOffset2D *dest_offset,317const VkExtent2D *resolve_extent)318{319struct radv_device *device = cmd_buffer->device;320VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);321unsigned fs_key = radv_format_meta_fs_key(device, vk_format);322323cmd_buffer->state.flush_bits |=324radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, src_image) |325radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT, src_image) |326radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);327328radv_CmdBindPipeline(cmd_buffer_h, VK_PIPELINE_BIND_POINT_GRAPHICS,329device->meta_state.resolve.pipeline[fs_key]);330331radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,332&(VkViewport){.x = dest_offset->x,333.y = dest_offset->y,334.width = resolve_extent->width,335.height = resolve_extent->height,336.minDepth = 0.0f,337.maxDepth = 1.0f});338339radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,340&(VkRect2D){341.offset = *dest_offset,342.extent = *resolve_extent,343});344345radv_CmdDraw(cmd_buffer_h, 3, 1, 0, 0);346cmd_buffer->state.flush_bits |=347radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, dst_image);348}349350enum radv_resolve_method {351RESOLVE_HW,352RESOLVE_COMPUTE,353RESOLVE_FRAGMENT,354};355356static bool357image_hw_resolve_compat(const struct radv_device *device, struct radv_image *src_image,358struct radv_image *dst_image)359{360if (device->physical_device->rad_info.chip_class >= GFX9) {361return dst_image->planes[0].surface.u.gfx9.swizzle_mode ==362src_image->planes[0].surface.u.gfx9.swizzle_mode;363} else {364return dst_image->planes[0].surface.micro_tile_mode ==365src_image->planes[0].surface.micro_tile_mode;366}367}368369static void370radv_pick_resolve_method_images(struct radv_device *device, struct radv_image *src_image,371VkFormat src_format, struct radv_image *dest_image,372unsigned dest_level, VkImageLayout dest_image_layout,373bool dest_render_loop, struct radv_cmd_buffer *cmd_buffer,374enum radv_resolve_method *method)375376{377uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,378cmd_buffer->queue_family_index);379380if (vk_format_is_color(src_format)) {381/* Using the fragment resolve path is currently a hint to382* avoid decompressing DCC for partial resolves and383* re-initialize it after resolving using compute.384* TODO: Add support for layered and int to the fragment path.385*/386if (radv_layout_dcc_compressed(device, dest_image, dest_level, dest_image_layout,387dest_render_loop, queue_mask)) {388*method = RESOLVE_FRAGMENT;389} else if (!image_hw_resolve_compat(device, src_image, dest_image)) {390/* The micro tile mode only needs to match for the HW391* resolve path which is the default path for non-DCC392* resolves.393*/394*method = RESOLVE_COMPUTE;395}396397if (src_format == VK_FORMAT_R16G16_UNORM || src_format == VK_FORMAT_R16G16_SNORM)398*method = RESOLVE_COMPUTE;399else if (vk_format_is_int(src_format))400*method = RESOLVE_COMPUTE;401else if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)402*method = RESOLVE_COMPUTE;403} else {404if (src_image->info.array_size > 1 || dest_image->info.array_size > 1)405*method = RESOLVE_COMPUTE;406else407*method = RESOLVE_FRAGMENT;408}409}410411static VkResult412build_resolve_pipeline(struct radv_device *device, unsigned fs_key)413{414VkResult result = VK_SUCCESS;415416if (device->meta_state.resolve.pipeline[fs_key])417return result;418419mtx_lock(&device->meta_state.mtx);420if (device->meta_state.resolve.pipeline[fs_key]) {421mtx_unlock(&device->meta_state.mtx);422return result;423}424425nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();426427result = create_pass(device, radv_fs_key_format_exemplars[fs_key],428&device->meta_state.resolve.pass[fs_key]);429if (result != VK_SUCCESS)430goto fail;431432VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);433result = create_pipeline(device, vs_module_h, &device->meta_state.resolve.pipeline[fs_key],434device->meta_state.resolve.pass[fs_key]);435436fail:437ralloc_free(vs_module);438mtx_unlock(&device->meta_state.mtx);439return result;440}441442static void443radv_meta_resolve_hardware_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,444VkImageLayout src_image_layout, struct radv_image *dst_image,445VkImageLayout dst_image_layout, const VkImageResolve2KHR *region)446{447struct radv_device *device = cmd_buffer->device;448struct radv_meta_saved_state saved_state;449450radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);451452assert(src_image->info.samples > 1);453if (src_image->info.samples <= 1) {454/* this causes GPU hangs if we get past here */455fprintf(stderr, "radv: Illegal resolve operation (src not multisampled), will hang GPU.");456return;457}458assert(dst_image->info.samples == 1);459460if (src_image->info.array_size > 1)461radv_finishme("vkCmdResolveImage: multisample array images");462463unsigned fs_key = radv_format_meta_fs_key(device, dst_image->vk_format);464465/* From the Vulkan 1.0 spec:466*467* - The aspectMask member of srcSubresource and dstSubresource must468* only contain VK_IMAGE_ASPECT_COLOR_BIT469*470* - The layerCount member of srcSubresource and dstSubresource must471* match472*/473assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);474assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);475assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);476477const uint32_t src_base_layer =478radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset);479480const uint32_t dst_base_layer =481radv_meta_get_iview_layer(dst_image, ®ion->dstSubresource, ®ion->dstOffset);482483/**484* From Vulkan 1.0.6 spec: 18.6 Resolving Multisample Images485*486* extent is the size in texels of the source image to resolve in width,487* height and depth. 1D images use only x and width. 2D images use x, y,488* width and height. 3D images use x, y, z, width, height and depth.489*490* srcOffset and dstOffset select the initial x, y, and z offsets in491* texels of the sub-regions of the source and destination image data.492* extent is the size in texels of the source image to resolve in width,493* height and depth. 1D images use only x and width. 2D images use x, y,494* width and height. 3D images use x, y, z, width, height and depth.495*/496const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);497const struct VkOffset3D dstOffset =498radv_sanitize_image_offset(dst_image->type, region->dstOffset);499500uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,501cmd_buffer->queue_family_index);502503if (radv_layout_dcc_compressed(cmd_buffer->device, dst_image, region->dstSubresource.mipLevel,504dst_image_layout, false, queue_mask)) {505VkImageSubresourceRange range = {506.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,507.baseMipLevel = region->dstSubresource.mipLevel,508.levelCount = 1,509.baseArrayLayer = dst_base_layer,510.layerCount = region->dstSubresource.layerCount,511};512513cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_image, &range, 0xffffffff);514}515516for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {517518VkResult ret = build_resolve_pipeline(device, fs_key);519if (ret != VK_SUCCESS) {520cmd_buffer->record_result = ret;521break;522}523524struct radv_image_view src_iview;525radv_image_view_init(&src_iview, cmd_buffer->device,526&(VkImageViewCreateInfo){527.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,528.image = radv_image_to_handle(src_image),529.viewType = radv_meta_get_view_type(src_image),530.format = src_image->vk_format,531.subresourceRange =532{533.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,534.baseMipLevel = region->srcSubresource.mipLevel,535.levelCount = 1,536.baseArrayLayer = src_base_layer + layer,537.layerCount = 1,538},539},540NULL);541542struct radv_image_view dst_iview;543radv_image_view_init(&dst_iview, cmd_buffer->device,544&(VkImageViewCreateInfo){545.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,546.image = radv_image_to_handle(dst_image),547.viewType = radv_meta_get_view_type(dst_image),548.format = dst_image->vk_format,549.subresourceRange =550{551.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,552.baseMipLevel = region->dstSubresource.mipLevel,553.levelCount = 1,554.baseArrayLayer = dst_base_layer + layer,555.layerCount = 1,556},557},558NULL);559560VkFramebuffer fb_h;561radv_CreateFramebuffer(562radv_device_to_handle(device),563&(VkFramebufferCreateInfo){564.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,565.attachmentCount = 2,566.pAttachments =567(VkImageView[]){568radv_image_view_to_handle(&src_iview),569radv_image_view_to_handle(&dst_iview),570},571.width = radv_minify(dst_image->info.width, region->dstSubresource.mipLevel),572.height = radv_minify(dst_image->info.height, region->dstSubresource.mipLevel),573.layers = 1},574&cmd_buffer->pool->alloc, &fb_h);575576radv_cmd_buffer_begin_render_pass(cmd_buffer,577&(VkRenderPassBeginInfo){578.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,579.renderPass = device->meta_state.resolve.pass[fs_key],580.framebuffer = fb_h,581.renderArea = {.offset =582{583dstOffset.x,584dstOffset.y,585},586.extent =587{588extent.width,589extent.height,590}},591.clearValueCount = 0,592.pClearValues = NULL,593},594NULL);595596radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);597598emit_resolve(cmd_buffer, src_image, dst_image, dst_iview.vk_format,599&(VkOffset2D){600.x = dstOffset.x,601.y = dstOffset.y,602},603&(VkExtent2D){604.width = extent.width,605.height = extent.height,606});607608radv_cmd_buffer_end_render_pass(cmd_buffer);609610radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);611}612613radv_meta_restore(&saved_state, cmd_buffer);614}615616static void617resolve_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,618VkImageLayout src_image_layout, struct radv_image *dst_image,619VkImageLayout dst_image_layout, const VkImageResolve2KHR *region,620enum radv_resolve_method resolve_method)621{622switch (resolve_method) {623case RESOLVE_HW:624radv_meta_resolve_hardware_image(cmd_buffer, src_image, src_image_layout, dst_image,625dst_image_layout, region);626break;627case RESOLVE_FRAGMENT:628radv_meta_resolve_fragment_image(cmd_buffer, src_image, src_image_layout, dst_image,629dst_image_layout, region);630break;631case RESOLVE_COMPUTE:632radv_meta_resolve_compute_image(cmd_buffer, src_image, src_image->vk_format, src_image_layout,633dst_image, dst_image->vk_format, dst_image_layout, region);634break;635default:636assert(!"Invalid resolve method selected");637}638}639640void641radv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,642const VkResolveImageInfo2KHR *pResolveImageInfo)643{644RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, commandBuffer);645RADV_FROM_HANDLE(radv_image, src_image, pResolveImageInfo->srcImage);646RADV_FROM_HANDLE(radv_image, dst_image, pResolveImageInfo->dstImage);647VkImageLayout src_image_layout = pResolveImageInfo->srcImageLayout;648VkImageLayout dst_image_layout = pResolveImageInfo->dstImageLayout;649enum radv_resolve_method resolve_method = RESOLVE_HW;650/* we can use the hw resolve only for single full resolves */651if (pResolveImageInfo->regionCount == 1) {652if (pResolveImageInfo->pRegions[0].srcOffset.x ||653pResolveImageInfo->pRegions[0].srcOffset.y || pResolveImageInfo->pRegions[0].srcOffset.z)654resolve_method = RESOLVE_COMPUTE;655if (pResolveImageInfo->pRegions[0].dstOffset.x ||656pResolveImageInfo->pRegions[0].dstOffset.y || pResolveImageInfo->pRegions[0].dstOffset.z)657resolve_method = RESOLVE_COMPUTE;658659if (pResolveImageInfo->pRegions[0].extent.width != src_image->info.width ||660pResolveImageInfo->pRegions[0].extent.height != src_image->info.height ||661pResolveImageInfo->pRegions[0].extent.depth != src_image->info.depth)662resolve_method = RESOLVE_COMPUTE;663} else664resolve_method = RESOLVE_COMPUTE;665666for (uint32_t r = 0; r < pResolveImageInfo->regionCount; r++) {667const VkImageResolve2KHR *region = &pResolveImageInfo->pRegions[r];668669radv_pick_resolve_method_images(cmd_buffer->device, src_image, src_image->vk_format, dst_image,670region->dstSubresource.mipLevel, dst_image_layout, false,671cmd_buffer, &resolve_method);672673resolve_image(cmd_buffer, src_image, src_image_layout, dst_image, dst_image_layout, region,674resolve_method);675}676}677678static void679radv_cmd_buffer_resolve_subpass_hw(struct radv_cmd_buffer *cmd_buffer)680{681struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;682const struct radv_subpass *subpass = cmd_buffer->state.subpass;683struct radv_meta_saved_state saved_state;684685radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE);686687for (uint32_t i = 0; i < subpass->color_count; ++i) {688struct radv_subpass_attachment src_att = subpass->color_attachments[i];689struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];690691if (dest_att.attachment == VK_ATTACHMENT_UNUSED)692continue;693694struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;695struct radv_image *src_img = src_iview->image;696697struct radv_image_view *dest_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;698struct radv_image *dst_img = dest_iview->image;699VkImageLayout dst_image_layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;700701uint32_t queue_mask = radv_image_queue_family_mask(dst_img, cmd_buffer->queue_family_index,702cmd_buffer->queue_family_index);703704if (radv_layout_dcc_compressed(cmd_buffer->device, dst_img, dest_iview->base_mip,705dst_image_layout, false, queue_mask)) {706VkImageSubresourceRange range = {707.aspectMask = dest_iview->aspect_mask,708.baseMipLevel = dest_iview->base_mip,709.levelCount = dest_iview->level_count,710.baseArrayLayer = dest_iview->base_layer,711.layerCount = dest_iview->layer_count,712};713714cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dst_img, &range, 0xffffffff);715cmd_buffer->state.attachments[dest_att.attachment].current_layout =716VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;717}718719struct radv_subpass resolve_subpass = {720.color_count = 2,721.color_attachments = (struct radv_subpass_attachment[]){src_att, dest_att},722.depth_stencil_attachment = NULL,723};724725radv_cmd_buffer_set_subpass(cmd_buffer, &resolve_subpass);726727VkResult ret = build_resolve_pipeline(728cmd_buffer->device, radv_format_meta_fs_key(cmd_buffer->device, dest_iview->vk_format));729if (ret != VK_SUCCESS) {730cmd_buffer->record_result = ret;731continue;732}733734emit_resolve(cmd_buffer, src_img, dst_img, dest_iview->vk_format, &(VkOffset2D){0, 0},735&(VkExtent2D){fb->width, fb->height});736}737738radv_cmd_buffer_set_subpass(cmd_buffer, subpass);739740radv_meta_restore(&saved_state, cmd_buffer);741}742743/**744* Emit any needed resolves for the current subpass.745*/746void747radv_cmd_buffer_resolve_subpass(struct radv_cmd_buffer *cmd_buffer)748{749const struct radv_subpass *subpass = cmd_buffer->state.subpass;750enum radv_resolve_method resolve_method = RESOLVE_HW;751752if (!subpass->has_color_resolve && !subpass->ds_resolve_attachment)753return;754755radv_describe_begin_render_pass_resolve(cmd_buffer);756757if (subpass->ds_resolve_attachment) {758struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;759struct radv_subpass_attachment dst_att = *subpass->ds_resolve_attachment;760struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;761struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;762763/* Make sure to not clear the depth/stencil attachment after resolves. */764cmd_buffer->state.attachments[dst_att.attachment].pending_clear_aspects = 0;765766radv_pick_resolve_method_images(cmd_buffer->device, src_iview->image, src_iview->vk_format,767dst_iview->image, dst_iview->base_mip, dst_att.layout,768dst_att.in_render_loop, cmd_buffer, &resolve_method);769770if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_DEPTH_BIT) &&771subpass->depth_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {772if (resolve_method == RESOLVE_FRAGMENT) {773radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,774subpass->depth_resolve_mode);775} else {776assert(resolve_method == RESOLVE_COMPUTE);777radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_DEPTH_BIT,778subpass->depth_resolve_mode);779}780}781782if ((src_iview->aspect_mask & VK_IMAGE_ASPECT_STENCIL_BIT) &&783subpass->stencil_resolve_mode != VK_RESOLVE_MODE_NONE_KHR) {784if (resolve_method == RESOLVE_FRAGMENT) {785radv_depth_stencil_resolve_subpass_fs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,786subpass->stencil_resolve_mode);787} else {788assert(resolve_method == RESOLVE_COMPUTE);789radv_depth_stencil_resolve_subpass_cs(cmd_buffer, VK_IMAGE_ASPECT_STENCIL_BIT,790subpass->stencil_resolve_mode);791}792}793794/* From the Vulkan spec 1.2.165:795*796* "VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT specifies797* write access to a color, resolve, or depth/stencil798* resolve attachment during a render pass or via799* certain subpass load and store operations."800*801* Yes, it's counterintuitive but it makes sense because ds802* resolve operations happen late at the end of the subpass.803*804* That said, RADV is wrong because it executes the subpass805* end barrier *before* any subpass resolves instead of after.806*807* TODO: Fix this properly by executing subpass end barriers808* after subpass resolves.809*/810cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB;811if (radv_image_has_htile(dst_iview->image))812cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_FLUSH_AND_INV_DB_META;813}814815if (subpass->has_color_resolve) {816for (uint32_t i = 0; i < subpass->color_count; ++i) {817struct radv_subpass_attachment src_att = subpass->color_attachments[i];818struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];819820if (dest_att.attachment == VK_ATTACHMENT_UNUSED)821continue;822823/* Make sure to not clear color attachments after resolves. */824cmd_buffer->state.attachments[dest_att.attachment].pending_clear_aspects = 0;825826struct radv_image_view *dst_iview =827cmd_buffer->state.attachments[dest_att.attachment].iview;828struct radv_image *dst_img = dst_iview->image;829struct radv_image_view *src_iview =830cmd_buffer->state.attachments[src_att.attachment].iview;831struct radv_image *src_img = src_iview->image;832833radv_pick_resolve_method_images(cmd_buffer->device, src_img, src_iview->vk_format, dst_img,834dst_iview->base_mip, dest_att.layout,835dest_att.in_render_loop, cmd_buffer, &resolve_method);836837if (resolve_method == RESOLVE_FRAGMENT) {838break;839}840}841842switch (resolve_method) {843case RESOLVE_HW:844radv_cmd_buffer_resolve_subpass_hw(cmd_buffer);845break;846case RESOLVE_COMPUTE:847radv_cmd_buffer_resolve_subpass_cs(cmd_buffer);848break;849case RESOLVE_FRAGMENT:850radv_cmd_buffer_resolve_subpass_fs(cmd_buffer);851break;852default:853unreachable("Invalid resolve method");854}855}856857radv_describe_end_render_pass_resolve(cmd_buffer);858}859860/**861* Decompress CMask/FMask before resolving a multisampled source image inside a862* subpass.863*/864void865radv_decompress_resolve_subpass_src(struct radv_cmd_buffer *cmd_buffer)866{867const struct radv_subpass *subpass = cmd_buffer->state.subpass;868struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;869uint32_t layer_count = fb->layers;870871if (subpass->view_mask)872layer_count = util_last_bit(subpass->view_mask);873874for (uint32_t i = 0; i < subpass->color_count; ++i) {875struct radv_subpass_attachment src_att = subpass->color_attachments[i];876struct radv_subpass_attachment dest_att = subpass->resolve_attachments[i];877878if (dest_att.attachment == VK_ATTACHMENT_UNUSED)879continue;880881struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;882struct radv_image *src_image = src_iview->image;883884VkImageResolve2KHR region = {0};885region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;886region.srcSubresource.aspectMask = src_iview->aspect_mask;887region.srcSubresource.mipLevel = 0;888region.srcSubresource.baseArrayLayer = src_iview->base_layer;889region.srcSubresource.layerCount = layer_count;890891radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, ®ion);892}893}894895static struct radv_sample_locations_state *896radv_get_resolve_sample_locations(struct radv_cmd_buffer *cmd_buffer)897{898struct radv_cmd_state *state = &cmd_buffer->state;899uint32_t subpass_id = radv_get_subpass_id(cmd_buffer);900901for (uint32_t i = 0; i < state->num_subpass_sample_locs; i++) {902if (state->subpass_sample_locs[i].subpass_idx == subpass_id)903return &state->subpass_sample_locs[i].sample_location;904}905906return NULL;907}908909/**910* Decompress CMask/FMask before resolving a multisampled source image.911*/912void913radv_decompress_resolve_src(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,914VkImageLayout src_image_layout, const VkImageResolve2KHR *region)915{916const uint32_t src_base_layer =917radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset);918919VkImageMemoryBarrier barrier = {0};920barrier.srcAccessMask = VK_ACCESS_TRANSFER_WRITE_BIT;921barrier.dstAccessMask = VK_ACCESS_TRANSFER_READ_BIT;922barrier.oldLayout = src_image_layout;923barrier.newLayout = VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL;924barrier.image = radv_image_to_handle(src_image);925barrier.subresourceRange = (VkImageSubresourceRange){926.aspectMask = region->srcSubresource.aspectMask,927.baseMipLevel = region->srcSubresource.mipLevel,928.levelCount = 1,929.baseArrayLayer = src_base_layer,930.layerCount = region->srcSubresource.layerCount,931};932933if (src_image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT) {934/* If the depth/stencil image uses different sample935* locations, we need them during HTILE decompressions.936*/937struct radv_sample_locations_state *sample_locs =938radv_get_resolve_sample_locations(cmd_buffer);939940barrier.pNext = &(VkSampleLocationsInfoEXT){941.sType = VK_STRUCTURE_TYPE_SAMPLE_LOCATIONS_INFO_EXT,942.sampleLocationsPerPixel = sample_locs->per_pixel,943.sampleLocationGridSize = sample_locs->grid_size,944.sampleLocationsCount = sample_locs->count,945.pSampleLocations = sample_locs->locations,946};947}948949radv_CmdPipelineBarrier(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,950VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT, false, 0, NULL, 0, NULL, 1,951&barrier);952}953954955