/* Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_decompress.c (7237 views) */
/*1* Copyright © 2016 Intel Corporation2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>2526#include "radv_meta.h"27#include "radv_private.h"28#include "sid.h"2930enum radv_depth_op {31DEPTH_DECOMPRESS,32DEPTH_RESUMMARIZE,33};3435static VkResult36create_pass(struct radv_device *device, uint32_t samples, VkRenderPass *pass)37{38VkResult result;39VkDevice device_h = radv_device_to_handle(device);40const VkAllocationCallbacks *alloc = &device->meta_state.alloc;41VkAttachmentDescription2 attachment;4243attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;44attachment.pNext = NULL;45attachment.flags = 0;46attachment.format = VK_FORMAT_D32_SFLOAT_S8_UINT;47attachment.samples = samples;48attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;49attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;50attachment.stencilLoadOp = VK_ATTACHMENT_LOAD_OP_LOAD;51attachment.stencilStoreOp = 
VK_ATTACHMENT_STORE_OP_STORE;52attachment.initialLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;53attachment.finalLayout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL;5455result = radv_CreateRenderPass2(56device_h,57&(VkRenderPassCreateInfo2){58.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,59.attachmentCount = 1,60.pAttachments = &attachment,61.subpassCount = 1,62.pSubpasses =63&(VkSubpassDescription2){64.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,65.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,66.inputAttachmentCount = 0,67.colorAttachmentCount = 0,68.pColorAttachments = NULL,69.pResolveAttachments = NULL,70.pDepthStencilAttachment =71&(VkAttachmentReference2){72.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,73.attachment = 0,74.layout = VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL,75},76.preserveAttachmentCount = 0,77.pPreserveAttachments = NULL,78},79.dependencyCount = 2,80.pDependencies =81(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,82.srcSubpass = VK_SUBPASS_EXTERNAL,83.dstSubpass = 0,84.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,85.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,86.srcAccessMask = 0,87.dstAccessMask = 0,88.dependencyFlags = 0},89{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,90.srcSubpass = 0,91.dstSubpass = VK_SUBPASS_EXTERNAL,92.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,93.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,94.srcAccessMask = 0,95.dstAccessMask = 0,96.dependencyFlags = 0}},97},98alloc, pass);99100return result;101}102103static VkResult104create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)105{106VkPipelineLayoutCreateInfo pl_create_info = {107.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,108.setLayoutCount = 0,109.pSetLayouts = NULL,110.pushConstantRangeCount = 0,111.pPushConstantRanges = NULL,112};113114return radv_CreatePipelineLayout(radv_device_to_handle(device), 
&pl_create_info,115&device->meta_state.alloc, layout);116}117118static VkResult119create_pipeline(struct radv_device *device, uint32_t samples, VkRenderPass pass,120VkPipelineLayout layout, enum radv_depth_op op, VkPipeline *pipeline)121{122VkResult result;123VkDevice device_h = radv_device_to_handle(device);124125mtx_lock(&device->meta_state.mtx);126if (*pipeline) {127mtx_unlock(&device->meta_state.mtx);128return VK_SUCCESS;129}130131nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();132nir_shader *fs_module = radv_meta_build_nir_fs_noop();133134if (!vs_module || !fs_module) {135/* XXX: Need more accurate error */136result = VK_ERROR_OUT_OF_HOST_MEMORY;137goto cleanup;138}139140const VkPipelineSampleLocationsStateCreateInfoEXT sample_locs_create_info = {141.sType = VK_STRUCTURE_TYPE_PIPELINE_SAMPLE_LOCATIONS_STATE_CREATE_INFO_EXT,142.sampleLocationsEnable = false,143};144145const VkGraphicsPipelineCreateInfo pipeline_create_info = {146.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,147.stageCount = 2,148.pStages =149(VkPipelineShaderStageCreateInfo[]){150{151.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,152.stage = VK_SHADER_STAGE_VERTEX_BIT,153.module = vk_shader_module_handle_from_nir(vs_module),154.pName = "main",155},156{157.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,158.stage = VK_SHADER_STAGE_FRAGMENT_BIT,159.module = vk_shader_module_handle_from_nir(fs_module),160.pName = "main",161},162},163.pVertexInputState =164&(VkPipelineVertexInputStateCreateInfo){165.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,166.vertexBindingDescriptionCount = 0,167.vertexAttributeDescriptionCount = 0,168},169.pInputAssemblyState =170&(VkPipelineInputAssemblyStateCreateInfo){171.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,172.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,173.primitiveRestartEnable = false,174},175.pViewportState =176&(VkPipelineViewportStateCreateInfo){177.sType 
= VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,178.viewportCount = 1,179.scissorCount = 1,180},181.pRasterizationState =182&(VkPipelineRasterizationStateCreateInfo){183.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,184.depthClampEnable = false,185.rasterizerDiscardEnable = false,186.polygonMode = VK_POLYGON_MODE_FILL,187.cullMode = VK_CULL_MODE_NONE,188.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,189},190.pMultisampleState =191&(VkPipelineMultisampleStateCreateInfo){192.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,193.pNext = &sample_locs_create_info,194.rasterizationSamples = samples,195.sampleShadingEnable = false,196.pSampleMask = NULL,197.alphaToCoverageEnable = false,198.alphaToOneEnable = false,199},200.pColorBlendState =201&(VkPipelineColorBlendStateCreateInfo){202.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,203.logicOpEnable = false,204.attachmentCount = 0,205.pAttachments = NULL,206},207.pDepthStencilState =208&(VkPipelineDepthStencilStateCreateInfo){209.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,210.depthTestEnable = false,211.depthWriteEnable = false,212.depthBoundsTestEnable = false,213.stencilTestEnable = false,214},215.pDynamicState =216&(VkPipelineDynamicStateCreateInfo){217.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,218.dynamicStateCount = 3,219.pDynamicStates =220(VkDynamicState[]){221VK_DYNAMIC_STATE_VIEWPORT,222VK_DYNAMIC_STATE_SCISSOR,223VK_DYNAMIC_STATE_SAMPLE_LOCATIONS_EXT,224},225},226.layout = layout,227.renderPass = pass,228.subpass = 0,229};230231struct radv_graphics_pipeline_create_info extra = {232.use_rectlist = true,233.depth_compress_disable = true,234.stencil_compress_disable = true,235.resummarize_enable = op == DEPTH_RESUMMARIZE,236};237238result = radv_graphics_pipeline_create(239device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache), &pipeline_create_info,240&extra, &device->meta_state.alloc, 
pipeline);241242cleanup:243ralloc_free(fs_module);244ralloc_free(vs_module);245mtx_unlock(&device->meta_state.mtx);246return result;247}248249void250radv_device_finish_meta_depth_decomp_state(struct radv_device *device)251{252struct radv_meta_state *state = &device->meta_state;253254for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {255radv_DestroyRenderPass(radv_device_to_handle(device), state->depth_decomp[i].pass,256&state->alloc);257radv_DestroyPipelineLayout(radv_device_to_handle(device), state->depth_decomp[i].p_layout,258&state->alloc);259260radv_DestroyPipeline(radv_device_to_handle(device),261state->depth_decomp[i].decompress_pipeline, &state->alloc);262radv_DestroyPipeline(radv_device_to_handle(device),263state->depth_decomp[i].resummarize_pipeline, &state->alloc);264}265}266267VkResult268radv_device_init_meta_depth_decomp_state(struct radv_device *device, bool on_demand)269{270struct radv_meta_state *state = &device->meta_state;271VkResult res = VK_SUCCESS;272273for (uint32_t i = 0; i < ARRAY_SIZE(state->depth_decomp); ++i) {274uint32_t samples = 1 << i;275276res = create_pass(device, samples, &state->depth_decomp[i].pass);277if (res != VK_SUCCESS)278goto fail;279280res = create_pipeline_layout(device, &state->depth_decomp[i].p_layout);281if (res != VK_SUCCESS)282goto fail;283284if (on_demand)285continue;286287res = create_pipeline(device, samples, state->depth_decomp[i].pass,288state->depth_decomp[i].p_layout, DEPTH_DECOMPRESS,289&state->depth_decomp[i].decompress_pipeline);290if (res != VK_SUCCESS)291goto fail;292293res = create_pipeline(device, samples, state->depth_decomp[i].pass,294state->depth_decomp[i].p_layout, DEPTH_RESUMMARIZE,295&state->depth_decomp[i].resummarize_pipeline);296if (res != VK_SUCCESS)297goto fail;298}299300return VK_SUCCESS;301302fail:303radv_device_finish_meta_depth_decomp_state(device);304return res;305}306307static VkPipeline *308radv_get_depth_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image 
*image,309const VkImageSubresourceRange *subresourceRange, enum radv_depth_op op)310{311struct radv_meta_state *state = &cmd_buffer->device->meta_state;312uint32_t samples = image->info.samples;313uint32_t samples_log2 = ffs(samples) - 1;314VkPipeline *pipeline;315316if (!state->depth_decomp[samples_log2].decompress_pipeline) {317VkResult ret;318319ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,320state->depth_decomp[samples_log2].p_layout, DEPTH_DECOMPRESS,321&state->depth_decomp[samples_log2].decompress_pipeline);322if (ret != VK_SUCCESS) {323cmd_buffer->record_result = ret;324return NULL;325}326327ret = create_pipeline(cmd_buffer->device, samples, state->depth_decomp[samples_log2].pass,328state->depth_decomp[samples_log2].p_layout, DEPTH_RESUMMARIZE,329&state->depth_decomp[samples_log2].resummarize_pipeline);330if (ret != VK_SUCCESS) {331cmd_buffer->record_result = ret;332return NULL;333}334}335336switch (op) {337case DEPTH_DECOMPRESS:338pipeline = &state->depth_decomp[samples_log2].decompress_pipeline;339break;340case DEPTH_RESUMMARIZE:341pipeline = &state->depth_decomp[samples_log2].resummarize_pipeline;342break;343default:344unreachable("unknown operation");345}346347return pipeline;348}349350static void351radv_process_depth_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,352const VkImageSubresourceRange *range, int level, int layer)353{354struct radv_device *device = cmd_buffer->device;355struct radv_meta_state *state = &device->meta_state;356uint32_t samples_log2 = ffs(image->info.samples) - 1;357struct radv_image_view iview;358uint32_t width, height;359360width = radv_minify(image->info.width, range->baseMipLevel + level);361height = radv_minify(image->info.height, range->baseMipLevel + level);362363radv_image_view_init(&iview, device,364&(VkImageViewCreateInfo){365.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,366.image = radv_image_to_handle(image),367.viewType = 
radv_meta_get_view_type(image),368.format = image->vk_format,369.subresourceRange =370{371.aspectMask = VK_IMAGE_ASPECT_DEPTH_BIT,372.baseMipLevel = range->baseMipLevel + level,373.levelCount = 1,374.baseArrayLayer = range->baseArrayLayer + layer,375.layerCount = 1,376},377},378NULL);379380VkFramebuffer fb_h;381radv_CreateFramebuffer(382radv_device_to_handle(device),383&(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,384.attachmentCount = 1,385.pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},386.width = width,387.height = height,388.layers = 1},389&cmd_buffer->pool->alloc, &fb_h);390391radv_cmd_buffer_begin_render_pass(cmd_buffer,392&(VkRenderPassBeginInfo){393.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,394.renderPass = state->depth_decomp[samples_log2].pass,395.framebuffer = fb_h,396.renderArea = {.offset =397{3980,3990,400},401.extent =402{403width,404height,405}},406.clearValueCount = 0,407.pClearValues = NULL,408},409NULL);410radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);411412radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);413radv_cmd_buffer_end_render_pass(cmd_buffer);414415radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);416}417418static void419radv_process_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,420const VkImageSubresourceRange *subresourceRange,421struct radv_sample_locations_state *sample_locs, enum radv_depth_op op)422{423struct radv_meta_saved_state saved_state;424VkCommandBuffer cmd_buffer_h = radv_cmd_buffer_to_handle(cmd_buffer);425VkPipeline *pipeline;426427radv_meta_save(428&saved_state, cmd_buffer,429RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_SAMPLE_LOCATIONS | RADV_META_SAVE_PASS);430431pipeline = radv_get_depth_pipeline(cmd_buffer, image, subresourceRange, op);432433radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), 
VK_PIPELINE_BIND_POINT_GRAPHICS,434*pipeline);435436if (sample_locs) {437assert(image->flags & VK_IMAGE_CREATE_SAMPLE_LOCATIONS_COMPATIBLE_DEPTH_BIT_EXT);438439/* Set the sample locations specified during explicit or440* automatic layout transitions, otherwise the depth decompress441* pass uses the default HW locations.442*/443radv_CmdSetSampleLocationsEXT(cmd_buffer_h,444&(VkSampleLocationsInfoEXT){445.sampleLocationsPerPixel = sample_locs->per_pixel,446.sampleLocationGridSize = sample_locs->grid_size,447.sampleLocationsCount = sample_locs->count,448.pSampleLocations = sample_locs->locations,449});450}451452for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {453454/* Do not decompress levels without HTILE. */455if (!radv_htile_enabled(image, subresourceRange->baseMipLevel + l))456continue;457458uint32_t width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);459uint32_t height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);460461radv_CmdSetViewport(cmd_buffer_h, 0, 1,462&(VkViewport){.x = 0,463.y = 0,464.width = width,465.height = height,466.minDepth = 0.0f,467.maxDepth = 1.0f});468469radv_CmdSetScissor(cmd_buffer_h, 0, 1,470&(VkRect2D){471.offset = {0, 0},472.extent = {width, height},473});474475for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {476radv_process_depth_image_layer(cmd_buffer, image, subresourceRange, l, s);477}478}479480radv_meta_restore(&saved_state, cmd_buffer);481}482483void484radv_decompress_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,485const VkImageSubresourceRange *subresourceRange,486struct radv_sample_locations_state *sample_locs)487{488struct radv_barrier_data barrier = {0};489490barrier.layout_transitions.depth_stencil_expand = 1;491radv_describe_layout_transition(cmd_buffer, &barrier);492493assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);494radv_process_depth_stencil(cmd_buffer, image, 
subresourceRange, sample_locs, DEPTH_DECOMPRESS);495}496497void498radv_resummarize_depth_stencil(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,499const VkImageSubresourceRange *subresourceRange,500struct radv_sample_locations_state *sample_locs)501{502struct radv_barrier_data barrier = {0};503504barrier.layout_transitions.depth_stencil_resummarize = 1;505radv_describe_layout_transition(cmd_buffer, &barrier);506507assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);508radv_process_depth_stencil(cmd_buffer, image, subresourceRange, sample_locs, DEPTH_RESUMMARIZE);509}510511512