Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_resolve_cs.c
7236 views
/*1* Copyright © 2016 Dave Airlie2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include <assert.h>24#include <stdbool.h>2526#include "nir/nir_builder.h"27#include "radv_meta.h"28#include "radv_private.h"29#include "sid.h"30#include "vk_format.h"3132static nir_ssa_def *33radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)34{35unsigned i;3637nir_ssa_def *cmp[3];38for (i = 0; i < 3; i++)39cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));4041nir_ssa_def *ltvals[3];42for (i = 0; i < 3; i++)43ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));4445nir_ssa_def *gtvals[3];4647for (i = 0; i < 3; i++) {48gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));49gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));50gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));51}5253nir_ssa_def *comp[4];54for (i = 0; i < 3; i++)55comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);56comp[3] = nir_channels(b, input, 1 << 3);57return nir_vec(b, comp, 4);58}5960static nir_shader *61build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)62{63const struct glsl_type *sampler_type =64glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);65const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);66nir_builder b =67nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs-%d-%s", samples,68is_integer ? "int" : (is_srgb ? "srgb" : "float"));69b.shader->info.workgroup_size[0] = 8;70b.shader->info.workgroup_size[1] = 8;71b.shader->info.workgroup_size[2] = 1;7273nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");74input_img->data.descriptor_set = 0;75input_img->data.binding = 0;7677nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");78output_img->data.descriptor_set = 0;79output_img->data.binding = 1;80nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);81nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);82nir_ssa_def *block_size =83nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],84b.shader->info.workgroup_size[2], 0);8586nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);8788nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);89nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);9091nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, src_offset), 0x3);92nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");9394radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, img_coord);9596nir_ssa_def *outval = nir_load_var(&b, color);97if (is_srgb)98outval = radv_meta_build_resolve_srgb_conversion(&b, outval);99100nir_ssa_def *coord = nir_iadd(&b, global_id, dst_offset);101nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,102nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));103return b.shader;104}105106enum {107DEPTH_RESOLVE,108STENCIL_RESOLVE,109};110111static const char *112get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)113{114switch (resolve_mode) {115case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:116return "zero";117case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:118return "average";119case VK_RESOLVE_MODE_MIN_BIT_KHR:120return "min";121case VK_RESOLVE_MODE_MAX_BIT_KHR:122return "max";123default:124unreachable("invalid resolve mode");125}126}127128static nir_shader *129build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,130VkResolveModeFlagBits resolve_mode)131{132const struct glsl_type *sampler_type =133glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);134const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);135136nir_builder b = nir_builder_init_simple_shader(137MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs_%s-%s-%d",138index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);139b.shader->info.workgroup_size[0] = 8;140b.shader->info.workgroup_size[1] = 8;141b.shader->info.workgroup_size[2] = 1;142143nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");144input_img->data.descriptor_set = 0;145input_img->data.binding = 0;146147nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");148output_img->data.descriptor_set = 0;149output_img->data.binding = 1;150nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);151nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);152nir_ssa_def *block_size =153nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],154b.shader->info.workgroup_size[2], 0);155156nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);157nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);158159nir_ssa_def *img_coord =160nir_vec3(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), layer_id);161162nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;163164nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;165166nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);167tex->sampler_dim = GLSL_SAMPLER_DIM_MS;168tex->op = nir_texop_txf_ms;169tex->src[0].src_type = nir_tex_src_coord;170tex->src[0].src = nir_src_for_ssa(img_coord);171tex->src[1].src_type = nir_tex_src_ms_index;172tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));173tex->src[2].src_type = nir_tex_src_texture_deref;174tex->src[2].src = nir_src_for_ssa(input_img_deref);175tex->dest_type = type;176tex->is_array = true;177tex->coord_components = 3;178179nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");180nir_builder_instr_insert(&b, &tex->instr);181182nir_ssa_def *outval = &tex->dest.ssa;183184if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {185for (int i = 1; i < samples; i++) {186nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);187tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;188tex_add->op = nir_texop_txf_ms;189tex_add->src[0].src_type = nir_tex_src_coord;190tex_add->src[0].src = nir_src_for_ssa(img_coord);191tex_add->src[1].src_type = nir_tex_src_ms_index;192tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));193tex_add->src[2].src_type = nir_tex_src_texture_deref;194tex_add->src[2].src = nir_src_for_ssa(input_img_deref);195tex_add->dest_type = type;196tex_add->is_array = true;197tex_add->coord_components = 3;198199nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");200nir_builder_instr_insert(&b, &tex_add->instr);201202switch (resolve_mode) {203case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:204assert(index == DEPTH_RESOLVE);205outval = nir_fadd(&b, outval, &tex_add->dest.ssa);206break;207case VK_RESOLVE_MODE_MIN_BIT_KHR:208if (index == DEPTH_RESOLVE)209outval = nir_fmin(&b, outval, &tex_add->dest.ssa);210else211outval = nir_umin(&b, outval, &tex_add->dest.ssa);212break;213case VK_RESOLVE_MODE_MAX_BIT_KHR:214if (index == DEPTH_RESOLVE)215outval = nir_fmax(&b, outval, &tex_add->dest.ssa);216else217outval = nir_umax(&b, outval, &tex_add->dest.ssa);218break;219default:220unreachable("invalid resolve mode");221}222}223224if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)225outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));226}227228nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),229nir_channel(&b, img_coord, 2), nir_imm_int(&b, 0));230nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,231nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));232return b.shader;233}234235static VkResult236create_layout(struct radv_device *device)237{238VkResult result;239/*240* two descriptors one for the image being sampled241* one for the buffer being written.242*/243VkDescriptorSetLayoutCreateInfo ds_create_info = {244.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,245.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,246.bindingCount = 2,247.pBindings = (VkDescriptorSetLayoutBinding[]){248{.binding = 0,249.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,250.descriptorCount = 1,251.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,252.pImmutableSamplers = NULL},253{.binding = 1,254.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,255.descriptorCount = 1,256.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,257.pImmutableSamplers = NULL},258}};259260result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,261&device->meta_state.alloc,262&device->meta_state.resolve_compute.ds_layout);263if (result != VK_SUCCESS)264goto fail;265266VkPipelineLayoutCreateInfo pl_create_info = {267.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,268.setLayoutCount = 1,269.pSetLayouts = &device->meta_state.resolve_compute.ds_layout,270.pushConstantRangeCount = 1,271.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},272};273274result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,275&device->meta_state.alloc,276&device->meta_state.resolve_compute.p_layout);277if (result != VK_SUCCESS)278goto fail;279return VK_SUCCESS;280fail:281return result;282}283284static VkResult285create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,286VkPipeline *pipeline)287{288VkResult result;289290mtx_lock(&device->meta_state.mtx);291if (*pipeline) {292mtx_unlock(&device->meta_state.mtx);293return VK_SUCCESS;294}295296nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);297298/* compute shader */299300VkPipelineShaderStageCreateInfo pipeline_shader_stage = {301.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,302.stage = VK_SHADER_STAGE_COMPUTE_BIT,303.module = vk_shader_module_handle_from_nir(cs),304.pName = "main",305.pSpecializationInfo = NULL,306};307308VkComputePipelineCreateInfo vk_pipeline_info = {309.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,310.stage = pipeline_shader_stage,311.flags = 0,312.layout = device->meta_state.resolve_compute.p_layout,313};314315result = radv_CreateComputePipelines(radv_device_to_handle(device),316radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,317&vk_pipeline_info, NULL, pipeline);318if (result != VK_SUCCESS)319goto fail;320321ralloc_free(cs);322mtx_unlock(&device->meta_state.mtx);323return VK_SUCCESS;324fail:325ralloc_free(cs);326mtx_unlock(&device->meta_state.mtx);327return result;328}329330static VkResult331create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,332VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)333{334VkResult result;335336mtx_lock(&device->meta_state.mtx);337if (*pipeline) {338mtx_unlock(&device->meta_state.mtx);339return VK_SUCCESS;340}341342nir_shader *cs =343build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);344345/* compute shader */346VkPipelineShaderStageCreateInfo pipeline_shader_stage = {347.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,348.stage = VK_SHADER_STAGE_COMPUTE_BIT,349.module = vk_shader_module_handle_from_nir(cs),350.pName = "main",351.pSpecializationInfo = NULL,352};353354VkComputePipelineCreateInfo vk_pipeline_info = {355.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,356.stage = pipeline_shader_stage,357.flags = 0,358.layout = device->meta_state.resolve_compute.p_layout,359};360361result = radv_CreateComputePipelines(radv_device_to_handle(device),362radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,363&vk_pipeline_info, NULL, pipeline);364if (result != VK_SUCCESS)365goto fail;366367ralloc_free(cs);368mtx_unlock(&device->meta_state.mtx);369return VK_SUCCESS;370fail:371ralloc_free(cs);372mtx_unlock(&device->meta_state.mtx);373return result;374}375376VkResult377radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)378{379struct radv_meta_state *state = &device->meta_state;380VkResult res;381382res = create_layout(device);383if (res != VK_SUCCESS)384goto fail;385386if (on_demand)387return VK_SUCCESS;388389for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {390uint32_t samples = 1 << i;391392res = create_resolve_pipeline(device, samples, false, false,393&state->resolve_compute.rc[i].pipeline);394if (res != VK_SUCCESS)395goto fail;396397res = create_resolve_pipeline(device, samples, true, false,398&state->resolve_compute.rc[i].i_pipeline);399if (res != VK_SUCCESS)400goto fail;401402res = create_resolve_pipeline(device, samples, false, true,403&state->resolve_compute.rc[i].srgb_pipeline);404if (res != VK_SUCCESS)405goto fail;406407res = create_depth_stencil_resolve_pipeline(408device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT_KHR,409&state->resolve_compute.depth[i].average_pipeline);410if (res != VK_SUCCESS)411goto fail;412413res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,414VK_RESOLVE_MODE_MAX_BIT_KHR,415&state->resolve_compute.depth[i].max_pipeline);416if (res != VK_SUCCESS)417goto fail;418419res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,420VK_RESOLVE_MODE_MIN_BIT_KHR,421&state->resolve_compute.depth[i].min_pipeline);422if (res != VK_SUCCESS)423goto fail;424425res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,426VK_RESOLVE_MODE_MAX_BIT_KHR,427&state->resolve_compute.stencil[i].max_pipeline);428if (res != VK_SUCCESS)429goto fail;430431res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,432VK_RESOLVE_MODE_MIN_BIT_KHR,433&state->resolve_compute.stencil[i].min_pipeline);434if (res != VK_SUCCESS)435goto fail;436}437438res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,439VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,440&state->resolve_compute.depth_zero_pipeline);441if (res != VK_SUCCESS)442goto fail;443444res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,445VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,446&state->resolve_compute.stencil_zero_pipeline);447if (res != VK_SUCCESS)448goto fail;449450return VK_SUCCESS;451fail:452radv_device_finish_meta_resolve_compute_state(device);453return res;454}455456void457radv_device_finish_meta_resolve_compute_state(struct radv_device *device)458{459struct radv_meta_state *state = &device->meta_state;460for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {461radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,462&state->alloc);463464radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,465&state->alloc);466467radv_DestroyPipeline(radv_device_to_handle(device),468state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);469470radv_DestroyPipeline(radv_device_to_handle(device),471state->resolve_compute.depth[i].average_pipeline, &state->alloc);472473radv_DestroyPipeline(radv_device_to_handle(device),474state->resolve_compute.depth[i].max_pipeline, &state->alloc);475476radv_DestroyPipeline(radv_device_to_handle(device),477state->resolve_compute.depth[i].min_pipeline, &state->alloc);478479radv_DestroyPipeline(radv_device_to_handle(device),480state->resolve_compute.stencil[i].max_pipeline, &state->alloc);481482radv_DestroyPipeline(radv_device_to_handle(device),483state->resolve_compute.stencil[i].min_pipeline, &state->alloc);484}485486radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,487&state->alloc);488489radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,490&state->alloc);491492radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,493&state->alloc);494radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,495&state->alloc);496}497498static VkPipeline *499radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)500{501struct radv_device *device = cmd_buffer->device;502struct radv_meta_state *state = &device->meta_state;503uint32_t samples = src_iview->image->info.samples;504uint32_t samples_log2 = ffs(samples) - 1;505VkPipeline *pipeline;506507if (vk_format_is_int(src_iview->vk_format))508pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;509else if (vk_format_is_srgb(src_iview->vk_format))510pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;511else512pipeline = &state->resolve_compute.rc[samples_log2].pipeline;513514if (!*pipeline) {515VkResult ret;516517ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk_format),518vk_format_is_srgb(src_iview->vk_format), pipeline);519if (ret != VK_SUCCESS) {520cmd_buffer->record_result = ret;521return NULL;522}523}524525return pipeline;526}527528static void529emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,530struct radv_image_view *dest_iview, const VkOffset2D *src_offset,531const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)532{533struct radv_device *device = cmd_buffer->device;534VkPipeline *pipeline;535536radv_meta_push_descriptor_set(537cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,5380, /* set */5392, /* descriptorWriteCount */540(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,541.dstBinding = 0,542.dstArrayElement = 0,543.descriptorCount = 1,544.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,545.pImageInfo =546(VkDescriptorImageInfo[]){547{.sampler = VK_NULL_HANDLE,548.imageView = radv_image_view_to_handle(src_iview),549.imageLayout = VK_IMAGE_LAYOUT_GENERAL},550}},551{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,552.dstBinding = 1,553.dstArrayElement = 0,554.descriptorCount = 1,555.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,556.pImageInfo = (VkDescriptorImageInfo[]){557{558.sampler = VK_NULL_HANDLE,559.imageView = radv_image_view_to_handle(dest_iview),560.imageLayout = VK_IMAGE_LAYOUT_GENERAL,561},562}}});563564pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);565566radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,567*pipeline);568569unsigned push_constants[4] = {570src_offset->x,571src_offset->y,572dest_offset->x,573dest_offset->y,574};575radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),576device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,5770, 16, push_constants);578radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);579}580581static void582emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,583struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,584VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)585{586struct radv_device *device = cmd_buffer->device;587const uint32_t samples = src_iview->image->info.samples;588const uint32_t samples_log2 = ffs(samples) - 1;589VkPipeline *pipeline;590591radv_meta_push_descriptor_set(592cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,5930, /* set */5942, /* descriptorWriteCount */595(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,596.dstBinding = 0,597.dstArrayElement = 0,598.descriptorCount = 1,599.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,600.pImageInfo =601(VkDescriptorImageInfo[]){602{.sampler = VK_NULL_HANDLE,603.imageView = radv_image_view_to_handle(src_iview),604.imageLayout = VK_IMAGE_LAYOUT_GENERAL},605}},606{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,607.dstBinding = 1,608.dstArrayElement = 0,609.descriptorCount = 1,610.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,611.pImageInfo = (VkDescriptorImageInfo[]){612{613.sampler = VK_NULL_HANDLE,614.imageView = radv_image_view_to_handle(dest_iview),615.imageLayout = VK_IMAGE_LAYOUT_GENERAL,616},617}}});618619switch (resolve_mode) {620case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:621if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)622pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;623else624pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;625break;626case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:627assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);628pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;629break;630case VK_RESOLVE_MODE_MIN_BIT_KHR:631if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)632pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;633else634pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;635break;636case VK_RESOLVE_MODE_MAX_BIT_KHR:637if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)638pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;639else640pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;641break;642default:643unreachable("invalid resolve mode");644}645646if (!*pipeline) {647int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;648VkResult ret;649650ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);651if (ret != VK_SUCCESS) {652cmd_buffer->record_result = ret;653return;654}655}656657radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,658*pipeline);659660radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,661resolve_extent->depth);662}663664void665radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,666VkFormat src_format, VkImageLayout src_image_layout,667struct radv_image *dest_image, VkFormat dest_format,668VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)669{670struct radv_meta_saved_state saved_state;671672radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);673674/* For partial resolves, DCC should be decompressed before resolving675* because the metadata is re-initialized to the uncompressed after.676*/677uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,678cmd_buffer->queue_family_index);679680if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&681radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,682dest_image_layout, false, queue_mask) &&683(region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||684region->extent.width != dest_image->info.width ||685region->extent.height != dest_image->info.height ||686region->extent.depth != dest_image->info.depth)) {687radv_decompress_dcc(cmd_buffer, dest_image,688&(VkImageSubresourceRange){689.aspectMask = region->dstSubresource.aspectMask,690.baseMipLevel = region->dstSubresource.mipLevel,691.levelCount = 1,692.baseArrayLayer = region->dstSubresource.baseArrayLayer,693.layerCount = region->dstSubresource.layerCount,694});695}696697radv_meta_save(698&saved_state, cmd_buffer,699RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);700701assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);702assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);703assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);704705const uint32_t src_base_layer =706radv_meta_get_iview_layer(src_image, ®ion->srcSubresource, ®ion->srcOffset);707708const uint32_t dest_base_layer =709radv_meta_get_iview_layer(dest_image, ®ion->dstSubresource, ®ion->dstOffset);710711const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);712const struct VkOffset3D srcOffset =713radv_sanitize_image_offset(src_image->type, region->srcOffset);714const struct VkOffset3D dstOffset =715radv_sanitize_image_offset(dest_image->type, region->dstOffset);716717for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {718719struct radv_image_view src_iview;720radv_image_view_init(&src_iview, cmd_buffer->device,721&(VkImageViewCreateInfo){722.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,723.image = radv_image_to_handle(src_image),724.viewType = radv_meta_get_view_type(src_image),725.format = src_format,726.subresourceRange =727{728.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,729.baseMipLevel = region->srcSubresource.mipLevel,730.levelCount = 1,731.baseArrayLayer = src_base_layer + layer,732.layerCount = 1,733},734},735NULL);736737struct radv_image_view dest_iview;738radv_image_view_init(&dest_iview, cmd_buffer->device,739&(VkImageViewCreateInfo){740.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,741.image = radv_image_to_handle(dest_image),742.viewType = radv_meta_get_view_type(dest_image),743.format = vk_to_non_srgb_format(dest_format),744.subresourceRange =745{746.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,747.baseMipLevel = region->dstSubresource.mipLevel,748.levelCount = 1,749.baseArrayLayer = dest_base_layer + layer,750.layerCount = 1,751},752},753NULL);754755emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},756&(VkOffset2D){dstOffset.x, dstOffset.y},757&(VkExtent2D){extent.width, extent.height});758}759760radv_meta_restore(&saved_state, cmd_buffer);761762if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&763radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,764dest_image_layout, false, queue_mask)) {765766cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;767768VkImageSubresourceRange range = {769.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,770.baseMipLevel = region->dstSubresource.mipLevel,771.levelCount = 1,772.baseArrayLayer = dest_base_layer,773.layerCount = region->dstSubresource.layerCount,774};775776cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);777}778}779780/**781* Emit any needed resolves for the current subpass.782*/783void784radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)785{786struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;787const struct radv_subpass *subpass = cmd_buffer->state.subpass;788struct radv_subpass_barrier barrier;789uint32_t layer_count = fb->layers;790791if (subpass->view_mask)792layer_count = util_last_bit(subpass->view_mask);793794/* Resolves happen before the end-of-subpass barriers get executed, so795* we have to make the attachment shader-readable.796*/797barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;798barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;799barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;800radv_subpass_barrier(cmd_buffer, &barrier);801802for (uint32_t i = 0; i < subpass->color_count; ++i) {803struct radv_subpass_attachment src_att = subpass->color_attachments[i];804struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];805806if (dst_att.attachment == VK_ATTACHMENT_UNUSED)807continue;808809struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;810struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;811812VkImageResolve2KHR region = {813.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,814.extent = (VkExtent3D){fb->width, fb->height, 1},815.srcSubresource =816(VkImageSubresourceLayers){817.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,818.mipLevel = src_iview->base_mip,819.baseArrayLayer = src_iview->base_layer,820.layerCount = layer_count,821},822.dstSubresource =823(VkImageSubresourceLayers){824.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,825.mipLevel = dst_iview->base_mip,826.baseArrayLayer = dst_iview->base_layer,827.layerCount = layer_count,828},829.srcOffset = (VkOffset3D){0, 0, 0},830.dstOffset = (VkOffset3D){0, 0, 0},831};832833radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk_format,834src_att.layout, dst_iview->image, dst_iview->vk_format,835dst_att.layout, ®ion);836}837838cmd_buffer->state.flush_bits |=839RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |840radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);841}842843void844radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,845VkImageAspectFlags aspects,846VkResolveModeFlagBits resolve_mode)847{848struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;849const struct radv_subpass *subpass = cmd_buffer->state.subpass;850struct radv_meta_saved_state saved_state;851uint32_t layer_count = fb->layers;852853if (subpass->view_mask)854layer_count = util_last_bit(subpass->view_mask);855856/* Resolves happen before the end-of-subpass barriers get executed, so857* we have to make the attachment shader-readable.858*/859cmd_buffer->state.flush_bits |=860radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |861radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |862radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);863864struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;865struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;866struct radv_image *src_image = src_iview->image;867868VkImageResolve2KHR region = {0};869region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;870region.srcSubresource.aspectMask = aspects;871region.srcSubresource.mipLevel = 0;872region.srcSubresource.baseArrayLayer = src_iview->base_layer;873region.srcSubresource.layerCount = layer_count;874875radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, ®ion);876877radv_meta_save(&saved_state, cmd_buffer,878RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);879880struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;881struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;882struct radv_image *dst_image = dst_iview->image;883884struct radv_image_view tsrc_iview;885radv_image_view_init(&tsrc_iview, cmd_buffer->device,886&(VkImageViewCreateInfo){887.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,888.image = radv_image_to_handle(src_image),889.viewType = radv_meta_get_view_type(src_image),890.format = src_iview->vk_format,891.subresourceRange =892{893.aspectMask = aspects,894.baseMipLevel = src_iview->base_mip,895.levelCount = 1,896.baseArrayLayer = src_iview->base_layer,897.layerCount = layer_count,898},899},900NULL);901902struct radv_image_view tdst_iview;903radv_image_view_init(&tdst_iview, cmd_buffer->device,904&(VkImageViewCreateInfo){905.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,906.image = radv_image_to_handle(dst_image),907.viewType = radv_meta_get_view_type(dst_image),908.format = dst_iview->vk_format,909.subresourceRange =910{911.aspectMask = aspects,912.baseMipLevel = dst_iview->base_mip,913.levelCount = 1,914.baseArrayLayer = dst_iview->base_layer,915.layerCount = layer_count,916},917},918NULL);919920emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,921&(VkExtent3D){fb->width, fb->height, layer_count}, aspects,922resolve_mode);923924cmd_buffer->state.flush_bits |=925RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |926radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);927928VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;929uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,930cmd_buffer->queue_family_index);931932if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {933VkImageSubresourceRange range = {0};934range.aspectMask = aspects;935range.baseMipLevel = dst_iview->base_mip;936range.levelCount = 1;937range.baseArrayLayer = dst_iview->base_layer;938range.layerCount = layer_count;939940uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);941942cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);943}944945radv_meta_restore(&saved_state, cmd_buffer);946}947948949