Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_dcc_retile.c
7326 views
/*1* Copyright © 2021 Google2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#define AC_SURFACE_INCLUDE_NIR24#include "ac_surface.h"2526#include "radv_meta.h"27#include "radv_private.h"2829static nir_ssa_def *30get_global_ids(nir_builder *b, unsigned num_components)31{32unsigned mask = BITFIELD_MASK(num_components);3334nir_ssa_def *local_ids = nir_channels(b, nir_load_local_invocation_id(b), mask);35nir_ssa_def *block_ids = nir_channels(b, nir_load_workgroup_id(b, 32), mask);36nir_ssa_def *block_size = nir_channels(37b,38nir_imm_ivec4(b, b->shader->info.workgroup_size[0], b->shader->info.workgroup_size[1],39b->shader->info.workgroup_size[2], 0),40mask);4142return nir_iadd(b, nir_imul(b, block_ids, block_size), local_ids);43}4445static nir_shader *46build_dcc_retile_compute_shader(struct radv_device *dev, struct radeon_surf *surf)47{48const struct glsl_type *buf_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_UINT);49nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_retile_compute");5051b.shader->info.workgroup_size[0] = 8;52b.shader->info.workgroup_size[1] = 8;53b.shader->info.workgroup_size[2] = 1;5455nir_ssa_def *src_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 8);56nir_ssa_def *src_dcc_pitch = nir_channels(&b, src_dcc_size, 1);57nir_ssa_def *src_dcc_height = nir_channels(&b, src_dcc_size, 2);5859nir_ssa_def *dst_dcc_size = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 8);60nir_ssa_def *dst_dcc_pitch = nir_channels(&b, dst_dcc_size, 1);61nir_ssa_def *dst_dcc_height = nir_channels(&b, dst_dcc_size, 2);62nir_variable *input_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_in");63input_dcc->data.descriptor_set = 0;64input_dcc->data.binding = 0;65nir_variable *output_dcc = nir_variable_create(b.shader, nir_var_uniform, buf_type, "dcc_out");66output_dcc->data.descriptor_set = 0;67output_dcc->data.binding = 1;6869nir_ssa_def *input_dcc_ref = &nir_build_deref_var(&b, input_dcc)->dest.ssa;70nir_ssa_def *output_dcc_ref = &nir_build_deref_var(&b, output_dcc)->dest.ssa;7172nir_ssa_def *coord = get_global_ids(&b, 2);73nir_ssa_def *zero = nir_imm_int(&b, 0);74coord = nir_imul(75&b, coord,76nir_imm_ivec2(&b, surf->u.gfx9.color.dcc_block_width, surf->u.gfx9.color.dcc_block_height));7778nir_ssa_def *src = ac_nir_dcc_addr_from_coord(&b, &dev->physical_device->rad_info, surf->bpe,79&surf->u.gfx9.color.dcc_equation, src_dcc_pitch,80src_dcc_height, zero, nir_channel(&b, coord, 0),81nir_channel(&b, coord, 1), zero, zero, zero);82nir_ssa_def *dst = ac_nir_dcc_addr_from_coord(83&b, &dev->physical_device->rad_info, surf->bpe, &surf->u.gfx9.color.display_dcc_equation,84dst_dcc_pitch, dst_dcc_height, zero, nir_channel(&b, coord, 0), nir_channel(&b, coord, 1),85zero, zero, zero);8687nir_intrinsic_instr *dcc_val =88nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_load);89dcc_val->num_components = 1;90dcc_val->src[0] = nir_src_for_ssa(input_dcc_ref);91dcc_val->src[1] = nir_src_for_ssa(nir_vec4(&b, src, src, src, src));92dcc_val->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));93dcc_val->src[3] = nir_src_for_ssa(nir_imm_int(&b, 0));94nir_ssa_dest_init(&dcc_val->instr, &dcc_val->dest, 1, 32, "dcc_val");95nir_builder_instr_insert(&b, &dcc_val->instr);9697nir_intrinsic_instr *store =98nir_intrinsic_instr_create(b.shader, nir_intrinsic_image_deref_store);99store->num_components = 1;100store->src[0] = nir_src_for_ssa(output_dcc_ref);101store->src[1] = nir_src_for_ssa(nir_vec4(&b, dst, dst, dst, dst));102store->src[2] = nir_src_for_ssa(nir_ssa_undef(&b, 1, 32));103store->src[3] = nir_src_for_ssa(&dcc_val->dest.ssa);104store->src[4] = nir_src_for_ssa(nir_imm_int(&b, 0));105106nir_builder_instr_insert(&b, &store->instr);107return b.shader;108}109110void111radv_device_finish_meta_dcc_retile_state(struct radv_device *device)112{113struct radv_meta_state *state = &device->meta_state;114115radv_DestroyPipeline(radv_device_to_handle(device), state->dcc_retile.pipeline, &state->alloc);116radv_DestroyPipelineLayout(radv_device_to_handle(device), state->dcc_retile.p_layout,117&state->alloc);118radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->dcc_retile.ds_layout,119&state->alloc);120121/* Reset for next finish. */122memset(&state->dcc_retile, 0, sizeof(state->dcc_retile));123}124125/*126* This take a surface, but the only things used are:127* - BPE128* - DCC equations129* - DCC block size130*131* BPE is always 4 at the moment and the rest is derived from the tilemode,132* and ac_surface limits displayable DCC to at most 1 tiling mode. So in effect133* this shader is indepedent of the surface.134*/135static VkResult136radv_device_init_meta_dcc_retile_state(struct radv_device *device, struct radeon_surf *surf)137{138VkResult result = VK_SUCCESS;139nir_shader *cs = build_dcc_retile_compute_shader(device, surf);140141VkDescriptorSetLayoutCreateInfo ds_create_info = {142.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,143.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,144.bindingCount = 2,145.pBindings = (VkDescriptorSetLayoutBinding[]){146{.binding = 0,147.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,148.descriptorCount = 1,149.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,150.pImmutableSamplers = NULL},151{.binding = 1,152.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,153.descriptorCount = 1,154.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,155.pImmutableSamplers = NULL},156}};157158result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,159&device->meta_state.alloc,160&device->meta_state.dcc_retile.ds_layout);161if (result != VK_SUCCESS)162goto cleanup;163164VkPipelineLayoutCreateInfo pl_create_info = {165.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,166.setLayoutCount = 1,167.pSetLayouts = &device->meta_state.dcc_retile.ds_layout,168.pushConstantRangeCount = 1,169.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},170};171172result =173radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,174&device->meta_state.alloc, &device->meta_state.dcc_retile.p_layout);175if (result != VK_SUCCESS)176goto cleanup;177178/* compute shader */179180VkPipelineShaderStageCreateInfo pipeline_shader_stage = {181.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,182.stage = VK_SHADER_STAGE_COMPUTE_BIT,183.module = vk_shader_module_handle_from_nir(cs),184.pName = "main",185.pSpecializationInfo = NULL,186};187188VkComputePipelineCreateInfo vk_pipeline_info = {189.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,190.stage = pipeline_shader_stage,191.flags = 0,192.layout = device->meta_state.dcc_retile.p_layout,193};194195result = radv_CreateComputePipelines(196radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,197&vk_pipeline_info, NULL, &device->meta_state.dcc_retile.pipeline);198if (result != VK_SUCCESS)199goto cleanup;200201cleanup:202if (result != VK_SUCCESS)203radv_device_finish_meta_dcc_retile_state(device);204ralloc_free(cs);205return result;206}207208void209radv_retile_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image)210{211struct radv_meta_saved_state saved_state;212struct radv_device *device = cmd_buffer->device;213214assert(image->type == VK_IMAGE_TYPE_2D);215assert(image->info.array_size == 1 && image->info.levels == 1);216217struct radv_cmd_state *state = &cmd_buffer->state;218219state->flush_bits |= radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, image) |220radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);221222/* Compile pipelines if not already done so. */223if (!cmd_buffer->device->meta_state.dcc_retile.pipeline) {224VkResult ret =225radv_device_init_meta_dcc_retile_state(cmd_buffer->device, &image->planes[0].surface);226if (ret != VK_SUCCESS) {227cmd_buffer->record_result = ret;228return;229}230}231232radv_meta_save(233&saved_state, cmd_buffer,234RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS);235236radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,237device->meta_state.dcc_retile.pipeline);238239struct radv_buffer buffer = {.size = image->size, .bo = image->bo, .offset = image->offset};240241struct radv_buffer_view views[2];242VkBufferView view_handles[2];243radv_buffer_view_init(views, cmd_buffer->device,244&(VkBufferViewCreateInfo){245.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,246.buffer = radv_buffer_to_handle(&buffer),247.offset = image->planes[0].surface.meta_offset,248.range = image->planes[0].surface.meta_size,249.format = VK_FORMAT_R8_UINT,250});251radv_buffer_view_init(views + 1, cmd_buffer->device,252&(VkBufferViewCreateInfo){253.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,254.buffer = radv_buffer_to_handle(&buffer),255.offset = image->planes[0].surface.display_dcc_offset,256.range = image->planes[0].surface.u.gfx9.color.display_dcc_size,257.format = VK_FORMAT_R8_UINT,258});259for (unsigned i = 0; i < 2; ++i)260view_handles[i] = radv_buffer_view_to_handle(&views[i]);261262radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,263device->meta_state.dcc_retile.p_layout, 0, /* set */2642, /* descriptorWriteCount */265(VkWriteDescriptorSet[]){266{267.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,268.dstBinding = 0,269.dstArrayElement = 0,270.descriptorCount = 1,271.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,272.pTexelBufferView = &view_handles[0],273},274{275.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,276.dstBinding = 1,277.dstArrayElement = 0,278.descriptorCount = 1,279.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,280.pTexelBufferView = &view_handles[1],281},282});283284unsigned width = DIV_ROUND_UP(image->info.width, vk_format_get_blockwidth(image->vk_format));285unsigned height = DIV_ROUND_UP(image->info.height, vk_format_get_blockheight(image->vk_format));286287unsigned dcc_width = DIV_ROUND_UP(width, image->planes[0].surface.u.gfx9.color.dcc_block_width);288unsigned dcc_height =289DIV_ROUND_UP(height, image->planes[0].surface.u.gfx9.color.dcc_block_height);290291uint32_t constants[] = {292image->planes[0].surface.u.gfx9.color.dcc_pitch_max + 1,293image->planes[0].surface.u.gfx9.color.dcc_height,294image->planes[0].surface.u.gfx9.color.display_dcc_pitch_max + 1,295image->planes[0].surface.u.gfx9.color.display_dcc_height,296};297radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),298device->meta_state.dcc_retile.p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 16,299constants);300301radv_unaligned_dispatch(cmd_buffer, dcc_width, dcc_height, 1);302303radv_meta_restore(&saved_state, cmd_buffer);304305state->flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH |306radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);307}308309310