Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_pass.c
4560 views
/*1* Copyright © 2019 Raspberry Pi2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "v3dv_private.h"2425static uint32_t26num_subpass_attachments(const VkSubpassDescription *desc)27{28return desc->inputAttachmentCount +29desc->colorAttachmentCount +30(desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +31(desc->pDepthStencilAttachment != NULL);32}3334static void35set_use_tlb_resolve(struct v3dv_device *device,36struct v3dv_render_pass_attachment *att)37{38const struct v3dv_format *format = v3dv_X(device, get_format)(att->desc.format);39att->use_tlb_resolve = v3dv_X(device, format_supports_tlb_resolve)(format);40}4142static void43pass_find_subpass_range_for_attachments(struct v3dv_device *device,44struct v3dv_render_pass *pass)45{46for (uint32_t i = 0; i < pass->attachment_count; i++) {47pass->attachments[i].first_subpass = pass->subpass_count - 1;48pass->attachments[i].last_subpass = 0;49}5051for (uint32_t i = 0; i < pass->subpass_count; i++) {52const struct v3dv_subpass *subpass = &pass->subpasses[i];5354for (uint32_t j = 0; j < subpass->color_count; j++) {55uint32_t attachment_idx = subpass->color_attachments[j].attachment;56if (attachment_idx == VK_ATTACHMENT_UNUSED)57continue;5859if (i < pass->attachments[attachment_idx].first_subpass)60pass->attachments[attachment_idx].first_subpass = i;61if (i > pass->attachments[attachment_idx].last_subpass)62pass->attachments[attachment_idx].last_subpass = i;6364if (subpass->resolve_attachments &&65subpass->resolve_attachments[j].attachment != VK_ATTACHMENT_UNUSED) {66set_use_tlb_resolve(device, &pass->attachments[attachment_idx]);67}68}6970uint32_t ds_attachment_idx = subpass->ds_attachment.attachment;71if (ds_attachment_idx != VK_ATTACHMENT_UNUSED) {72if (i < pass->attachments[ds_attachment_idx].first_subpass)73pass->attachments[ds_attachment_idx].first_subpass = i;74if (i > pass->attachments[ds_attachment_idx].last_subpass)75pass->attachments[ds_attachment_idx].last_subpass = i;76}7778for (uint32_t j = 0; j < subpass->input_count; j++) {79uint32_t input_attachment_idx = subpass->input_attachments[j].attachment;80if (input_attachment_idx == VK_ATTACHMENT_UNUSED)81continue;82if (i < pass->attachments[input_attachment_idx].first_subpass)83pass->attachments[input_attachment_idx].first_subpass = i;84if (i > pass->attachments[input_attachment_idx].last_subpass)85pass->attachments[input_attachment_idx].last_subpass = i;86}8788if (subpass->resolve_attachments) {89for (uint32_t j = 0; j < subpass->color_count; j++) {90uint32_t attachment_idx = subpass->resolve_attachments[j].attachment;91if (attachment_idx == VK_ATTACHMENT_UNUSED)92continue;93if (i < pass->attachments[attachment_idx].first_subpass)94pass->attachments[attachment_idx].first_subpass = i;95if (i > pass->attachments[attachment_idx].last_subpass)96pass->attachments[attachment_idx].last_subpass = i;97}98}99}100}101102103VKAPI_ATTR VkResult VKAPI_CALL104v3dv_CreateRenderPass(VkDevice _device,105const VkRenderPassCreateInfo *pCreateInfo,106const VkAllocationCallbacks *pAllocator,107VkRenderPass *pRenderPass)108{109V3DV_FROM_HANDLE(v3dv_device, device, _device);110struct v3dv_render_pass *pass;111112assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO);113114size_t size = sizeof(*pass);115size_t subpasses_offset = size;116size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);117size_t attachments_offset = size;118size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);119120pass = vk_object_zalloc(&device->vk, pAllocator, size,121VK_OBJECT_TYPE_RENDER_PASS);122if (pass == NULL)123return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);124125pass->attachment_count = pCreateInfo->attachmentCount;126pass->attachments = (void *) pass + attachments_offset;127pass->subpass_count = pCreateInfo->subpassCount;128pass->subpasses = (void *) pass + subpasses_offset;129130for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++)131pass->attachments[i].desc = pCreateInfo->pAttachments[i];132133uint32_t subpass_attachment_count = 0;134for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {135const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];136subpass_attachment_count += num_subpass_attachments(desc);137}138139if (subpass_attachment_count) {140const size_t subpass_attachment_bytes =141subpass_attachment_count * sizeof(struct v3dv_subpass_attachment);142pass->subpass_attachments =143vk_alloc2(&device->vk.alloc, pAllocator, subpass_attachment_bytes, 8,144VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);145if (pass->subpass_attachments == NULL) {146vk_object_free(&device->vk, pAllocator, pass);147return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);148}149} else {150pass->subpass_attachments = NULL;151}152153struct v3dv_subpass_attachment *p = pass->subpass_attachments;154for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {155const VkSubpassDescription *desc = &pCreateInfo->pSubpasses[i];156struct v3dv_subpass *subpass = &pass->subpasses[i];157158subpass->input_count = desc->inputAttachmentCount;159subpass->color_count = desc->colorAttachmentCount;160161if (desc->inputAttachmentCount > 0) {162subpass->input_attachments = p;163p += desc->inputAttachmentCount;164165for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {166subpass->input_attachments[j] = (struct v3dv_subpass_attachment) {167.attachment = desc->pInputAttachments[j].attachment,168.layout = desc->pInputAttachments[j].layout,169};170}171}172173if (desc->colorAttachmentCount > 0) {174subpass->color_attachments = p;175p += desc->colorAttachmentCount;176177for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {178subpass->color_attachments[j] = (struct v3dv_subpass_attachment) {179.attachment = desc->pColorAttachments[j].attachment,180.layout = desc->pColorAttachments[j].layout,181};182}183}184185if (desc->pResolveAttachments) {186subpass->resolve_attachments = p;187p += desc->colorAttachmentCount;188189for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {190subpass->resolve_attachments[j] = (struct v3dv_subpass_attachment) {191.attachment = desc->pResolveAttachments[j].attachment,192.layout = desc->pResolveAttachments[j].layout,193};194}195}196197if (desc->pDepthStencilAttachment) {198subpass->ds_attachment = (struct v3dv_subpass_attachment) {199.attachment = desc->pDepthStencilAttachment->attachment,200.layout = desc->pDepthStencilAttachment->layout,201};202203/* GFXH-1461: if depth is cleared but stencil is loaded (or viceversa),204* the clear might get lost. If a subpass has this then we can't emit205* the clear using the TLB and we have to do it as a draw call.206*207* FIXME: separate stencil.208*/209if (subpass->ds_attachment.attachment != VK_ATTACHMENT_UNUSED) {210struct v3dv_render_pass_attachment *att =211&pass->attachments[subpass->ds_attachment.attachment];212if (att->desc.format == VK_FORMAT_D24_UNORM_S8_UINT) {213if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_CLEAR &&214att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_LOAD) {215subpass->do_depth_clear_with_draw = true;216} else if (att->desc.loadOp == VK_ATTACHMENT_LOAD_OP_LOAD &&217att->desc.stencilLoadOp == VK_ATTACHMENT_LOAD_OP_CLEAR) {218subpass->do_stencil_clear_with_draw = true;219}220}221}222} else {223subpass->ds_attachment.attachment = VK_ATTACHMENT_UNUSED;224}225}226227pass_find_subpass_range_for_attachments(device, pass);228229/* FIXME: handle subpass dependencies */230231*pRenderPass = v3dv_render_pass_to_handle(pass);232233return VK_SUCCESS;234}235236VKAPI_ATTR void VKAPI_CALL237v3dv_DestroyRenderPass(VkDevice _device,238VkRenderPass _pass,239const VkAllocationCallbacks *pAllocator)240{241V3DV_FROM_HANDLE(v3dv_device, device, _device);242V3DV_FROM_HANDLE(v3dv_render_pass, pass, _pass);243244if (!_pass)245return;246247vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);248vk_object_free(&device->vk, pAllocator, pass);249}250251static void252subpass_get_granularity(struct v3dv_device *device,253struct v3dv_render_pass *pass,254uint32_t subpass_idx,255VkExtent2D *granularity)256{257static const uint8_t tile_sizes[] = {25864, 64,25964, 32,26032, 32,26132, 16,26216, 16,26316, 8,2648, 8265};266267/* Our tile size depends on the number of color attachments and the maximum268* bpp across them.269*/270assert(subpass_idx < pass->subpass_count);271struct v3dv_subpass *subpass = &pass->subpasses[subpass_idx];272const uint32_t color_attachment_count = subpass->color_count;273274uint32_t max_internal_bpp = 0;275for (uint32_t i = 0; i < color_attachment_count; i++) {276uint32_t attachment_idx = subpass->color_attachments[i].attachment;277if (attachment_idx == VK_ATTACHMENT_UNUSED)278continue;279const VkAttachmentDescription *desc =280&pass->attachments[attachment_idx].desc;281const struct v3dv_format *format = v3dv_X(device, get_format)(desc->format);282uint32_t internal_type, internal_bpp;283v3dv_X(device, get_internal_type_bpp_for_output_format)284(format->rt_type, &internal_type, &internal_bpp);285286max_internal_bpp = MAX2(max_internal_bpp, internal_bpp);287}288289uint32_t idx = 0;290if (color_attachment_count > 2)291idx += 2;292else if (color_attachment_count > 1)293idx += 1;294295idx += max_internal_bpp;296297assert(idx < ARRAY_SIZE(tile_sizes));298*granularity = (VkExtent2D) {299.width = tile_sizes[idx * 2],300.height = tile_sizes[idx * 2 + 1]301};302}303304VKAPI_ATTR void VKAPI_CALL305v3dv_GetRenderAreaGranularity(VkDevice _device,306VkRenderPass renderPass,307VkExtent2D *pGranularity)308{309V3DV_FROM_HANDLE(v3dv_render_pass, pass, renderPass);310V3DV_FROM_HANDLE(v3dv_device, device, _device);311312*pGranularity = (VkExtent2D) {313.width = 64,314.height = 64,315};316317for (uint32_t i = 0; i < pass->subpass_count; i++) {318VkExtent2D sg;319subpass_get_granularity(device, pass, i, &sg);320pGranularity->width = MIN2(pGranularity->width, sg.width);321pGranularity->height = MIN2(pGranularity->height, sg.height);322}323}324325/* Checks whether the render area rectangle covers a region that is aligned to326* tile boundaries. This means that we are writing to all pixels covered by327* all tiles in that area (except for pixels on edge tiles that are outside328* the framebuffer dimensions).329*330* When our framebuffer is aligned to tile boundaries we know we are writing331* valid data to all all pixels in each tile and we can apply certain332* optimizations, like avoiding tile loads, since we know that none of the333* original pixel values in each tile for that area need to be preserved.334* We also use this to decide if we can use TLB clears, as these clear whole335* tiles so we can't use them if the render area is not aligned.336*337* Note that when an image is created it will possibly include padding blocks338* depending on its tiling layout. When the framebuffer dimensions are not339* aligned to tile boundaries then edge tiles are only partially covered by the340* framebuffer pixels, but tile stores still seem to store full tiles341* writing to the padded sections. This is important when the framebuffer342* is aliasing a smaller section of a larger image, as in that case the edge343* tiles of the framebuffer would overwrite valid pixels in the larger image.344* In that case, we can't flag the area as being aligned.345*/346bool347v3dv_subpass_area_is_tile_aligned(struct v3dv_device *device,348const VkRect2D *area,349struct v3dv_framebuffer *fb,350struct v3dv_render_pass *pass,351uint32_t subpass_idx)352{353assert(subpass_idx < pass->subpass_count);354355VkExtent2D granularity;356subpass_get_granularity(device, pass, subpass_idx, &granularity);357358return area->offset.x % granularity.width == 0 &&359area->offset.y % granularity.height == 0 &&360(area->extent.width % granularity.width == 0 ||361(fb->has_edge_padding &&362area->offset.x + area->extent.width >= fb->width)) &&363(area->extent.height % granularity.height == 0 ||364(fb->has_edge_padding &&365area->offset.y + area->extent.height >= fb->height));366}367368369