Path: blob/21.2-virgl/src/freedreno/vulkan/tu_pass.c

/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

#include "tu_private.h"

#include "vk_util.h"
#include "vk_format.h"

/* Return true if we have to fall back to sysmem rendering because the
 * dependency can't be satisfied with tiled rendering.
 */

static bool
dep_invalid_for_gmem(const VkSubpassDependency2 *dep)
{
   /* External dependencies don't matter here. */
   if (dep->srcSubpass == VK_SUBPASS_EXTERNAL ||
       dep->dstSubpass == VK_SUBPASS_EXTERNAL)
      return false;

   /* We can conceptually break down the process of rewriting a sysmem
    * renderpass into a gmem one into two parts:
    *
    * 1. Split each draw and multisample resolve into N copies, one for each
    * bin. (If hardware binning, add one more copy where the FS is disabled
    * for the binning pass). This is always allowed because the vertex stage
    * is allowed to run an arbitrary number of times and there are no extra
    * ordering constraints within a draw.
    * 2. Take the last copy of the second-to-last draw and slide it down to
    * before the last copy of the last draw. Repeat for each earlier draw
    * until the draw pass for the last bin is complete, then repeat for each
    * earlier bin until we finish with the first bin.
    *
    * During this rearranging process, we can't slide draws past each other in
    * a way that breaks the subpass dependencies. For each draw, we must slide
    * it past (copies of) the rest of the draws in the renderpass. We can
    * slide a draw past another if there isn't a dependency between them, or
    * if the dependenc(ies) are dependencies between framebuffer-space stages
    * only with the BY_REGION bit set. Note that this includes
    * self-dependencies, since these may result in pipeline barriers that also
    * break the rearranging process.
    */

   /* This is straight from the Vulkan 1.2 spec, section 6.1.4 "Framebuffer
    * Region Dependencies":
    */
   const VkPipelineStageFlags framebuffer_space_stages =
      VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT |
      VK_PIPELINE_STAGE_EARLY_FRAGMENT_TESTS_BIT |
      VK_PIPELINE_STAGE_LATE_FRAGMENT_TESTS_BIT |
      VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;

   return
      (dep->srcStageMask & ~framebuffer_space_stages) ||
      (dep->dstStageMask & ~framebuffer_space_stages) ||
      !(dep->dependencyFlags & VK_DEPENDENCY_BY_REGION_BIT);
}
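
/* An illustrative case (not from the original source): the canonical
 * input-attachment feedback dependency
 *
 *    .srcStageMask    = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
 *    .dstStageMask    = VK_PIPELINE_STAGE_FRAGMENT_SHADER_BIT,
 *    .dependencyFlags = VK_DEPENDENCY_BY_REGION_BIT,
 *
 * stays within the framebuffer-space stages and is BY_REGION, so
 * dep_invalid_for_gmem() returns false and gmem rendering remains possible.
 * A dependency whose dstStageMask includes VK_PIPELINE_STAGE_VERTEX_SHADER_BIT,
 * or any dependency without BY_REGION, makes it return true and forces the
 * sysmem fallback.
 */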

static void
tu_render_pass_add_subpass_dep(struct tu_render_pass *pass,
                               const VkSubpassDependency2 *dep)
{
   uint32_t src = dep->srcSubpass;
   uint32_t dst = dep->dstSubpass;

   if (dep_invalid_for_gmem(dep))
      pass->gmem_pixels = 0;

   /* Ignore subpass self-dependencies as they allow the app to call
    * vkCmdPipelineBarrier() inside the render pass and the driver should only
    * do the barrier when called, not when starting the render pass.
    */
   if (src == dst)
      return;

   struct tu_subpass_barrier *src_barrier;
   if (src == VK_SUBPASS_EXTERNAL) {
      src_barrier = &pass->subpasses[0].start_barrier;
   } else if (src == pass->subpass_count - 1) {
      src_barrier = &pass->end_barrier;
   } else {
      src_barrier = &pass->subpasses[src + 1].start_barrier;
   }

   struct tu_subpass_barrier *dst_barrier;
   if (dst == VK_SUBPASS_EXTERNAL) {
      dst_barrier = &pass->end_barrier;
   } else {
      dst_barrier = &pass->subpasses[dst].start_barrier;
   }

   if (dep->dstStageMask != VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT)
      src_barrier->src_stage_mask |= dep->srcStageMask;
   src_barrier->src_access_mask |= dep->srcAccessMask;
   dst_barrier->dst_access_mask |= dep->dstAccessMask;
}
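
/* A sketch of how the routing above plays out (added description, not from
 * the original source): for a dependency from subpass 0 to subpass 2, the
 * srcStageMask/srcAccessMask accumulate into subpasses[1].start_barrier and
 * the dstAccessMask into subpasses[2].start_barrier.  If the source is the
 * last subpass, or the destination is VK_SUBPASS_EXTERNAL, the corresponding
 * masks land in pass->end_barrier instead.
 */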

/* We currently only care about undefined layouts, because we have to
 * flush/invalidate CCU for those. PREINITIALIZED is the same thing as
 * UNDEFINED for anything not linear tiled, but we don't know yet whether the
 * images used are tiled, so just assume they are.
 */

static bool
layout_undefined(VkImageLayout layout)
{
   return layout == VK_IMAGE_LAYOUT_UNDEFINED ||
          layout == VK_IMAGE_LAYOUT_PREINITIALIZED;
}

/* This implements the following bit of spec text:
 *
 *    If there is no subpass dependency from VK_SUBPASS_EXTERNAL to the
 *    first subpass that uses an attachment, then an implicit subpass
 *    dependency exists from VK_SUBPASS_EXTERNAL to the first subpass it is
 *    used in. The implicit subpass dependency only exists if there
 *    exists an automatic layout transition away from initialLayout.
 *    The subpass dependency operates as if defined with the
 *    following parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *        .srcSubpass = VK_SUBPASS_EXTERNAL;
 *        .dstSubpass = firstSubpass; // First subpass attachment is used in
 *        .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT;
 *        .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *        .srcAccessMask = 0;
 *        .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *        .dependencyFlags = 0;
 *    };
 *
 *    Similarly, if there is no subpass dependency from the last subpass
 *    that uses an attachment to VK_SUBPASS_EXTERNAL, then an implicit
 *    subpass dependency exists from the last subpass it is used in to
 *    VK_SUBPASS_EXTERNAL. The implicit subpass dependency only exists
 *    if there exists an automatic layout transition into finalLayout.
 *    The subpass dependency operates as if defined with the following
 *    parameters:
 *
 *    VkSubpassDependency implicitDependency = {
 *        .srcSubpass = lastSubpass; // Last subpass attachment is used in
 *        .dstSubpass = VK_SUBPASS_EXTERNAL;
 *        .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT;
 *        .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT;
 *        .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
 *                         VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT;
 *        .dstAccessMask = 0;
 *        .dependencyFlags = 0;
 *    };
 *
 * Note: currently this is the only use we have for layout transitions,
 * besides needing to invalidate CCU at the beginning, so we also flag
 * transitions from UNDEFINED here.
 */
static void
tu_render_pass_add_implicit_deps(struct tu_render_pass *pass,
                                 const VkRenderPassCreateInfo2 *info)
{
   const VkAttachmentDescription2* att = info->pAttachments;
   bool has_external_src[info->subpassCount];
   bool has_external_dst[info->subpassCount];
   bool att_used[pass->attachment_count];

   memset(has_external_src, 0, sizeof(has_external_src));
   memset(has_external_dst, 0, sizeof(has_external_dst));

   for (uint32_t i = 0; i < info->dependencyCount; i++) {
      uint32_t src = info->pDependencies[i].srcSubpass;
      uint32_t dst = info->pDependencies[i].dstSubpass;

      if (src == dst)
         continue;

      if (src == VK_SUBPASS_EXTERNAL)
         has_external_src[dst] = true;
      if (dst == VK_SUBPASS_EXTERNAL)
         has_external_dst[src] = true;
   }

   memset(att_used, 0, sizeof(att_used));

   for (unsigned i = 0; i < info->subpassCount; i++) {
      if (!has_external_src[i])
         continue;

      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
      bool src_implicit_dep = false;

      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
         uint32_t a = subpass->pInputAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].initialLayout != subpass->pInputAttachments[j].layout &&
             !att_used[a])
            src_implicit_dep = true;
         att_used[a] = true;
      }

      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
         uint32_t a = subpass->pColorAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].initialLayout != subpass->pColorAttachments[j].layout &&
             !att_used[a])
            src_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pResolveAttachments) {
         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
            uint32_t a = subpass->pResolveAttachments[j].attachment;
            if (a == VK_ATTACHMENT_UNUSED)
               continue;
            if (att[a].initialLayout != subpass->pResolveAttachments[j].layout &&
                !att_used[a])
               src_implicit_dep = true;
            att_used[a] = true;
         }
      }

      if (src_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = VK_SUBPASS_EXTERNAL,
            .dstSubpass = i,
            .srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .srcAccessMask = 0,
            .dstAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dependencyFlags = 0,
         });
      }
   }

   memset(att_used, 0, sizeof(att_used));

   for (int i = info->subpassCount - 1; i >= 0; i--) {
      if (!has_external_dst[i])
         continue;

      const VkSubpassDescription2 *subpass = &info->pSubpasses[i];
      bool dst_implicit_dep = false;

      for (unsigned j = 0; j < subpass->inputAttachmentCount; j++) {
         uint32_t a = subpass->pInputAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].finalLayout != subpass->pInputAttachments[j].layout &&
             !att_used[a])
            dst_implicit_dep = true;
         att_used[a] = true;
      }

      for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
         uint32_t a = subpass->pColorAttachments[j].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;
         if (att[a].finalLayout != subpass->pColorAttachments[j].layout &&
             !att_used[a])
            dst_implicit_dep = true;
         att_used[a] = true;
      }

      if (subpass->pResolveAttachments) {
         for (unsigned j = 0; j < subpass->colorAttachmentCount; j++) {
            uint32_t a = subpass->pResolveAttachments[j].attachment;
            if (a == VK_ATTACHMENT_UNUSED)
               continue;
            if (att[a].finalLayout != subpass->pResolveAttachments[j].layout &&
                !att_used[a])
               dst_implicit_dep = true;
            att_used[a] = true;
         }
      }

      if (dst_implicit_dep) {
         tu_render_pass_add_subpass_dep(pass, &(VkSubpassDependency2KHR) {
            .srcSubpass = i,
            .dstSubpass = VK_SUBPASS_EXTERNAL,
            .srcStageMask = VK_PIPELINE_STAGE_ALL_COMMANDS_BIT,
            .dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
            .srcAccessMask = VK_ACCESS_INPUT_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
                             VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_READ_BIT |
                             VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT,
            .dstAccessMask = 0,
            .dependencyFlags = 0,
         });
      }
   }

   /* Handle UNDEFINED transitions, similar to the handling in tu_barrier().
    * Assume that if an attachment has an initial layout of UNDEFINED, it gets
    * transitioned eventually.
    */
   for (unsigned i = 0; i < info->attachmentCount; i++) {
      if (layout_undefined(att[i].initialLayout)) {
         if (vk_format_is_depth_or_stencil(att[i].format)) {
            pass->subpasses[0].start_barrier.incoherent_ccu_depth = true;
         } else {
            pass->subpasses[0].start_barrier.incoherent_ccu_color = true;
         }
      }
   }
}
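
/* Illustrative scenario for the function above (not from the original
 * source): an attachment with initialLayout = VK_IMAGE_LAYOUT_UNDEFINED that
 * is first used as VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL in subpass 0,
 * with no explicit dependency from VK_SUBPASS_EXTERNAL, undergoes an
 * automatic layout transition, so the implicit EXTERNAL -> 0 dependency is
 * synthesized.  If the app already provided such an external dependency, or
 * the layouts match, nothing is added.
 */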

static void update_samples(struct tu_subpass *subpass,
                           VkSampleCountFlagBits samples)
{
   assert(subpass->samples == 0 || subpass->samples == samples);
   subpass->samples = samples;
}

static void
tu_render_pass_gmem_config(struct tu_render_pass *pass,
                           const struct tu_physical_device *phys_dev)
{
   uint32_t block_align_shift = 3; /* log2(gmem_align/(tile_align_w*tile_align_h)) */
   uint32_t tile_align_w = phys_dev->info->tile_align_w;
   uint32_t gmem_align = (1 << block_align_shift) * tile_align_w * phys_dev->info->tile_align_h;
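
   /* Worked numbers (illustrative assumption, not values from this file):
    * with tile_align_w = 32 and tile_align_h = 64, gmem_align =
    * (1 << 3) * 32 * 64 = 16384 bytes per allocation block.  When the loop
    * below doubles tile_align_w for a cpp==1 attachment it also decrements
    * block_align_shift, so the relation gmem_align ==
    * (1 << block_align_shift) * tile_align_w * tile_align_h still holds.
    */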

   /* calculate total bytes per pixel */
   uint32_t cpp_total = 0;
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      bool cpp1 = (att->cpp == 1);
      if (att->gmem_offset >= 0) {
         cpp_total += att->cpp;

         /* take into account the separate stencil: */
         if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
            cpp1 = (att->samples == 1);
            cpp_total += att->samples;
         }

         /* texture pitch must be aligned to 64, use a tile_align_w that is
          * a multiple of 64 for cpp==1 attachment to work as input attachment
          */
         if (cpp1 && tile_align_w % 64 != 0) {
            tile_align_w *= 2;
            block_align_shift -= 1;
         }
      }
   }

   pass->tile_align_w = tile_align_w;

   /* no gmem attachments */
   if (cpp_total == 0) {
      /* any non-zero value so tiling config works with no attachments */
      pass->gmem_pixels = 1024*1024;
      return;
   }

   /* TODO: using ccu_offset_gmem so that BLIT_OP_SCALE resolve path
    * doesn't break things. maybe there is a better solution?
    * TODO: this algorithm isn't optimal
    * for example, two attachments with cpp = {1, 4}
    * result: nblocks = {12, 52}, pixels = 196608
    * optimal: nblocks = {13, 51}, pixels = 208896
    */
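   /* Walking through the TODO example above, assuming (values not stated in
    * this file) gmem_blocks starts at 64 and gmem_align = 16384: with
    * cpp_total = 5, the cpp=1 attachment gets nblocks = 64 * 1 / 5 = 12,
    * leaving 52 blocks, and the cpp=4 attachment gets nblocks = 52 * 4 / 4 =
    * 52.  pixels = MIN2(12 * 16384 / 1, 52 * 16384 / 4) = 196608, matching
    * the "result" figures quoted above.
    */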
   uint32_t gmem_blocks = phys_dev->ccu_offset_gmem / gmem_align;
   uint32_t offset = 0, pixels = ~0u, i;
   for (i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0)
         continue;

      att->gmem_offset = offset;

      uint32_t align = MAX2(1, att->cpp >> block_align_shift);
      uint32_t nblocks = MAX2((gmem_blocks * att->cpp / cpp_total) & ~(align - 1), align);

      if (nblocks > gmem_blocks)
         break;

      gmem_blocks -= nblocks;
      cpp_total -= att->cpp;
      offset += nblocks * gmem_align;
      pixels = MIN2(pixels, nblocks * gmem_align / att->cpp);

      /* repeat the same for separate stencil */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
         att->gmem_offset_stencil = offset;

         /* note: for s8_uint, block align is always 1 */
         uint32_t nblocks = gmem_blocks * att->samples / cpp_total;
         if (nblocks > gmem_blocks)
            break;

         gmem_blocks -= nblocks;
         cpp_total -= att->samples;
         offset += nblocks * gmem_align;
         pixels = MIN2(pixels, nblocks * gmem_align / att->samples);
      }
   }

   /* if the loop didn't complete then the gmem config is impossible */
   if (i == pass->attachment_count)
      pass->gmem_pixels = pixels;
}

static void
attachment_set_ops(struct tu_render_pass_attachment *att,
                   VkAttachmentLoadOp load_op,
                   VkAttachmentLoadOp stencil_load_op,
                   VkAttachmentStoreOp store_op,
                   VkAttachmentStoreOp stencil_store_op)
{
   /* load/store ops */
   att->clear_mask =
      (load_op == VK_ATTACHMENT_LOAD_OP_CLEAR) ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
   att->load = (load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   att->store = (store_op == VK_ATTACHMENT_STORE_OP_STORE);

   bool stencil_clear = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_CLEAR);
   bool stencil_load = (stencil_load_op == VK_ATTACHMENT_LOAD_OP_LOAD);
   bool stencil_store = (stencil_store_op == VK_ATTACHMENT_STORE_OP_STORE);

   switch (att->format) {
   case VK_FORMAT_D24_UNORM_S8_UINT: /* || stencil load/store */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load = true;
      if (stencil_store)
         att->store = true;
      break;
   case VK_FORMAT_S8_UINT: /* replace load/store with stencil load/store */
      att->clear_mask = stencil_clear ? VK_IMAGE_ASPECT_COLOR_BIT : 0;
      att->load = stencil_load;
      att->store = stencil_store;
      break;
   case VK_FORMAT_D32_SFLOAT_S8_UINT: /* separate stencil */
      if (att->clear_mask)
         att->clear_mask = VK_IMAGE_ASPECT_DEPTH_BIT;
      if (stencil_clear)
         att->clear_mask |= VK_IMAGE_ASPECT_STENCIL_BIT;
      if (stencil_load)
         att->load_stencil = true;
      if (stencil_store)
         att->store_stencil = true;
      break;
   default:
      break;
   }
}
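
/* Reading of the switch above (added note, not wording from the original
 * source): D24_UNORM_S8_UINT keeps depth and stencil packed in the same gmem
 * data, so stencil load/store is folded into the combined load/store flags;
 * D32_SFLOAT_S8_UINT uses a separate stencil plane (see gmem_offset_stencil
 * above) and therefore tracks load_stencil/store_stencil separately; S8_UINT
 * has no depth aspect, so the stencil ops replace the base ops outright.
 */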

static bool
is_depth_stencil_resolve_enabled(const VkSubpassDescriptionDepthStencilResolve *depth_stencil_resolve)
{
   if (depth_stencil_resolve &&
       depth_stencil_resolve->pDepthStencilResolveAttachment &&
       depth_stencil_resolve->pDepthStencilResolveAttachment->attachment != VK_ATTACHMENT_UNUSED) {
      return true;
   }
   return false;
}

VKAPI_ATTR VkResult VKAPI_CALL
tu_CreateRenderPass2(VkDevice _device,
                     const VkRenderPassCreateInfo2KHR *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator,
                     VkRenderPass *pRenderPass)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   struct tu_render_pass *pass;
   size_t size;
   size_t attachments_offset;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2_KHR);

   size = sizeof(*pass);
   size += pCreateInfo->subpassCount * sizeof(pass->subpasses[0]);
   attachments_offset = size;
   size += pCreateInfo->attachmentCount * sizeof(pass->attachments[0]);

   pass = vk_object_zalloc(&device->vk, pAllocator, size,
                           VK_OBJECT_TYPE_RENDER_PASS);
   if (pass == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   pass->attachment_count = pCreateInfo->attachmentCount;
   pass->subpass_count = pCreateInfo->subpassCount;
   pass->attachments = (void *) pass + attachments_offset;

   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];

      att->format = pCreateInfo->pAttachments[i].format;
      att->samples = pCreateInfo->pAttachments[i].samples;
      /* for d32s8, cpp is for the depth image, and
       * att->samples will be used as the cpp for the stencil image
       */
      if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         att->cpp = 4 * att->samples;
      else
         att->cpp = vk_format_get_blocksize(att->format) * att->samples;
      att->gmem_offset = -1;

      attachment_set_ops(att,
                         pCreateInfo->pAttachments[i].loadOp,
                         pCreateInfo->pAttachments[i].stencilLoadOp,
                         pCreateInfo->pAttachments[i].storeOp,
                         pCreateInfo->pAttachments[i].stencilStoreOp);
   }
   uint32_t subpass_attachment_count = 0;
   struct tu_subpass_attachment *p;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);

      subpass_attachment_count +=
         desc->inputAttachmentCount + desc->colorAttachmentCount +
         (desc->pResolveAttachments ? desc->colorAttachmentCount : 0) +
         (is_depth_stencil_resolve_enabled(ds_resolve) ? 1 : 0);
   }

   if (subpass_attachment_count) {
      pass->subpass_attachments = vk_alloc2(
         &device->vk.alloc, pAllocator,
         subpass_attachment_count * sizeof(struct tu_subpass_attachment), 8,
         VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
      if (pass->subpass_attachments == NULL) {
         vk_object_free(&device->vk, pAllocator, pass);
         return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);
      }
   } else
      pass->subpass_attachments = NULL;

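   /* Note on the layout (added description, not from the original source):
    * every subpass's input/color/resolve reference arrays are carved out of
    * the single subpass_attachments allocation above; p below hands out
    * consecutive slices of it, which is why subpass_attachment_count was
    * totalled up front.
    */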
   p = pass->subpass_attachments;
   for (uint32_t i = 0; i < pCreateInfo->subpassCount; i++) {
      const VkSubpassDescription2 *desc = &pCreateInfo->pSubpasses[i];
      const VkSubpassDescriptionDepthStencilResolve *ds_resolve =
         vk_find_struct_const(desc->pNext, SUBPASS_DESCRIPTION_DEPTH_STENCIL_RESOLVE_KHR);
      struct tu_subpass *subpass = &pass->subpasses[i];

      subpass->input_count = desc->inputAttachmentCount;
      subpass->color_count = desc->colorAttachmentCount;
      subpass->resolve_count = 0;
      subpass->resolve_depth_stencil = is_depth_stencil_resolve_enabled(ds_resolve);
      subpass->samples = 0;
      subpass->srgb_cntl = 0;

      subpass->multiview_mask = desc->viewMask;

      if (desc->inputAttachmentCount > 0) {
         subpass->input_attachments = p;
         p += desc->inputAttachmentCount;

         for (uint32_t j = 0; j < desc->inputAttachmentCount; j++) {
            uint32_t a = desc->pInputAttachments[j].attachment;
            subpass->input_attachments[j].attachment = a;
            if (a != VK_ATTACHMENT_UNUSED)
               pass->attachments[a].gmem_offset = 0;
         }
      }

      if (desc->colorAttachmentCount > 0) {
         subpass->color_attachments = p;
         p += desc->colorAttachmentCount;

         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            uint32_t a = desc->pColorAttachments[j].attachment;
            subpass->color_attachments[j].attachment = a;

            if (a != VK_ATTACHMENT_UNUSED) {
               pass->attachments[a].gmem_offset = 0;
               update_samples(subpass, pCreateInfo->pAttachments[a].samples);

               if (vk_format_is_srgb(pass->attachments[a].format))
                  subpass->srgb_cntl |= 1 << j;

               pass->attachments[a].clear_views |= subpass->multiview_mask;
            }
         }
      }

      subpass->resolve_attachments =
         (desc->pResolveAttachments || subpass->resolve_depth_stencil) ? p : NULL;
      if (desc->pResolveAttachments) {
         p += desc->colorAttachmentCount;
         subpass->resolve_count += desc->colorAttachmentCount;
         for (uint32_t j = 0; j < desc->colorAttachmentCount; j++) {
            subpass->resolve_attachments[j].attachment =
               desc->pResolveAttachments[j].attachment;
         }
      }

      if (subpass->resolve_depth_stencil) {
         p++;
         subpass->resolve_count++;
         uint32_t a = ds_resolve->pDepthStencilResolveAttachment->attachment;
         subpass->resolve_attachments[subpass->resolve_count - 1].attachment = a;
      }

      uint32_t a = desc->pDepthStencilAttachment ?
         desc->pDepthStencilAttachment->attachment : VK_ATTACHMENT_UNUSED;
      subpass->depth_stencil_attachment.attachment = a;
      if (a != VK_ATTACHMENT_UNUSED) {
         pass->attachments[a].gmem_offset = 0;
         update_samples(subpass, pCreateInfo->pAttachments[a].samples);
      }
   }

   /* disable unused attachments */
   for (uint32_t i = 0; i < pass->attachment_count; i++) {
      struct tu_render_pass_attachment *att = &pass->attachments[i];
      if (att->gmem_offset < 0) {
         att->clear_mask = 0;
         att->load = false;
      }
   }

   /* From the VK_KHR_multiview spec:
    *
    *    Multiview is all-or-nothing for a render pass - that is, either all
    *    subpasses must have a non-zero view mask (though some subpasses may
    *    have only one view) or all must be zero.
    *
    * This means we only have to check one of the view masks.
    */
   if (pCreateInfo->pSubpasses[0].viewMask) {
      /* It seems multiview must use sysmem rendering. */
      pass->gmem_pixels = 0;
   } else {
      tu_render_pass_gmem_config(pass, device->physical_device);
   }

   for (unsigned i = 0; i < pCreateInfo->dependencyCount; ++i) {
      tu_render_pass_add_subpass_dep(pass, &pCreateInfo->pDependencies[i]);
   }

   tu_render_pass_add_implicit_deps(pass, pCreateInfo);

   *pRenderPass = tu_render_pass_to_handle(pass);

   return VK_SUCCESS;
}

VKAPI_ATTR void VKAPI_CALL
tu_DestroyRenderPass(VkDevice _device,
                     VkRenderPass _pass,
                     const VkAllocationCallbacks *pAllocator)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   TU_FROM_HANDLE(tu_render_pass, pass, _pass);

   if (!_pass)
      return;

   vk_free2(&device->vk.alloc, pAllocator, pass->subpass_attachments);
   vk_object_free(&device->vk, pAllocator, pass);
}

VKAPI_ATTR void VKAPI_CALL
tu_GetRenderAreaGranularity(VkDevice _device,
                            VkRenderPass renderPass,
                            VkExtent2D *pGranularity)
{
   TU_FROM_HANDLE(tu_device, device, _device);
   pGranularity->width = device->physical_device->info->gmem_align_w;
   pGranularity->height = device->physical_device->info->gmem_align_h;
}

uint32_t
tu_subpass_get_attachment_to_resolve(const struct tu_subpass *subpass, uint32_t index)
{
   if (subpass->resolve_depth_stencil &&
       index == (subpass->resolve_count - 1))
      return subpass->depth_stencil_attachment.attachment;

   return subpass->color_attachments[index].attachment;
}