Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_meta_copy.c
4560 views
/*1* Copyright © 2019 Raspberry Pi2*3* Permission is hereby granted, free of charge, to any person obtaining a4* copy of this software and associated documentation files (the "Software"),5* to deal in the Software without restriction, including without limitation6* the rights to use, copy, modify, merge, publish, distribute, sublicense,7* and/or sell copies of the Software, and to permit persons to whom the8* Software is furnished to do so, subject to the following conditions:9*10* The above copyright notice and this permission notice (including the next11* paragraph) shall be included in all copies or substantial portions of the12* Software.13*14* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR15* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,16* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL17* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER18* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING19* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS20* IN THE SOFTWARE.21*/2223#include "v3dv_private.h"24#include "v3dv_meta_copy.h"2526#include "compiler/nir/nir_builder.h"27#include "vk_format_info.h"28#include "util/u_pack_color.h"29#include "vulkan/util/vk_common_entrypoints.h"3031static uint32_t32meta_blit_key_hash(const void *key)33{34return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);35}3637static bool38meta_blit_key_compare(const void *key1, const void *key2)39{40return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;41}4243static bool44create_blit_pipeline_layout(struct v3dv_device *device,45VkDescriptorSetLayout *descriptor_set_layout,46VkPipelineLayout *pipeline_layout)47{48VkResult result;4950if (*descriptor_set_layout == 0) {51VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {52.binding = 0,53.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,54.descriptorCount = 1,55.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,56};57VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {58.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,59.bindingCount = 1,60.pBindings = &descriptor_set_layout_binding,61};62result =63v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),64&descriptor_set_layout_info,65&device->vk.alloc,66descriptor_set_layout);67if (result != VK_SUCCESS)68return false;69}7071assert(*pipeline_layout == 0);72VkPipelineLayoutCreateInfo pipeline_layout_info = {73.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,74.setLayoutCount = 1,75.pSetLayouts = descriptor_set_layout,76.pushConstantRangeCount = 1,77.pPushConstantRanges =78&(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },79};8081result =82v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),83&pipeline_layout_info,84&device->vk.alloc,85pipeline_layout);86return result == VK_SUCCESS;87}8889void90v3dv_meta_blit_init(struct v3dv_device *device)91{92for (uint32_t i = 0; i < 3; i++) {93device->meta.blit.cache[i] =94_mesa_hash_table_create(NULL,95meta_blit_key_hash,96meta_blit_key_compare);97}9899create_blit_pipeline_layout(device,100&device->meta.blit.ds_layout,101&device->meta.blit.p_layout);102}103104void105v3dv_meta_blit_finish(struct v3dv_device *device)106{107VkDevice _device = v3dv_device_to_handle(device);108109for (uint32_t i = 0; i < 3; i++) {110hash_table_foreach(device->meta.blit.cache[i], entry) {111struct v3dv_meta_blit_pipeline *item = entry->data;112v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);113v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);114v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);115vk_free(&device->vk.alloc, item);116}117_mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);118}119120if (device->meta.blit.p_layout) {121v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,122&device->vk.alloc);123}124125if (device->meta.blit.ds_layout) {126v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,127&device->vk.alloc);128}129}130131static uint32_t132meta_texel_buffer_copy_key_hash(const void *key)133{134return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);135}136137static bool138meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)139{140return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;141}142143static bool144create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,145VkDescriptorSetLayout *ds_layout,146VkPipelineLayout *p_layout)147{148VkResult result;149150if (*ds_layout == 0) {151VkDescriptorSetLayoutBinding ds_layout_binding = {152.binding = 0,153.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,154.descriptorCount = 1,155.stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,156};157VkDescriptorSetLayoutCreateInfo ds_layout_info = {158.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,159.bindingCount = 1,160.pBindings = &ds_layout_binding,161};162result =163v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),164&ds_layout_info,165&device->vk.alloc,166ds_layout);167if (result != VK_SUCCESS)168return false;169}170171assert(*p_layout == 0);172/* FIXME: this is abusing a bit the API, since not all of our copy173* pipelines have a geometry shader. We could create 2 different pipeline174* layouts, but this works for us for now.175*/176#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET 0177#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16178#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20179#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET 24180VkPushConstantRange ranges[2] = {181{ VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },182{ VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },183};184185VkPipelineLayoutCreateInfo p_layout_info = {186.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,187.setLayoutCount = 1,188.pSetLayouts = ds_layout,189.pushConstantRangeCount = 2,190.pPushConstantRanges = ranges,191};192193result =194v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),195&p_layout_info,196&device->vk.alloc,197p_layout);198return result == VK_SUCCESS;199}200201void202v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)203{204for (uint32_t i = 0; i < 3; i++) {205device->meta.texel_buffer_copy.cache[i] =206_mesa_hash_table_create(NULL,207meta_texel_buffer_copy_key_hash,208meta_texel_buffer_copy_key_compare);209}210211create_texel_buffer_copy_pipeline_layout(212device,213&device->meta.texel_buffer_copy.ds_layout,214&device->meta.texel_buffer_copy.p_layout);215}216217void218v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)219{220VkDevice _device = v3dv_device_to_handle(device);221222for (uint32_t i = 0; i < 3; i++) {223hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {224struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;225v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);226v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);227v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);228vk_free(&device->vk.alloc, item);229}230_mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);231}232233if (device->meta.texel_buffer_copy.p_layout) {234v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,235&device->vk.alloc);236}237238if (device->meta.texel_buffer_copy.ds_layout) {239v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,240&device->vk.alloc);241}242}243244static inline bool245can_use_tlb(struct v3dv_image *image,246const VkOffset3D *offset,247VkFormat *compat_format);248249/* Implements a copy using the TLB.250*251* This only works if we are copying from offset (0,0), since a TLB store for252* tile (x,y) will be written at the same tile offset into the destination.253* When this requirement is not met, we need to use a blit instead.254*255* Returns true if the implementation supports the requested operation (even if256* it failed to process it, for example, due to an out-of-memory error).257*258*/259static bool260copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,261struct v3dv_buffer *buffer,262struct v3dv_image *image,263const VkBufferImageCopy2KHR *region)264{265VkFormat fb_format;266if (!can_use_tlb(image, ®ion->imageOffset, &fb_format))267return false;268269uint32_t internal_type, internal_bpp;270v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)271(fb_format, region->imageSubresource.aspectMask,272&internal_type, &internal_bpp);273274uint32_t num_layers;275if (image->type != VK_IMAGE_TYPE_3D)276num_layers = region->imageSubresource.layerCount;277else278num_layers = region->imageExtent.depth;279assert(num_layers > 0);280281struct v3dv_job *job =282v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);283if (!job)284return true;285286/* Handle copy from compressed format using a compatible format */287const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);288const uint32_t block_h = vk_format_get_blockheight(image->vk_format);289const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);290const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);291292v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);293294struct framebuffer_data framebuffer;295v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,296&job->frame_tiling);297298v3dv_X(job->device, job_emit_binning_flush)(job);299v3dv_X(job->device, job_emit_copy_image_to_buffer_rcl)300(job, buffer, image, &framebuffer, region);301302v3dv_cmd_buffer_finish_job(cmd_buffer);303304return true;305}306307static bool308blit_shader(struct v3dv_cmd_buffer *cmd_buffer,309struct v3dv_image *dst,310VkFormat dst_format,311struct v3dv_image *src,312VkFormat src_format,313VkColorComponentFlags cmask,314VkComponentMapping *cswizzle,315const VkImageBlit2KHR *region,316VkFilter filter,317bool dst_is_padded_image);318319/**320* Returns true if the implementation supports the requested operation (even if321* it failed to process it, for example, due to an out-of-memory error).322*/323static bool324copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,325struct v3dv_buffer *buffer,326struct v3dv_image *image,327const VkBufferImageCopy2KHR *region)328{329bool handled = false;330331/* Generally, the bpp of the data in the buffer matches that of the332* source image. The exception is the case where we are copying333* stencil (8bpp) to a combined d24s8 image (32bpp).334*/335uint32_t buffer_bpp = image->cpp;336337VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;338339/* Because we are going to implement the copy as a blit, we need to create340* a linear image from the destination buffer and we also want our blit341* source and destination formats to be the same (to avoid any format342* conversions), so we choose a canonical format that matches the343* source image bpp.344*345* The exception to the above is copying from combined depth/stencil images346* because we are copying only one aspect of the image, so we need to setup347* our formats, color write mask and source swizzle mask to match that.348*/349VkFormat dst_format;350VkFormat src_format;351VkColorComponentFlags cmask = 0; /* All components */352VkComponentMapping cswizzle = {353.r = VK_COMPONENT_SWIZZLE_IDENTITY,354.g = VK_COMPONENT_SWIZZLE_IDENTITY,355.b = VK_COMPONENT_SWIZZLE_IDENTITY,356.a = VK_COMPONENT_SWIZZLE_IDENTITY,357};358switch (buffer_bpp) {359case 16:360assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);361dst_format = VK_FORMAT_R32G32B32A32_UINT;362src_format = dst_format;363break;364case 8:365assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);366dst_format = VK_FORMAT_R16G16B16A16_UINT;367src_format = dst_format;368break;369case 4:370switch (copy_aspect) {371case VK_IMAGE_ASPECT_COLOR_BIT:372src_format = VK_FORMAT_R8G8B8A8_UINT;373dst_format = VK_FORMAT_R8G8B8A8_UINT;374break;375case VK_IMAGE_ASPECT_DEPTH_BIT:376assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||377image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||378image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);379if (image->vk_format == VK_FORMAT_D32_SFLOAT) {380src_format = VK_FORMAT_R32_UINT;381dst_format = VK_FORMAT_R32_UINT;382} else {383/* We want to write depth in the buffer in the first 24-bits,384* however, the hardware has depth in bits 8-31, so swizzle the385* the source components to match what we want. Also, we don't386* want to write bits 24-31 in the destination.387*/388src_format = VK_FORMAT_R8G8B8A8_UINT;389dst_format = VK_FORMAT_R8G8B8A8_UINT;390cmask = VK_COLOR_COMPONENT_R_BIT |391VK_COLOR_COMPONENT_G_BIT |392VK_COLOR_COMPONENT_B_BIT;393cswizzle.r = VK_COMPONENT_SWIZZLE_G;394cswizzle.g = VK_COMPONENT_SWIZZLE_B;395cswizzle.b = VK_COMPONENT_SWIZZLE_A;396cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;397}398break;399case VK_IMAGE_ASPECT_STENCIL_BIT:400assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);401assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);402/* Copying from S8D24. We want to write 8-bit stencil values only,403* so adjust the buffer bpp for that. Since the hardware stores stencil404* in the LSB, we can just do a RGBA8UI to R8UI blit.405*/406src_format = VK_FORMAT_R8G8B8A8_UINT;407dst_format = VK_FORMAT_R8_UINT;408buffer_bpp = 1;409break;410default:411unreachable("unsupported aspect");412return handled;413};414break;415case 2:416assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||417copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);418dst_format = VK_FORMAT_R16_UINT;419src_format = dst_format;420break;421case 1:422assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);423dst_format = VK_FORMAT_R8_UINT;424src_format = dst_format;425break;426default:427unreachable("unsupported bit-size");428return handled;429};430431/* The hardware doesn't support linear depth/stencil stores, so we432* implement copies of depth/stencil aspect as color copies using a433* compatible color format.434*/435assert(vk_format_is_color(src_format));436assert(vk_format_is_color(dst_format));437copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;438439/* We should be able to handle the blit if we got this far */440handled = true;441442/* Obtain the 2D buffer region spec */443uint32_t buf_width, buf_height;444if (region->bufferRowLength == 0)445buf_width = region->imageExtent.width;446else447buf_width = region->bufferRowLength;448449if (region->bufferImageHeight == 0)450buf_height = region->imageExtent.height;451else452buf_height = region->bufferImageHeight;453454/* If the image is compressed, the bpp refers to blocks, not pixels */455uint32_t block_width = vk_format_get_blockwidth(image->vk_format);456uint32_t block_height = vk_format_get_blockheight(image->vk_format);457buf_width = buf_width / block_width;458buf_height = buf_height / block_height;459460/* Compute layers to copy */461uint32_t num_layers;462if (image->type != VK_IMAGE_TYPE_3D)463num_layers = region->imageSubresource.layerCount;464else465num_layers = region->imageExtent.depth;466assert(num_layers > 0);467468/* Our blit interface can see the real format of the images to detect469* copies between compressed and uncompressed images and adapt the470* blit region accordingly. Here we are just doing a raw copy of471* compressed data, but we are passing an uncompressed view of the472* buffer for the blit destination image (since compressed formats are473* not renderable), so we also want to provide an uncompressed view of474* the source image.475*/476VkResult result;477struct v3dv_device *device = cmd_buffer->device;478VkDevice _device = v3dv_device_to_handle(device);479if (vk_format_is_compressed(image->vk_format)) {480VkImage uiview;481VkImageCreateInfo uiview_info = {482.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,483.imageType = VK_IMAGE_TYPE_3D,484.format = dst_format,485.extent = { buf_width, buf_height, image->extent.depth },486.mipLevels = image->levels,487.arrayLayers = image->array_size,488.samples = image->samples,489.tiling = image->tiling,490.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,491.sharingMode = VK_SHARING_MODE_EXCLUSIVE,492.queueFamilyIndexCount = 0,493.initialLayout = VK_IMAGE_LAYOUT_GENERAL,494};495result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);496if (result != VK_SUCCESS)497return handled;498499v3dv_cmd_buffer_add_private_obj(500cmd_buffer, (uintptr_t)uiview,501(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);502503result =504vk_common_BindImageMemory(_device, uiview,505v3dv_device_memory_to_handle(image->mem),506image->mem_offset);507if (result != VK_SUCCESS)508return handled;509510image = v3dv_image_from_handle(uiview);511}512513/* Copy requested layers */514for (uint32_t i = 0; i < num_layers; i++) {515/* Create the destination blit image from the destination buffer */516VkImageCreateInfo image_info = {517.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,518.imageType = VK_IMAGE_TYPE_2D,519.format = dst_format,520.extent = { buf_width, buf_height, 1 },521.mipLevels = 1,522.arrayLayers = 1,523.samples = VK_SAMPLE_COUNT_1_BIT,524.tiling = VK_IMAGE_TILING_LINEAR,525.usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,526.sharingMode = VK_SHARING_MODE_EXCLUSIVE,527.queueFamilyIndexCount = 0,528.initialLayout = VK_IMAGE_LAYOUT_GENERAL,529};530531VkImage buffer_image;532result =533v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);534if (result != VK_SUCCESS)535return handled;536537v3dv_cmd_buffer_add_private_obj(538cmd_buffer, (uintptr_t)buffer_image,539(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);540541/* Bind the buffer memory to the image */542VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +543i * buf_width * buf_height * buffer_bpp;544result =545vk_common_BindImageMemory(_device, buffer_image,546v3dv_device_memory_to_handle(buffer->mem),547buffer_offset);548if (result != VK_SUCCESS)549return handled;550551/* Blit-copy the requested image extent.552*553* Since we are copying, the blit must use the same format on the554* destination and source images to avoid format conversions. The555* only exception is copying stencil, which we upload to a R8UI source556* image, but that we need to blit to a S8D24 destination (the only557* stencil format we support).558*/559const VkImageBlit2KHR blit_region = {560.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,561.srcSubresource = {562.aspectMask = copy_aspect,563.mipLevel = region->imageSubresource.mipLevel,564.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,565.layerCount = 1,566},567.srcOffsets = {568{569DIV_ROUND_UP(region->imageOffset.x, block_width),570DIV_ROUND_UP(region->imageOffset.y, block_height),571region->imageOffset.z + i,572},573{574DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,575block_width),576DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,577block_height),578region->imageOffset.z + i + 1,579},580},581.dstSubresource = {582.aspectMask = copy_aspect,583.mipLevel = 0,584.baseArrayLayer = 0,585.layerCount = 1,586},587.dstOffsets = {588{ 0, 0, 0 },589{590DIV_ROUND_UP(region->imageExtent.width, block_width),591DIV_ROUND_UP(region->imageExtent.height, block_height),5921593},594},595};596597handled = blit_shader(cmd_buffer,598v3dv_image_from_handle(buffer_image), dst_format,599image, src_format,600cmask, &cswizzle,601&blit_region, VK_FILTER_NEAREST, false);602if (!handled) {603/* This is unexpected, we should have a supported blit spec */604unreachable("Unable to blit buffer to destination image");605return false;606}607}608609assert(handled);610return true;611}612613static VkFormat614get_compatible_tlb_format(VkFormat format)615{616switch (format) {617case VK_FORMAT_R8G8B8A8_SNORM:618return VK_FORMAT_R8G8B8A8_UINT;619620case VK_FORMAT_R8G8_SNORM:621return VK_FORMAT_R8G8_UINT;622623case VK_FORMAT_R8_SNORM:624return VK_FORMAT_R8_UINT;625626case VK_FORMAT_A8B8G8R8_SNORM_PACK32:627return VK_FORMAT_A8B8G8R8_UINT_PACK32;628629case VK_FORMAT_R16_UNORM:630case VK_FORMAT_R16_SNORM:631return VK_FORMAT_R16_UINT;632633case VK_FORMAT_R16G16_UNORM:634case VK_FORMAT_R16G16_SNORM:635return VK_FORMAT_R16G16_UINT;636637case VK_FORMAT_R16G16B16A16_UNORM:638case VK_FORMAT_R16G16B16A16_SNORM:639return VK_FORMAT_R16G16B16A16_UINT;640641case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:642return VK_FORMAT_R32_SFLOAT;643644/* We can't render to compressed formats using the TLB so instead we use645* a compatible format with the same bpp as the compressed format. Because646* the compressed format's bpp is for a full block (i.e. 4x4 pixels in the647* case of ETC), when we implement copies with the compatible format we648* will have to divide offsets and dimensions on the compressed image by649* the compressed block size.650*/651case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:652case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:653case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:654case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:655case VK_FORMAT_BC2_UNORM_BLOCK:656case VK_FORMAT_BC2_SRGB_BLOCK:657case VK_FORMAT_BC3_SRGB_BLOCK:658case VK_FORMAT_BC3_UNORM_BLOCK:659case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:660case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:661case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:662case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:663case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:664case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:665case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:666case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:667case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:668case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:669case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:670case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:671case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:672case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:673case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:674case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:675case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:676case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:677case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:678case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:679case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:680case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:681case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:682case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:683case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:684case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:685case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:686case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:687return VK_FORMAT_R32G32B32A32_UINT;688689case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:690case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:691case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:692case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:693case VK_FORMAT_EAC_R11_UNORM_BLOCK:694case VK_FORMAT_EAC_R11_SNORM_BLOCK:695case VK_FORMAT_BC1_RGB_UNORM_BLOCK:696case VK_FORMAT_BC1_RGB_SRGB_BLOCK:697case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:698case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:699return VK_FORMAT_R16G16B16A16_UINT;700701default:702return VK_FORMAT_UNDEFINED;703}704}705706static inline bool707can_use_tlb(struct v3dv_image *image,708const VkOffset3D *offset,709VkFormat *compat_format)710{711if (offset->x != 0 || offset->y != 0)712return false;713714if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {715if (compat_format)716*compat_format = image->vk_format;717return true;718}719720/* If the image format is not TLB-supported, then check if we can use721* a compatible format instead.722*/723if (compat_format) {724*compat_format = get_compatible_tlb_format(image->vk_format);725if (*compat_format != VK_FORMAT_UNDEFINED)726return true;727}728729return false;730}731732VKAPI_ATTR void VKAPI_CALL733v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,734const VkCopyImageToBufferInfo2KHR *info)735736{737V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);738V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);739V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);740741assert(image->samples == VK_SAMPLE_COUNT_1_BIT);742743for (uint32_t i = 0; i < info->regionCount; i++) {744if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))745continue;746if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))747continue;748unreachable("Unsupported image to buffer copy.");749}750}751752/**753* Returns true if the implementation supports the requested operation (even if754* it failed to process it, for example, due to an out-of-memory error).755*/756static bool757copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,758struct v3dv_image *dst,759struct v3dv_image *src,760const VkImageCopy2KHR *region)761{762/* Destination can't be raster format */763if (dst->tiling == VK_IMAGE_TILING_LINEAR)764return false;765766/* We can only do full copies, so if the format is D24S8 both aspects need767* to be copied. We only need to check the dst format because the spec768* states that depth/stencil formats must match exactly.769*/770if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {771const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |772VK_IMAGE_ASPECT_STENCIL_BIT;773if (region->dstSubresource.aspectMask != ds_aspects)774return false;775}776777/* Don't handle copies between uncompressed and compressed formats for now.778*779* FIXME: we should be able to handle these easily but there is no coverage780* in CTS at the moment that make such copies with full images (which we781* require here), only partial copies. Also, in that case the code below that782* checks for "dst image complete" requires some changes, since it is783* checking against the region dimensions, which are in units of the source784* image format.785*/786if (vk_format_is_compressed(dst->vk_format) !=787vk_format_is_compressed(src->vk_format)) {788return false;789}790791/* Source region must start at (0,0) */792if (region->srcOffset.x != 0 || region->srcOffset.y != 0)793return false;794795/* Destination image must be complete */796if (region->dstOffset.x != 0 || region->dstOffset.y != 0)797return false;798799const uint32_t dst_mip_level = region->dstSubresource.mipLevel;800uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);801uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);802if (region->extent.width != dst_width || region->extent.height != dst_height)803return false;804805/* From vkCmdCopyImage:806*807* "When copying between compressed and uncompressed formats the extent808* members represent the texel dimensions of the source image and not809* the destination."810*/811const uint32_t block_w = vk_format_get_blockwidth(src->vk_format);812const uint32_t block_h = vk_format_get_blockheight(src->vk_format);813uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);814uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);815816/* Account for sample count */817assert(dst->samples == src->samples);818if (dst->samples > VK_SAMPLE_COUNT_1_BIT) {819assert(dst->samples == VK_SAMPLE_COUNT_4_BIT);820width *= 2;821height *= 2;822}823824/* The TFU unit doesn't handle format conversions so we need the formats to825* match. On the other hand, vkCmdCopyImage allows different color formats826* on the source and destination images, but only if they are texel827* compatible. For us, this means that we can effectively ignore different828* formats and just make the copy using either of them, since we are just829* moving raw data and not making any conversions.830*831* Also, the formats supported by the TFU unit are limited, but again, since832* we are only doing raw copies here without interpreting or converting833* the underlying pixel data according to its format, we can always choose834* to use compatible formats that are supported with the TFU unit.835*/836assert(dst->cpp == src->cpp);837const struct v3dv_format *format =838v3dv_get_compatible_tfu_format(cmd_buffer->device,839dst->cpp, NULL);840841/* Emit a TFU job for each layer to blit */842const uint32_t layer_count = dst->type != VK_IMAGE_TYPE_3D ?843region->dstSubresource.layerCount :844region->extent.depth;845const uint32_t src_mip_level = region->srcSubresource.mipLevel;846847const uint32_t base_src_layer = src->type != VK_IMAGE_TYPE_3D ?848region->srcSubresource.baseArrayLayer : region->srcOffset.z;849const uint32_t base_dst_layer = dst->type != VK_IMAGE_TYPE_3D ?850region->dstSubresource.baseArrayLayer : region->dstOffset.z;851for (uint32_t i = 0; i < layer_count; i++) {852v3dv_X(cmd_buffer->device, cmd_buffer_emit_tfu_job)853(cmd_buffer, dst, dst_mip_level, base_dst_layer + i,854src, src_mip_level, base_src_layer + i,855width, height, format);856}857858return true;859}860861/**862* Returns true if the implementation supports the requested operation (even if863* it failed to process it, for example, due to an out-of-memory error).864*/865static bool866copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,867struct v3dv_image *dst,868struct v3dv_image *src,869const VkImageCopy2KHR *region)870{871VkFormat fb_format;872if (!can_use_tlb(src, ®ion->srcOffset, &fb_format) ||873!can_use_tlb(dst, ®ion->dstOffset, &fb_format)) {874return false;875}876877/* From the Vulkan spec, VkImageCopy valid usage:878*879* "If neither the calling command’s srcImage nor the calling command’s880* dstImage has a multi-planar image format then the aspectMask member881* of srcSubresource and dstSubresource must match."882*/883assert(region->dstSubresource.aspectMask ==884region->srcSubresource.aspectMask);885uint32_t internal_type, internal_bpp;886v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)887(fb_format, region->dstSubresource.aspectMask,888&internal_type, &internal_bpp);889890/* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:891*892* "The number of slices of the extent (for 3D) or layers of the893* srcSubresource (for non-3D) must match the number of slices of the894* extent (for 3D) or layers of the dstSubresource (for non-3D)."895*/896assert((src->type != VK_IMAGE_TYPE_3D ?897region->srcSubresource.layerCount : region->extent.depth) ==898(dst->type != VK_IMAGE_TYPE_3D ?899region->dstSubresource.layerCount : region->extent.depth));900uint32_t num_layers;901if (dst->type != VK_IMAGE_TYPE_3D)902num_layers = region->dstSubresource.layerCount;903else904num_layers = region->extent.depth;905assert(num_layers > 0);906907struct v3dv_job *job =908v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);909if (!job)910return true;911912/* Handle copy to compressed image using compatible format */913const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);914const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);915const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);916const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);917918v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp,919src->samples > VK_SAMPLE_COUNT_1_BIT);920921struct framebuffer_data framebuffer;922v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,923&job->frame_tiling);924925v3dv_X(job->device, job_emit_binning_flush)(job);926v3dv_X(job->device, job_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);927928v3dv_cmd_buffer_finish_job(cmd_buffer);929930return true;931}932933/**934* Takes the image provided as argument and creates a new image that has935* the same specification and aliases the same memory storage, except that:936*937* - It has the uncompressed format passed in.938* - Its original width/height are scaled by the factors passed in.939*940* This is useful to implement copies from compressed images using the blit941* path. The idea is that we create uncompressed "image views" of both the942* source and destination images using the uncompressed format and then we943* define the copy blit in terms of that format.944*/945static struct v3dv_image *946create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,947struct v3dv_image *src,948float width_scale,949float height_scale,950VkFormat format)951{952assert(!vk_format_is_compressed(format));953954VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);955956VkImageCreateInfo info = {957.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,958.imageType = src->type,959.format = format,960.extent = {961.width = src->extent.width * width_scale,962.height = src->extent.height * height_scale,963.depth = src->extent.depth,964},965.mipLevels = src->levels,966.arrayLayers = src->array_size,967.samples = src->samples,968.tiling = src->tiling,969.usage = src->usage,970};971972VkImage _image;973VkResult result =974v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);975if (result != VK_SUCCESS) {976v3dv_flag_oom(cmd_buffer, NULL);977return NULL;978}979980struct v3dv_image *image = v3dv_image_from_handle(_image);981image->mem = src->mem;982image->mem_offset = src->mem_offset;983return image;984}985986/**987* Returns true if the implementation supports the requested operation (even if988* it failed to process it, for example, due to an out-of-memory error).989*/990static bool991copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,992struct v3dv_image *dst,993struct v3dv_image *src,994const VkImageCopy2KHR *region)995{996const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);997const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);998const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);999const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);1000const float block_scale_w = (float)src_block_w / (float)dst_block_w;1001const float block_scale_h = (float)src_block_h / (float)dst_block_h;10021003/* We need to choose a single format for the blit to ensure that this is1004* really a copy and there are not format conversions going on. Since we1005* going to blit, we need to make sure that the selected format can be1006* both rendered to and textured from.1007*/1008VkFormat format;1009float src_scale_w = 1.0f;1010float src_scale_h = 1.0f;1011float dst_scale_w = block_scale_w;1012float dst_scale_h = block_scale_h;1013if (vk_format_is_compressed(src->vk_format)) {1014/* If we are copying from a compressed format we should be aware that we1015* are going to texture from the source image, and the texture setup1016* knows the actual size of the image, so we need to choose a format1017* that has a per-texel (not per-block) bpp that is compatible for that1018* image size. For example, for a source image with size Bw*WxBh*H1019* and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,1020* each of the Bw*WxBh*H texels in the compressed source image is 8-bit1021* (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),1022* so we could specify a blit with size Bw*WxBh*H and a format with1023* a bpp of 8-bit per texel (R8_UINT).1024*1025* Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,1026* which is 64-bit per texel, then we would need a 4-bit format, which1027* we don't have, so instead we still choose an 8-bit format, but we1028* apply a divisor to the row dimensions of the blit, since we are1029* copying two texels per item.1030*1031* Generally, we can choose any format so long as we compute appropriate1032* divisors for the width and height depending on the source image's1033* bpp.1034*/1035assert(src->cpp == dst->cpp);10361037format = VK_FORMAT_R32G32_UINT;1038switch (src->cpp) {1039case 16:1040format = VK_FORMAT_R32G32B32A32_UINT;1041break;1042case 8:1043format = VK_FORMAT_R16G16B16A16_UINT;1044break;1045default:1046unreachable("Unsupported compressed format");1047}10481049/* Create image views of the src/dst images that we can interpret in1050* terms of the canonical format.1051*/1052src_scale_w /= src_block_w;1053src_scale_h /= src_block_h;1054dst_scale_w /= src_block_w;1055dst_scale_h /= src_block_h;10561057src = create_image_alias(cmd_buffer, src,1058src_scale_w, src_scale_h, format);10591060dst = create_image_alias(cmd_buffer, dst,1061dst_scale_w, dst_scale_h, format);1062} else {1063format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?1064src->vk_format : get_compatible_tlb_format(src->vk_format);1065if (format == VK_FORMAT_UNDEFINED)1066return false;10671068const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);1069if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)1070return false;1071}10721073/* Given an uncompressed image with size WxH, if we copy it to a compressed1074* image, it will result in an image with size W*bWxH*bH, where bW and bH1075* are the compressed format's block width and height. This means that1076* copies between compressed and uncompressed images involve different1077* image sizes, and therefore, we need to take that into account when1078* setting up the source and destination blit regions below, so they are1079* consistent from the point of view of the single compatible format1080* selected for the copy.1081*1082* We should take into account that the dimensions of the region provided1083* to the copy command are specified in terms of the source image. With that1084* in mind, below we adjust the blit destination region to be consistent with1085* the source region for the compatible format, so basically, we apply1086* the block scale factor to the destination offset provided by the copy1087* command (because it is specified in terms of the destination image, not1088* the source), and then we just add the region copy dimensions to that1089* (since the region dimensions are already specified in terms of the source1090* image).1091*/1092const VkOffset3D src_start = {1093region->srcOffset.x * src_scale_w,1094region->srcOffset.y * src_scale_h,1095region->srcOffset.z,1096};1097const VkOffset3D src_end = {1098src_start.x + region->extent.width * src_scale_w,1099src_start.y + region->extent.height * src_scale_h,1100src_start.z + region->extent.depth,1101};11021103const VkOffset3D dst_start = {1104region->dstOffset.x * dst_scale_w,1105region->dstOffset.y * dst_scale_h,1106region->dstOffset.z,1107};1108const VkOffset3D dst_end = {1109dst_start.x + region->extent.width * src_scale_w,1110dst_start.y + region->extent.height * src_scale_h,1111dst_start.z + region->extent.depth,1112};11131114const VkImageBlit2KHR blit_region = {1115.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,1116.srcSubresource = region->srcSubresource,1117.srcOffsets = { src_start, src_end },1118.dstSubresource = region->dstSubresource,1119.dstOffsets = { dst_start, dst_end },1120};1121bool handled = blit_shader(cmd_buffer,1122dst, format,1123src, format,11240, NULL,1125&blit_region, VK_FILTER_NEAREST, true);11261127/* We should have selected formats that we can blit */1128assert(handled);1129return handled;1130}11311132VKAPI_ATTR void VKAPI_CALL1133v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,1134const VkCopyImageInfo2KHR *info)11351136{1137V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);1138V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);1139V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);11401141assert(src->samples == dst->samples);11421143for (uint32_t i = 0; i < info->regionCount; i++) {1144if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))1145continue;1146if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))1147continue;1148if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))1149continue;1150unreachable("Image copy not supported");1151}1152}11531154static void1155get_hw_clear_color(struct v3dv_device *device,1156const VkClearColorValue *color,1157VkFormat fb_format,1158VkFormat image_format,1159uint32_t internal_type,1160uint32_t internal_bpp,1161uint32_t *hw_color)1162{1163const uint32_t internal_size = 4 << internal_bpp;11641165/* If the image format doesn't match the framebuffer format, then we are1166* trying to clear an unsupported tlb format using a compatible1167* format for the framebuffer. In this case, we want to make sure that1168* we pack the clear value according to the original format semantics,1169* not the compatible format.1170*/1171if (fb_format == image_format) {1172v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size, hw_color);1173} else {1174union util_color uc;1175enum pipe_format pipe_image_format =1176vk_format_to_pipe_format(image_format);1177util_pack_color(color->float32, pipe_image_format, &uc);1178memcpy(hw_color, uc.ui, internal_size);1179}1180}11811182/* Returns true if the implementation is able to handle the case, false1183* otherwise.1184*/1185static bool1186clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,1187struct v3dv_image *image,1188const VkClearValue *clear_value,1189const VkImageSubresourceRange *range)1190{1191const VkOffset3D origin = { 0, 0, 0 };1192VkFormat fb_format;1193if (!can_use_tlb(image, &origin, &fb_format))1194return false;11951196uint32_t internal_type, internal_bpp;1197v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)1198(fb_format, range->aspectMask,1199&internal_type, &internal_bpp);12001201union v3dv_clear_value hw_clear_value = { 0 };1202if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {1203get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,1204image->vk_format, internal_type, internal_bpp,1205&hw_clear_value.color[0]);1206} else {1207assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||1208(range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));1209hw_clear_value.z = clear_value->depthStencil.depth;1210hw_clear_value.s = clear_value->depthStencil.stencil;1211}12121213uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?1214image->levels - range->baseMipLevel :1215range->levelCount;1216uint32_t min_level = range->baseMipLevel;1217uint32_t max_level = range->baseMipLevel + level_count;12181219/* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.1220* Instead, we need to consider the full depth dimension of the image, which1221* goes from 0 up to the level's depth extent.1222*/1223uint32_t min_layer;1224uint32_t max_layer;1225if (image->type != VK_IMAGE_TYPE_3D) {1226uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?1227image->array_size - range->baseArrayLayer :1228range->layerCount;1229min_layer = range->baseArrayLayer;1230max_layer = range->baseArrayLayer + layer_count;1231} else {1232min_layer = 0;1233max_layer = 0;1234}12351236for (uint32_t level = min_level; level < max_level; level++) {1237if (image->type == VK_IMAGE_TYPE_3D)1238max_layer = u_minify(image->extent.depth, level);1239for (uint32_t layer = min_layer; layer < max_layer; layer++) {1240uint32_t width = u_minify(image->extent.width, level);1241uint32_t height = u_minify(image->extent.height, level);12421243struct v3dv_job *job =1244v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);12451246if (!job)1247return true;12481249/* We start a a new job for each layer so the frame "depth" is 1 */1250v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp,1251image->samples > VK_SAMPLE_COUNT_1_BIT);12521253struct framebuffer_data framebuffer;1254v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,1255&job->frame_tiling);12561257v3dv_X(job->device, job_emit_binning_flush)(job);12581259/* If this triggers it is an application bug: the spec requires1260* that any aspects to clear are present in the image.1261*/1262assert(range->aspectMask & image->aspects);12631264v3dv_X(job->device, job_emit_clear_image_rcl)1265(job, image, &framebuffer, &hw_clear_value,1266range->aspectMask, layer, level);12671268v3dv_cmd_buffer_finish_job(cmd_buffer);1269}1270}12711272return true;1273}12741275VKAPI_ATTR void VKAPI_CALL1276v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,1277VkImage _image,1278VkImageLayout imageLayout,1279const VkClearColorValue *pColor,1280uint32_t rangeCount,1281const VkImageSubresourceRange *pRanges)1282{1283V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);1284V3DV_FROM_HANDLE(v3dv_image, image, _image);12851286const VkClearValue clear_value = {1287.color = *pColor,1288};12891290for (uint32_t i = 0; i < rangeCount; i++) {1291if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))1292continue;1293unreachable("Unsupported color clear.");1294}1295}12961297VKAPI_ATTR void VKAPI_CALL1298v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,1299VkImage _image,1300VkImageLayout imageLayout,1301const VkClearDepthStencilValue *pDepthStencil,1302uint32_t rangeCount,1303const VkImageSubresourceRange *pRanges)1304{1305V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);1306V3DV_FROM_HANDLE(v3dv_image, image, _image);13071308const VkClearValue clear_value = {1309.depthStencil = *pDepthStencil,1310};13111312for (uint32_t i = 0; i < rangeCount; i++) {1313if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))1314continue;1315unreachable("Unsupported depth/stencil clear.");1316}1317}13181319VKAPI_ATTR void VKAPI_CALL1320v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,1321const VkCopyBufferInfo2KHR *pCopyBufferInfo)1322{1323V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);1324V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);1325V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);13261327for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {1328v3dv_X(cmd_buffer->device, cmd_buffer_copy_buffer)1329(cmd_buffer,1330dst_buffer->mem->bo, dst_buffer->mem_offset,1331src_buffer->mem->bo, src_buffer->mem_offset,1332&pCopyBufferInfo->pRegions[i]);1333}1334}13351336static void1337destroy_update_buffer_cb(VkDevice _device,1338uint64_t pobj,1339VkAllocationCallbacks *alloc)1340{1341V3DV_FROM_HANDLE(v3dv_device, device, _device);1342struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);1343v3dv_bo_free(device, bo);1344}13451346VKAPI_ATTR void VKAPI_CALL1347v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,1348VkBuffer dstBuffer,1349VkDeviceSize dstOffset,1350VkDeviceSize dataSize,1351const void *pData)1352{1353V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);1354V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);13551356struct v3dv_bo *src_bo =1357v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);1358if (!src_bo) {1359fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");1360return;1361}13621363bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);1364if (!ok) {1365fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");1366return;1367}13681369memcpy(src_bo->map, pData, dataSize);13701371v3dv_bo_unmap(cmd_buffer->device, src_bo);13721373VkBufferCopy2KHR region = {1374.sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,1375.srcOffset = 0,1376.dstOffset = dstOffset,1377.size = dataSize,1378};1379struct v3dv_job *copy_job =1380v3dv_X(cmd_buffer->device, cmd_buffer_copy_buffer)1381(cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,1382src_bo, 0, ®ion);13831384if (!copy_job)1385return;13861387v3dv_cmd_buffer_add_private_obj(1388cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);1389}13901391VKAPI_ATTR void VKAPI_CALL1392v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,1393VkBuffer dstBuffer,1394VkDeviceSize dstOffset,1395VkDeviceSize size,1396uint32_t data)1397{1398V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);1399V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);14001401struct v3dv_bo *bo = dst_buffer->mem->bo;14021403/* From the Vulkan spec:1404*1405* "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not1406* a multiple of 4, then the nearest smaller multiple is used."1407*/1408if (size == VK_WHOLE_SIZE) {1409size = dst_buffer->size - dstOffset;1410size -= size % 4;1411}14121413v3dv_X(cmd_buffer->device, cmd_buffer_fill_buffer)1414(cmd_buffer, bo, dstOffset, size, data);1415}14161417/**1418* Returns true if the implementation supports the requested operation (even if1419* it failed to process it, for example, due to an out-of-memory error).1420*/1421static bool1422copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,1423struct v3dv_image *image,1424struct v3dv_buffer *buffer,1425const VkBufferImageCopy2KHR *region)1426{1427assert(image->samples == VK_SAMPLE_COUNT_1_BIT);14281429/* Destination can't be raster format */1430if (image->tiling == VK_IMAGE_TILING_LINEAR)1431return false;14321433/* We can't copy D24S8 because buffer to image copies only copy one aspect1434* at a time, and the TFU copies full images. Also, V3D depth bits for1435* both D24S8 and D24X8 stored in the 24-bit MSB of each 32-bit word, but1436* the Vulkan spec has the buffer data specified the other way around, so it1437* is not a straight copy, we would havew to swizzle the channels, which the1438* TFU can't do.1439*/1440if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||1441image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {1442return false;1443}14441445/* Region must include full slice */1446const uint32_t offset_x = region->imageOffset.x;1447const uint32_t offset_y = region->imageOffset.y;1448if (offset_x != 0 || offset_y != 0)1449return false;14501451uint32_t width, height;1452if (region->bufferRowLength == 0)1453width = region->imageExtent.width;1454else1455width = region->bufferRowLength;14561457if (region->bufferImageHeight == 0)1458height = region->imageExtent.height;1459else1460height = region->bufferImageHeight;14611462if (width != image->extent.width || height != image->extent.height)1463return false;14641465/* Handle region semantics for compressed images */1466const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);1467const uint32_t block_h = vk_format_get_blockheight(image->vk_format);1468width = DIV_ROUND_UP(width, block_w);1469height = DIV_ROUND_UP(height, block_h);14701471/* Format must be supported for texturing via the TFU. Since we are just1472* copying raw data and not converting between pixel formats, we can ignore1473* the image's format and choose a compatible TFU format for the image1474* texel size instead, which expands the list of formats we can handle here.1475*/1476const struct v3dv_format *format =1477v3dv_get_compatible_tfu_format(cmd_buffer->device,1478image->cpp, NULL);14791480const uint32_t mip_level = region->imageSubresource.mipLevel;1481const struct v3d_resource_slice *slice = &image->slices[mip_level];14821483uint32_t num_layers;1484if (image->type != VK_IMAGE_TYPE_3D)1485num_layers = region->imageSubresource.layerCount;1486else1487num_layers = region->imageExtent.depth;1488assert(num_layers > 0);14891490assert(image->mem && image->mem->bo);1491const struct v3dv_bo *dst_bo = image->mem->bo;14921493assert(buffer->mem && buffer->mem->bo);1494const struct v3dv_bo *src_bo = buffer->mem->bo;14951496/* Emit a TFU job per layer to copy */1497const uint32_t buffer_stride = width * image->cpp;1498for (int i = 0; i < num_layers; i++) {1499uint32_t layer;1500if (image->type != VK_IMAGE_TYPE_3D)1501layer = region->imageSubresource.baseArrayLayer + i;1502else1503layer = region->imageOffset.z + i;15041505struct drm_v3d_submit_tfu tfu = {1506.ios = (height << 16) | width,1507.bo_handles = {1508dst_bo->handle,1509src_bo->handle != dst_bo->handle ? src_bo->handle : 01510},1511};15121513const uint32_t buffer_offset =1514buffer->mem_offset + region->bufferOffset +1515height * buffer_stride * i;15161517const uint32_t src_offset = src_bo->offset + buffer_offset;1518tfu.iia |= src_offset;1519tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT;1520tfu.iis |= width;15211522const uint32_t dst_offset =1523dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);1524tfu.ioa |= dst_offset;15251526tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +1527(slice->tiling - V3D_TILING_LINEARTILE)) <<1528V3D_TFU_IOA_FORMAT_SHIFT;1529tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;15301531/* If we're writing level 0 (!IOA_DIMTW), then we need to supply the1532* OPAD field for the destination (how many extra UIF blocks beyond1533* those necessary to cover the height).1534*/1535if (slice->tiling == V3D_TILING_UIF_NO_XOR ||1536slice->tiling == V3D_TILING_UIF_XOR) {1537uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);1538uint32_t implicit_padded_height = align(height, uif_block_h);1539uint32_t icfg =1540(slice->padded_height - implicit_padded_height) / uif_block_h;1541tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;1542}15431544v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);1545}15461547return true;1548}15491550/**1551* Returns true if the implementation supports the requested operation (even if1552* it failed to process it, for example, due to an out-of-memory error).1553*/1554static bool1555copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,1556struct v3dv_image *image,1557struct v3dv_buffer *buffer,1558const VkBufferImageCopy2KHR *region)1559{1560VkFormat fb_format;1561if (!can_use_tlb(image, ®ion->imageOffset, &fb_format))1562return false;15631564uint32_t internal_type, internal_bpp;1565v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)1566(fb_format, region->imageSubresource.aspectMask,1567&internal_type, &internal_bpp);15681569uint32_t num_layers;1570if (image->type != VK_IMAGE_TYPE_3D)1571num_layers = region->imageSubresource.layerCount;1572else1573num_layers = region->imageExtent.depth;1574assert(num_layers > 0);15751576struct v3dv_job *job =1577v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);1578if (!job)1579return true;15801581/* Handle copy to compressed format using a compatible format */1582const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);1583const uint32_t block_h = vk_format_get_blockheight(image->vk_format);1584const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);1585const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);15861587v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);15881589struct framebuffer_data framebuffer;1590v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,1591&job->frame_tiling);15921593v3dv_X(job->device, job_emit_binning_flush)(job);1594v3dv_X(job->device, job_emit_copy_buffer_to_image_rcl)1595(job, image, buffer, &framebuffer, region);15961597v3dv_cmd_buffer_finish_job(cmd_buffer);15981599return true;1600}16011602static bool1603create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,1604struct v3dv_image *image,1605struct v3dv_buffer *buffer,1606const VkBufferImageCopy2KHR *region)1607{1608if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))1609return true;1610if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))1611return true;1612return false;1613}16141615static VkResult1616create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)1617{1618/* If this is not the first pool we create for this command buffer1619* size it based on the size of the currently exhausted pool.1620*/1621uint32_t descriptor_count = 64;1622if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {1623struct v3dv_descriptor_pool *exhausted_pool =1624v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);1625descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);1626}16271628/* Create the descriptor pool */1629cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;1630VkDescriptorPoolSize pool_size = {1631.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,1632.descriptorCount = descriptor_count,1633};1634VkDescriptorPoolCreateInfo info = {1635.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,1636.maxSets = descriptor_count,1637.poolSizeCount = 1,1638.pPoolSizes = &pool_size,1639.flags = 0,1640};1641VkResult result =1642v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),1643&info,1644&cmd_buffer->device->vk.alloc,1645&cmd_buffer->meta.texel_buffer_copy.dspool);16461647if (result == VK_SUCCESS) {1648assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);1649const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;16501651v3dv_cmd_buffer_add_private_obj(1652cmd_buffer, (uintptr_t) _pool,1653(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);16541655struct v3dv_descriptor_pool *pool =1656v3dv_descriptor_pool_from_handle(_pool);1657pool->is_driver_internal = true;1658}16591660return result;1661}16621663static VkResult1664allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,1665VkDescriptorSet *set)1666{1667/* Make sure we have a descriptor pool */1668VkResult result;1669if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {1670result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);1671if (result != VK_SUCCESS)1672return result;1673}1674assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);16751676/* Allocate descriptor set */1677struct v3dv_device *device = cmd_buffer->device;1678VkDevice _device = v3dv_device_to_handle(device);1679VkDescriptorSetAllocateInfo info = {1680.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,1681.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,1682.descriptorSetCount = 1,1683.pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,1684};1685result = v3dv_AllocateDescriptorSets(_device, &info, set);16861687/* If we ran out of pool space, grow the pool and try again */1688if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {1689result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);1690if (result == VK_SUCCESS) {1691info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;1692result = v3dv_AllocateDescriptorSets(_device, &info, set);1693}1694}16951696return result;1697}16981699static void1700get_texel_buffer_copy_pipeline_cache_key(VkFormat format,1701VkColorComponentFlags cmask,1702VkComponentMapping *cswizzle,1703bool is_layered,1704uint8_t *key)1705{1706memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);17071708uint32_t *p = (uint32_t *) key;17091710*p = format;1711p++;17121713*p = cmask;1714p++;17151716/* Note that that we are using a single byte for this, so we could pack1717* more data into this 32-bit slot in the future.1718*/1719*p = is_layered ? 1 : 0;1720p++;17211722memcpy(p, cswizzle, sizeof(VkComponentMapping));1723p += sizeof(VkComponentMapping) / sizeof(uint32_t);17241725assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);1726}17271728static bool1729create_blit_render_pass(struct v3dv_device *device,1730VkFormat dst_format,1731VkFormat src_format,1732VkRenderPass *pass_load,1733VkRenderPass *pass_no_load);17341735static nir_ssa_def *gen_rect_vertices(nir_builder *b);17361737static bool1738create_pipeline(struct v3dv_device *device,1739struct v3dv_render_pass *pass,1740struct nir_shader *vs_nir,1741struct nir_shader *gs_nir,1742struct nir_shader *fs_nir,1743const VkPipelineVertexInputStateCreateInfo *vi_state,1744const VkPipelineDepthStencilStateCreateInfo *ds_state,1745const VkPipelineColorBlendStateCreateInfo *cb_state,1746const VkPipelineMultisampleStateCreateInfo *ms_state,1747const VkPipelineLayout layout,1748VkPipeline *pipeline);17491750static nir_shader *1751get_texel_buffer_copy_vs()1752{1753const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();1754nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,1755"meta texel buffer copy vs");1756nir_variable *vs_out_pos =1757nir_variable_create(b.shader, nir_var_shader_out,1758glsl_vec4_type(), "gl_Position");1759vs_out_pos->data.location = VARYING_SLOT_POS;17601761nir_ssa_def *pos = gen_rect_vertices(&b);1762nir_store_var(&b, vs_out_pos, pos, 0xf);17631764return b.shader;1765}17661767static nir_shader *1768get_texel_buffer_copy_gs()1769{1770/* FIXME: this creates a geometry shader that takes the index of a single1771* layer to clear from push constants, so we need to emit a draw call for1772* each layer that we want to clear. We could actually do better and have it1773* take a range of layers however, if we were to do this, we would need to1774* be careful not to exceed the maximum number of output vertices allowed in1775* a geometry shader.1776*/1777const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();1778nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,1779"meta texel buffer copy gs");1780nir_shader *nir = b.shader;1781nir->info.inputs_read = 1ull << VARYING_SLOT_POS;1782nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |1783(1ull << VARYING_SLOT_LAYER);1784nir->info.gs.input_primitive = GL_TRIANGLES;1785nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;1786nir->info.gs.vertices_in = 3;1787nir->info.gs.vertices_out = 3;1788nir->info.gs.invocations = 1;1789nir->info.gs.active_stream_mask = 0x1;17901791/* in vec4 gl_Position[3] */1792nir_variable *gs_in_pos =1793nir_variable_create(b.shader, nir_var_shader_in,1794glsl_array_type(glsl_vec4_type(), 3, 0),1795"in_gl_Position");1796gs_in_pos->data.location = VARYING_SLOT_POS;17971798/* out vec4 gl_Position */1799nir_variable *gs_out_pos =1800nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),1801"out_gl_Position");1802gs_out_pos->data.location = VARYING_SLOT_POS;18031804/* out float gl_Layer */1805nir_variable *gs_out_layer =1806nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),1807"out_gl_Layer");1808gs_out_layer->data.location = VARYING_SLOT_LAYER;18091810/* Emit output triangle */1811for (uint32_t i = 0; i < 3; i++) {1812/* gl_Position from shader input */1813nir_deref_instr *in_pos_i =1814nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);1815nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);18161817/* gl_Layer from push constants */1818nir_ssa_def *layer =1819nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),1820.base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,1821.range = 4);1822nir_store_var(&b, gs_out_layer, layer, 0x1);18231824nir_emit_vertex(&b, 0);1825}18261827nir_end_primitive(&b, 0);18281829return nir;1830}18311832static nir_ssa_def *1833load_frag_coord(nir_builder *b)1834{1835nir_foreach_shader_in_variable(var, b->shader) {1836if (var->data.location == VARYING_SLOT_POS)1837return nir_load_var(b, var);1838}1839nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,1840glsl_vec4_type(), NULL);1841pos->data.location = VARYING_SLOT_POS;1842return nir_load_var(b, pos);1843}18441845static uint32_t1846component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)1847{1848if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)1849swz = comp;18501851switch (swz) {1852case VK_COMPONENT_SWIZZLE_R:1853return 0;1854case VK_COMPONENT_SWIZZLE_G:1855return 1;1856case VK_COMPONENT_SWIZZLE_B:1857return 2;1858case VK_COMPONENT_SWIZZLE_A:1859return 3;1860default:1861unreachable("Invalid swizzle");1862};1863}18641865static nir_shader *1866get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,1867VkComponentMapping *cswizzle)1868{1869const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();1870nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,1871"meta texel buffer copy fs");18721873/* We only use the copy from texel buffer shader to implement1874* copy_buffer_to_image_shader, which always selects a compatible integer1875* format for the copy.1876*/1877assert(vk_format_is_int(format));18781879/* Fragment shader output color */1880nir_variable *fs_out_color =1881nir_variable_create(b.shader, nir_var_shader_out,1882glsl_uvec4_type(), "out_color");1883fs_out_color->data.location = FRAG_RESULT_DATA0;18841885/* Texel buffer input */1886const struct glsl_type *sampler_type =1887glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);1888nir_variable *sampler =1889nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");1890sampler->data.descriptor_set = 0;1891sampler->data.binding = 0;18921893/* Load the box describing the pixel region we want to copy from the1894* texel buffer.1895*/1896nir_ssa_def *box =1897nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),1898.base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,1899.range = 16);19001901/* Load the buffer stride (this comes in texel units) */1902nir_ssa_def *stride =1903nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),1904.base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,1905.range = 4);19061907/* Load the buffer offset (this comes in texel units) */1908nir_ssa_def *offset =1909nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),1910.base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,1911.range = 4);19121913nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));19141915/* Load pixel data from texel buffer based on the x,y offset of the pixel1916* within the box. Texel buffers are 1D arrays of texels.1917*1918* Notice that we already make sure that we only generate fragments that are1919* inside the box through the scissor/viewport state, so our offset into the1920* texel buffer should always be within its bounds and we we don't need1921* to add a check for that here.1922*/1923nir_ssa_def *x_offset =1924nir_isub(&b, nir_channel(&b, coord, 0),1925nir_channel(&b, box, 0));1926nir_ssa_def *y_offset =1927nir_isub(&b, nir_channel(&b, coord, 1),1928nir_channel(&b, box, 1));1929nir_ssa_def *texel_offset =1930nir_iadd(&b, nir_iadd(&b, offset, x_offset),1931nir_imul(&b, y_offset, stride));19321933nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;1934nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);1935tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;1936tex->op = nir_texop_txf;1937tex->src[0].src_type = nir_tex_src_coord;1938tex->src[0].src = nir_src_for_ssa(texel_offset);1939tex->src[1].src_type = nir_tex_src_texture_deref;1940tex->src[1].src = nir_src_for_ssa(tex_deref);1941tex->dest_type = nir_type_uint32;1942tex->is_array = false;1943tex->coord_components = 1;1944nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");1945nir_builder_instr_insert(&b, &tex->instr);19461947uint32_t swiz[4];1948swiz[0] =1949component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);1950swiz[1] =1951component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);1952swiz[2] =1953component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);1954swiz[3] =1955component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);1956nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);1957nir_store_var(&b, fs_out_color, s, 0xf);19581959return b.shader;1960}19611962static bool1963create_texel_buffer_copy_pipeline(struct v3dv_device *device,1964VkFormat format,1965VkColorComponentFlags cmask,1966VkComponentMapping *cswizzle,1967bool is_layered,1968VkRenderPass _pass,1969VkPipelineLayout pipeline_layout,1970VkPipeline *pipeline)1971{1972struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);19731974assert(vk_format_is_color(format));19751976nir_shader *vs_nir = get_texel_buffer_copy_vs();1977nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);1978nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;19791980const VkPipelineVertexInputStateCreateInfo vi_state = {1981.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,1982.vertexBindingDescriptionCount = 0,1983.vertexAttributeDescriptionCount = 0,1984};19851986VkPipelineDepthStencilStateCreateInfo ds_state = {1987.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,1988};19891990VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };1991blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {1992.blendEnable = false,1993.colorWriteMask = cmask,1994};19951996const VkPipelineColorBlendStateCreateInfo cb_state = {1997.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,1998.logicOpEnable = false,1999.attachmentCount = 1,2000.pAttachments = blend_att_state2001};20022003const VkPipelineMultisampleStateCreateInfo ms_state = {2004.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,2005.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,2006.sampleShadingEnable = false,2007.pSampleMask = NULL,2008.alphaToCoverageEnable = false,2009.alphaToOneEnable = false,2010};20112012return create_pipeline(device,2013pass,2014vs_nir, gs_nir, fs_nir,2015&vi_state,2016&ds_state,2017&cb_state,2018&ms_state,2019pipeline_layout,2020pipeline);2021}20222023static bool2024get_copy_texel_buffer_pipeline(2025struct v3dv_device *device,2026VkFormat format,2027VkColorComponentFlags cmask,2028VkComponentMapping *cswizzle,2029VkImageType image_type,2030bool is_layered,2031struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)2032{2033bool ok = true;20342035uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];2036get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,2037key);20382039mtx_lock(&device->meta.mtx);2040struct hash_entry *entry =2041_mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],2042&key);2043if (entry) {2044mtx_unlock(&device->meta.mtx);2045*pipeline = entry->data;2046return true;2047}20482049*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,2050VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);20512052if (*pipeline == NULL)2053goto fail;20542055/* The blit render pass is compatible */2056ok = create_blit_render_pass(device, format, format,2057&(*pipeline)->pass,2058&(*pipeline)->pass_no_load);2059if (!ok)2060goto fail;20612062ok =2063create_texel_buffer_copy_pipeline(device,2064format, cmask, cswizzle, is_layered,2065(*pipeline)->pass,2066device->meta.texel_buffer_copy.p_layout,2067&(*pipeline)->pipeline);2068if (!ok)2069goto fail;20702071_mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],2072&key, *pipeline);20732074mtx_unlock(&device->meta.mtx);2075return true;20762077fail:2078mtx_unlock(&device->meta.mtx);20792080VkDevice _device = v3dv_device_to_handle(device);2081if (*pipeline) {2082if ((*pipeline)->pass)2083v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);2084if ((*pipeline)->pipeline)2085v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);2086vk_free(&device->vk.alloc, *pipeline);2087*pipeline = NULL;2088}20892090return false;2091}20922093static bool2094texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,2095VkImageAspectFlags aspect,2096struct v3dv_image *image,2097VkFormat dst_format,2098VkFormat src_format,2099struct v3dv_buffer *buffer,2100uint32_t buffer_bpp,2101VkColorComponentFlags cmask,2102VkComponentMapping *cswizzle,2103uint32_t region_count,2104const VkBufferImageCopy2KHR *regions)2105{2106VkResult result;2107bool handled = false;21082109assert(cswizzle);21102111/* This is a copy path, so we don't handle format conversions. The only2112* exception are stencil to D24S8 copies, which are handled as a color2113* masked R8->RGBA8 copy.2114*/2115assert(src_format == dst_format ||2116(dst_format == VK_FORMAT_R8G8B8A8_UINT &&2117src_format == VK_FORMAT_R8_UINT &&2118cmask == VK_COLOR_COMPONENT_R_BIT));21192120/* We only handle color copies. Callers can copy D/S aspects by using2121* a compatible color format and maybe a cmask/cswizzle for D24 formats.2122*/2123if (aspect != VK_IMAGE_ASPECT_COLOR_BIT)2124return handled;21252126/* FIXME: we only handle uncompressed images for now. */2127if (vk_format_is_compressed(image->vk_format))2128return handled;21292130const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |2131VK_COLOR_COMPONENT_G_BIT |2132VK_COLOR_COMPONENT_B_BIT |2133VK_COLOR_COMPONENT_A_BIT;2134if (cmask == 0)2135cmask = full_cmask;21362137/* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT2138* so we can bind it as a texel buffer. Otherwise, the buffer view2139* we create below won't setup the texture state that we need for this.2140*/2141if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {2142if (v3dv_buffer_format_supports_features(2143cmd_buffer->device, src_format,2144VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {2145buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;2146} else {2147return handled;2148}2149}21502151/* At this point we should be able to handle the copy unless an unexpected2152* error occurs, such as an OOM.2153*/2154handled = true;215521562157/* Compute the number of layers to copy.2158*2159* If we are batching (region_count > 1) all our regions have the same2160* image subresource so we can take this from the first region. For 3D2161* images we require the same depth extent.2162*/2163const VkImageSubresourceLayers *resource = ®ions[0].imageSubresource;2164uint32_t num_layers;2165if (image->type != VK_IMAGE_TYPE_3D) {2166num_layers = resource->layerCount;2167} else {2168assert(region_count == 1);2169num_layers = regions[0].imageExtent.depth;2170}2171assert(num_layers > 0);21722173/* Get the texel buffer copy pipeline */2174struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;2175bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,2176dst_format, cmask, cswizzle,2177image->type, num_layers > 1,2178&pipeline);2179if (!ok)2180return handled;2181assert(pipeline && pipeline->pipeline && pipeline->pass);21822183/* Setup descriptor set for the source texel buffer. We don't have to2184* register the descriptor as a private command buffer object since2185* all descriptors will be freed automatically with the descriptor2186* pool.2187*/2188VkDescriptorSet set;2189result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);2190if (result != VK_SUCCESS)2191return handled;21922193/* FIXME: for some reason passing region->bufferOffset here for the2194* offset field doesn't work, making the following CTS tests fail:2195*2196* dEQP-VK.api.copy_and_blit.core.buffer_to_image.*buffer_offset*2197*2198* So instead we pass 0 here and we pass the offset in texels as a push2199* constant to the shader, which seems to work correctly.2200*/2201VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);2202VkBufferViewCreateInfo buffer_view_info = {2203.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,2204.buffer = v3dv_buffer_to_handle(buffer),2205.format = src_format,2206.offset = 0,2207.range = VK_WHOLE_SIZE,2208};22092210VkBufferView texel_buffer_view;2211result = v3dv_CreateBufferView(_device, &buffer_view_info,2212&cmd_buffer->device->vk.alloc,2213&texel_buffer_view);2214if (result != VK_SUCCESS)2215return handled;22162217v3dv_cmd_buffer_add_private_obj(2218cmd_buffer, (uintptr_t)texel_buffer_view,2219(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);22202221VkWriteDescriptorSet write = {2222.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,2223.dstSet = set,2224.dstBinding = 0,2225.dstArrayElement = 0,2226.descriptorCount = 1,2227.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,2228.pTexelBufferView = &texel_buffer_view,2229};2230v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);22312232/* Push command buffer state before starting meta operation */2233v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);2234uint32_t dirty_dynamic_state = 0;22352236/* Bind common state for all layers and regions */2237VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);2238v3dv_CmdBindPipeline(_cmd_buffer,2239VK_PIPELINE_BIND_POINT_GRAPHICS,2240pipeline->pipeline);22412242v3dv_CmdBindDescriptorSets(_cmd_buffer,2243VK_PIPELINE_BIND_POINT_GRAPHICS,2244cmd_buffer->device->meta.texel_buffer_copy.p_layout,22450, 1, &set,22460, NULL);22472248/* Setup framebuffer.2249*2250* For 3D images, this creates a layered framebuffer with a number of2251* layers matching the depth extent of the 3D image.2252*/2253uint32_t fb_width = u_minify(image->extent.width, resource->mipLevel);2254uint32_t fb_height = u_minify(image->extent.height, resource->mipLevel);2255VkImageViewCreateInfo image_view_info = {2256.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,2257.image = v3dv_image_to_handle(image),2258.viewType = v3dv_image_type_to_view_type(image->type),2259.format = dst_format,2260.subresourceRange = {2261.aspectMask = aspect,2262.baseMipLevel = resource->mipLevel,2263.levelCount = 1,2264.baseArrayLayer = resource->baseArrayLayer,2265.layerCount = num_layers,2266},2267};2268VkImageView image_view;2269result = v3dv_CreateImageView(_device, &image_view_info,2270&cmd_buffer->device->vk.alloc, &image_view);2271if (result != VK_SUCCESS)2272goto fail;22732274v3dv_cmd_buffer_add_private_obj(2275cmd_buffer, (uintptr_t)image_view,2276(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);22772278VkFramebufferCreateInfo fb_info = {2279.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,2280.renderPass = pipeline->pass,2281.attachmentCount = 1,2282.pAttachments = &image_view,2283.width = fb_width,2284.height = fb_height,2285.layers = num_layers,2286};22872288VkFramebuffer fb;2289result = v3dv_CreateFramebuffer(_device, &fb_info,2290&cmd_buffer->device->vk.alloc, &fb);2291if (result != VK_SUCCESS)2292goto fail;22932294v3dv_cmd_buffer_add_private_obj(2295cmd_buffer, (uintptr_t)fb,2296(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);22972298/* For each layer */2299for (uint32_t l = 0; l < num_layers; l++) {2300/* Start render pass for this layer.2301*2302* If the we only have one region to copy, then we might be able to2303* skip the TLB load if it is aligned to tile boundaries. All layers2304* copy the same area, so we only need to check this once.2305*/2306bool can_skip_tlb_load = false;2307VkRect2D render_area;2308if (region_count == 1) {2309render_area.offset.x = regions[0].imageOffset.x;2310render_area.offset.y = regions[0].imageOffset.y;2311render_area.extent.width = regions[0].imageExtent.width;2312render_area.extent.height = regions[0].imageExtent.height;23132314if (l == 0) {2315struct v3dv_render_pass *pipeline_pass =2316v3dv_render_pass_from_handle(pipeline->pass);2317can_skip_tlb_load =2318cmask == full_cmask &&2319v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,2320v3dv_framebuffer_from_handle(fb),2321pipeline_pass, 0);2322}2323} else {2324render_area.offset.x = 0;2325render_area.offset.y = 0;2326render_area.extent.width = fb_width;2327render_area.extent.height = fb_height;2328}23292330VkRenderPassBeginInfo rp_info = {2331.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,2332.renderPass = can_skip_tlb_load ? pipeline->pass_no_load :2333pipeline->pass,2334.framebuffer = fb,2335.renderArea = render_area,2336.clearValueCount = 0,2337};23382339v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);2340struct v3dv_job *job = cmd_buffer->state.job;2341if (!job)2342goto fail;23432344/* If we are using a layered copy we need to specify the layer for the2345* Geometry Shader.2346*/2347if (num_layers > 1) {2348uint32_t layer = resource->baseArrayLayer + l;2349v3dv_CmdPushConstants(_cmd_buffer,2350cmd_buffer->device->meta.texel_buffer_copy.p_layout,2351VK_SHADER_STAGE_GEOMETRY_BIT,235224, 4, &layer);2353}23542355/* For each region */2356dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;2357for (uint32_t r = 0; r < region_count; r++) {2358const VkBufferImageCopy2KHR *region = ®ions[r];23592360/* Obtain the 2D buffer region spec */2361uint32_t buf_width, buf_height;2362if (region->bufferRowLength == 0)2363buf_width = region->imageExtent.width;2364else2365buf_width = region->bufferRowLength;23662367if (region->bufferImageHeight == 0)2368buf_height = region->imageExtent.height;2369else2370buf_height = region->bufferImageHeight;23712372const VkViewport viewport = {2373.x = region->imageOffset.x,2374.y = region->imageOffset.y,2375.width = region->imageExtent.width,2376.height = region->imageExtent.height,2377.minDepth = 0.0f,2378.maxDepth = 1.0f2379};2380v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);2381const VkRect2D scissor = {2382.offset = { region->imageOffset.x, region->imageOffset.y },2383.extent = { region->imageExtent.width, region->imageExtent.height }2384};2385v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);23862387const VkDeviceSize buf_offset =2388region->bufferOffset / buffer_bpp + l * buf_height * buf_width;2389uint32_t push_data[6] = {2390region->imageOffset.x,2391region->imageOffset.y,2392region->imageOffset.x + region->imageExtent.width - 1,2393region->imageOffset.y + region->imageExtent.height - 1,2394buf_width,2395buf_offset,2396};23972398v3dv_CmdPushConstants(_cmd_buffer,2399cmd_buffer->device->meta.texel_buffer_copy.p_layout,2400VK_SHADER_STAGE_FRAGMENT_BIT,24010, sizeof(push_data), &push_data);24022403v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);2404} /* For each region */24052406v3dv_CmdEndRenderPass(_cmd_buffer);2407} /* For each layer */24082409fail:2410v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);2411return handled;2412}24132414/**2415* Returns true if the implementation supports the requested operation (even if2416* it failed to process it, for example, due to an out-of-memory error).2417*/2418static bool2419copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,2420VkImageAspectFlags aspect,2421struct v3dv_image *image,2422VkFormat dst_format,2423VkFormat src_format,2424struct v3dv_buffer *buffer,2425uint32_t buffer_bpp,2426VkColorComponentFlags cmask,2427VkComponentMapping *cswizzle,2428uint32_t region_count,2429const VkBufferImageCopy2KHR *regions)2430{2431/* Since we can't sample linear images we need to upload the linear2432* buffer to a tiled image that we can use as a blit source, which2433* is slow.2434*/2435perf_debug("Falling back to blit path for buffer to image copy.\n");24362437struct v3dv_device *device = cmd_buffer->device;2438VkDevice _device = v3dv_device_to_handle(device);2439bool handled = true;24402441/* Allocate memory for the tiled image. Since we copy layer by layer2442* we allocate memory to hold a full layer, which is the worse case.2443* For that we create a dummy image with that spec, get memory requirements2444* for it and use that information to create the memory allocation.2445* We will then reuse this memory store for all the regions we want to2446* copy.2447*/2448VkImage dummy_image;2449VkImageCreateInfo dummy_info = {2450.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,2451.imageType = VK_IMAGE_TYPE_2D,2452.format = src_format,2453.extent = { image->extent.width, image->extent.height, 1 },2454.mipLevels = 1,2455.arrayLayers = 1,2456.samples = VK_SAMPLE_COUNT_1_BIT,2457.tiling = VK_IMAGE_TILING_OPTIMAL,2458.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |2459VK_IMAGE_USAGE_TRANSFER_DST_BIT,2460.sharingMode = VK_SHARING_MODE_EXCLUSIVE,2461.queueFamilyIndexCount = 0,2462.initialLayout = VK_IMAGE_LAYOUT_GENERAL,2463};2464VkResult result =2465v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);2466if (result != VK_SUCCESS)2467return handled;24682469VkMemoryRequirements reqs;2470vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);2471v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);24722473VkDeviceMemory mem;2474VkMemoryAllocateInfo alloc_info = {2475.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,2476.allocationSize = reqs.size,2477.memoryTypeIndex = 0,2478};2479result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);2480if (result != VK_SUCCESS)2481return handled;24822483v3dv_cmd_buffer_add_private_obj(2484cmd_buffer, (uintptr_t)mem,2485(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);24862487/* Obtain the layer count.2488*2489* If we are batching (region_count > 1) all our regions have the same2490* image subresource so we can take this from the first region.2491*/2492uint32_t num_layers;2493if (image->type != VK_IMAGE_TYPE_3D)2494num_layers = regions[0].imageSubresource.layerCount;2495else2496num_layers = regions[0].imageExtent.depth;2497assert(num_layers > 0);24982499/* Sanity check: we can only batch multiple regions together if they have2500* the same framebuffer (so the same layer).2501*/2502assert(num_layers == 1 || region_count == 1);25032504const uint32_t block_width = vk_format_get_blockwidth(image->vk_format);2505const uint32_t block_height = vk_format_get_blockheight(image->vk_format);25062507/* Copy regions by uploading each region to a temporary tiled image using2508* the memory we have just allocated as storage.2509*/2510for (uint32_t r = 0; r < region_count; r++) {2511const VkBufferImageCopy2KHR *region = ®ions[r];25122513/* Obtain the 2D buffer region spec */2514uint32_t buf_width, buf_height;2515if (region->bufferRowLength == 0)2516buf_width = region->imageExtent.width;2517else2518buf_width = region->bufferRowLength;25192520if (region->bufferImageHeight == 0)2521buf_height = region->imageExtent.height;2522else2523buf_height = region->bufferImageHeight;25242525/* If the image is compressed, the bpp refers to blocks, not pixels */2526buf_width = buf_width / block_width;2527buf_height = buf_height / block_height;25282529for (uint32_t i = 0; i < num_layers; i++) {2530/* Create the tiled image */2531VkImageCreateInfo image_info = {2532.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,2533.imageType = VK_IMAGE_TYPE_2D,2534.format = src_format,2535.extent = { buf_width, buf_height, 1 },2536.mipLevels = 1,2537.arrayLayers = 1,2538.samples = VK_SAMPLE_COUNT_1_BIT,2539.tiling = VK_IMAGE_TILING_OPTIMAL,2540.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |2541VK_IMAGE_USAGE_TRANSFER_DST_BIT,2542.sharingMode = VK_SHARING_MODE_EXCLUSIVE,2543.queueFamilyIndexCount = 0,2544.initialLayout = VK_IMAGE_LAYOUT_GENERAL,2545};25462547VkImage buffer_image;2548VkResult result =2549v3dv_CreateImage(_device, &image_info, &device->vk.alloc,2550&buffer_image);2551if (result != VK_SUCCESS)2552return handled;25532554v3dv_cmd_buffer_add_private_obj(2555cmd_buffer, (uintptr_t)buffer_image,2556(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);25572558result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);2559if (result != VK_SUCCESS)2560return handled;25612562/* Upload buffer contents for the selected layer */2563const VkDeviceSize buf_offset_bytes =2564region->bufferOffset + i * buf_height * buf_width * buffer_bpp;2565const VkBufferImageCopy2KHR buffer_image_copy = {2566.sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR,2567.bufferOffset = buf_offset_bytes,2568.bufferRowLength = region->bufferRowLength / block_width,2569.bufferImageHeight = region->bufferImageHeight / block_height,2570.imageSubresource = {2571.aspectMask = aspect,2572.mipLevel = 0,2573.baseArrayLayer = 0,2574.layerCount = 1,2575},2576.imageOffset = { 0, 0, 0 },2577.imageExtent = { buf_width, buf_height, 1 }2578};2579handled =2580create_tiled_image_from_buffer(cmd_buffer,2581v3dv_image_from_handle(buffer_image),2582buffer, &buffer_image_copy);2583if (!handled) {2584/* This is unexpected, we should have setup the upload to be2585* conformant to a TFU or TLB copy.2586*/2587unreachable("Unable to copy buffer to image through TLB");2588return false;2589}25902591/* Blit-copy the requested image extent from the buffer image to the2592* destination image.2593*2594* Since we are copying, the blit must use the same format on the2595* destination and source images to avoid format conversions. The2596* only exception is copying stencil, which we upload to a R8UI source2597* image, but that we need to blit to a S8D24 destination (the only2598* stencil format we support).2599*/2600const VkImageBlit2KHR blit_region = {2601.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,2602.srcSubresource = {2603.aspectMask = aspect,2604.mipLevel = 0,2605.baseArrayLayer = 0,2606.layerCount = 1,2607},2608.srcOffsets = {2609{ 0, 0, 0 },2610{ region->imageExtent.width, region->imageExtent.height, 1 },2611},2612.dstSubresource = {2613.aspectMask = aspect,2614.mipLevel = region->imageSubresource.mipLevel,2615.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,2616.layerCount = 1,2617},2618.dstOffsets = {2619{2620DIV_ROUND_UP(region->imageOffset.x, block_width),2621DIV_ROUND_UP(region->imageOffset.y, block_height),2622region->imageOffset.z + i,2623},2624{2625DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,2626block_width),2627DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,2628block_height),2629region->imageOffset.z + i + 1,2630},2631},2632};26332634handled = blit_shader(cmd_buffer,2635image, dst_format,2636v3dv_image_from_handle(buffer_image), src_format,2637cmask, cswizzle,2638&blit_region, VK_FILTER_NEAREST, true);2639if (!handled) {2640/* This is unexpected, we should have a supported blit spec */2641unreachable("Unable to blit buffer to destination image");2642return false;2643}2644}2645}26462647return handled;2648}26492650/**2651* Returns true if the implementation supports the requested operation (even if2652* it failed to process it, for example, due to an out-of-memory error).2653*/2654static bool2655copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,2656struct v3dv_image *image,2657struct v3dv_buffer *buffer,2658uint32_t region_count,2659const VkBufferImageCopy2KHR *regions,2660bool use_texel_buffer)2661{2662/* We can only call this with region_count > 1 if we can batch the regions2663* together, in which case they share the same image subresource, and so2664* the same aspect.2665*/2666VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;26672668/* Generally, the bpp of the data in the buffer matches that of the2669* destination image. The exception is the case where we are uploading2670* stencil (8bpp) to a combined d24s8 image (32bpp).2671*/2672uint32_t buf_bpp = image->cpp;26732674/* We are about to upload the buffer data to an image so we can then2675* blit that to our destination region. Because we are going to implement2676* the copy as a blit, we want our blit source and destination formats to be2677* the same (to avoid any format conversions), so we choose a canonical2678* format that matches the destination image bpp.2679*/2680VkComponentMapping ident_swizzle = {2681.r = VK_COMPONENT_SWIZZLE_IDENTITY,2682.g = VK_COMPONENT_SWIZZLE_IDENTITY,2683.b = VK_COMPONENT_SWIZZLE_IDENTITY,2684.a = VK_COMPONENT_SWIZZLE_IDENTITY,2685};26862687VkComponentMapping cswizzle = ident_swizzle;2688VkColorComponentFlags cmask = 0; /* Write all components */2689VkFormat src_format;2690VkFormat dst_format;2691switch (buf_bpp) {2692case 16:2693assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);2694src_format = VK_FORMAT_R32G32B32A32_UINT;2695dst_format = src_format;2696break;2697case 8:2698assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);2699src_format = VK_FORMAT_R16G16B16A16_UINT;2700dst_format = src_format;2701break;2702case 4:2703switch (aspect) {2704case VK_IMAGE_ASPECT_COLOR_BIT:2705src_format = VK_FORMAT_R8G8B8A8_UINT;2706dst_format = src_format;2707break;2708case VK_IMAGE_ASPECT_DEPTH_BIT:2709assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||2710image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||2711image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);2712src_format = VK_FORMAT_R8G8B8A8_UINT;2713dst_format = src_format;2714aspect = VK_IMAGE_ASPECT_COLOR_BIT;27152716/* For D24 formats, the Vulkan spec states that the depth component2717* in the buffer is stored in the 24-LSB, but V3D wants it in the2718* 24-MSB.2719*/2720if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||2721image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {2722cmask = VK_COLOR_COMPONENT_G_BIT |2723VK_COLOR_COMPONENT_B_BIT |2724VK_COLOR_COMPONENT_A_BIT;2725cswizzle.r = VK_COMPONENT_SWIZZLE_R;2726cswizzle.g = VK_COMPONENT_SWIZZLE_R;2727cswizzle.b = VK_COMPONENT_SWIZZLE_G;2728cswizzle.a = VK_COMPONENT_SWIZZLE_B;2729}2730break;2731case VK_IMAGE_ASPECT_STENCIL_BIT:2732/* Since we don't support separate stencil this is always a stencil2733* copy to a combined depth/stencil image. Because we don't support2734* separate stencil images, we interpret the buffer data as a2735* color R8UI image, and implement the blit as a compatible color2736* blit to an RGBA8UI destination masking out writes to components2737* GBA (which map to the D24 component of a S8D24 image).2738*/2739assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);2740buf_bpp = 1;2741src_format = VK_FORMAT_R8_UINT;2742dst_format = VK_FORMAT_R8G8B8A8_UINT;2743cmask = VK_COLOR_COMPONENT_R_BIT;2744aspect = VK_IMAGE_ASPECT_COLOR_BIT;2745break;2746default:2747unreachable("unsupported aspect");2748return false;2749};2750break;2751case 2:2752aspect = VK_IMAGE_ASPECT_COLOR_BIT;2753src_format = VK_FORMAT_R16_UINT;2754dst_format = src_format;2755break;2756case 1:2757assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);2758src_format = VK_FORMAT_R8_UINT;2759dst_format = src_format;2760break;2761default:2762unreachable("unsupported bit-size");2763return false;2764}27652766if (use_texel_buffer) {2767return texel_buffer_shader_copy(cmd_buffer, aspect, image,2768dst_format, src_format,2769buffer, buf_bpp,2770cmask, &cswizzle,2771region_count, regions);2772} else {2773return copy_buffer_to_image_blit(cmd_buffer, aspect, image,2774dst_format, src_format,2775buffer, buf_bpp,2776cmask, &cswizzle,2777region_count, regions);2778}2779}27802781/**2782* Returns true if the implementation supports the requested operation (even if2783* it failed to process it, for example, due to an out-of-memory error).2784*/2785static bool2786copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,2787struct v3dv_image *image,2788struct v3dv_buffer *buffer,2789const VkBufferImageCopy2KHR *region)2790{2791/* FIXME */2792if (vk_format_is_depth_or_stencil(image->vk_format))2793return false;27942795if (vk_format_is_compressed(image->vk_format))2796return false;27972798if (image->tiling == VK_IMAGE_TILING_LINEAR)2799return false;28002801uint32_t buffer_width, buffer_height;2802if (region->bufferRowLength == 0)2803buffer_width = region->imageExtent.width;2804else2805buffer_width = region->bufferRowLength;28062807if (region->bufferImageHeight == 0)2808buffer_height = region->imageExtent.height;2809else2810buffer_height = region->bufferImageHeight;28112812uint32_t buffer_stride = buffer_width * image->cpp;2813uint32_t buffer_layer_stride = buffer_stride * buffer_height;28142815uint32_t num_layers;2816if (image->type != VK_IMAGE_TYPE_3D)2817num_layers = region->imageSubresource.layerCount;2818else2819num_layers = region->imageExtent.depth;2820assert(num_layers > 0);28212822struct v3dv_job *job =2823v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,2824V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,2825cmd_buffer, -1);2826if (!job)2827return true;28282829job->cpu.copy_buffer_to_image.image = image;2830job->cpu.copy_buffer_to_image.buffer = buffer;2831job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;2832job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;2833job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;2834job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;2835job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;2836job->cpu.copy_buffer_to_image.mip_level =2837region->imageSubresource.mipLevel;2838job->cpu.copy_buffer_to_image.base_layer =2839region->imageSubresource.baseArrayLayer;2840job->cpu.copy_buffer_to_image.layer_count = num_layers;28412842list_addtail(&job->list_link, &cmd_buffer->jobs);28432844return true;2845}28462847VKAPI_ATTR void VKAPI_CALL2848v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,2849const VkCopyBufferToImageInfo2KHR *info)2850{2851V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);2852V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);2853V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);28542855assert(image->samples == VK_SAMPLE_COUNT_1_BIT);28562857uint32_t r = 0;2858while (r < info->regionCount) {2859/* The TFU and TLB paths can only copy one region at a time and the region2860* needs to start at the origin. We try these first for the common case2861* where we are copying full images, since they should be the fastest.2862*/2863uint32_t batch_size = 1;2864if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))2865goto handled;28662867if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))2868goto handled;28692870/* Otherwise, we are copying subrects, so we fallback to copying2871* via shader and texel buffers and we try to batch the regions2872* if possible. We can only batch copies if they have the same2873* framebuffer spec, which is mostly determined by the image2874* subresource of the region.2875*/2876const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;2877for (uint32_t s = r + 1; s < info->regionCount; s++) {2878const VkImageSubresourceLayers *rsc_s =2879&info->pRegions[s].imageSubresource;28802881if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)2882break;28832884/* For 3D images we also need to check the depth extent */2885if (image->type == VK_IMAGE_TYPE_3D &&2886info->pRegions[s].imageExtent.depth !=2887info->pRegions[r].imageExtent.depth) {2888break;2889}28902891batch_size++;2892}28932894if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,2895batch_size, &info->pRegions[r], true)) {2896goto handled;2897}28982899/* If we still could not copy, fallback to slower paths.2900*2901* FIXME: we could try to batch these too, but since they are bound to be2902* slow it might not be worth it and we should instead put more effort2903* in handling more cases with the other paths.2904*/2905if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,2906&info->pRegions[r])) {2907batch_size = 1;2908goto handled;2909}29102911if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,2912batch_size, &info->pRegions[r], false)) {2913goto handled;2914}29152916unreachable("Unsupported buffer to image copy.");29172918handled:2919r += batch_size;2920}2921}29222923static void2924compute_blit_3d_layers(const VkOffset3D *offsets,2925uint32_t *min_layer, uint32_t *max_layer,2926bool *mirror_z);29272928/**2929* Returns true if the implementation supports the requested operation (even if2930* it failed to process it, for example, due to an out-of-memory error).2931*2932* The TFU blit path doesn't handle scaling so the blit filter parameter can2933* be ignored.2934*/2935static bool2936blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,2937struct v3dv_image *dst,2938struct v3dv_image *src,2939const VkImageBlit2KHR *region)2940{2941assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);2942assert(src->samples == VK_SAMPLE_COUNT_1_BIT);29432944/* Format must match */2945if (src->vk_format != dst->vk_format)2946return false;29472948/* Destination can't be raster format */2949if (dst->tiling == VK_IMAGE_TILING_LINEAR)2950return false;29512952/* Source region must start at (0,0) */2953if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)2954return false;29552956/* Destination image must be complete */2957if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)2958return false;29592960const uint32_t dst_mip_level = region->dstSubresource.mipLevel;2961const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);2962const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);2963if (region->dstOffsets[1].x < dst_width - 1||2964region->dstOffsets[1].y < dst_height - 1) {2965return false;2966}29672968/* No XY scaling */2969if (region->srcOffsets[1].x != region->dstOffsets[1].x ||2970region->srcOffsets[1].y != region->dstOffsets[1].y) {2971return false;2972}29732974/* If the format is D24S8 both aspects need to be copied, since the TFU2975* can't be programmed to copy only one aspect of the image.2976*/2977if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {2978const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |2979VK_IMAGE_ASPECT_STENCIL_BIT;2980if (region->dstSubresource.aspectMask != ds_aspects)2981return false;2982}29832984/* Our TFU blits only handle exact copies (it requires same formats2985* on input and output, no scaling, etc), so there is no pixel format2986* conversions and we can rewrite the format to use one that is TFU2987* compatible based on its texel size.2988*/2989const struct v3dv_format *format =2990v3dv_get_compatible_tfu_format(cmd_buffer->device,2991dst->cpp, NULL);29922993/* Emit a TFU job for each layer to blit */2994assert(region->dstSubresource.layerCount ==2995region->srcSubresource.layerCount);29962997uint32_t min_dst_layer;2998uint32_t max_dst_layer;2999bool dst_mirror_z = false;3000if (dst->type == VK_IMAGE_TYPE_3D) {3001compute_blit_3d_layers(region->dstOffsets,3002&min_dst_layer, &max_dst_layer,3003&dst_mirror_z);3004} else {3005min_dst_layer = region->dstSubresource.baseArrayLayer;3006max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;3007}30083009uint32_t min_src_layer;3010uint32_t max_src_layer;3011bool src_mirror_z = false;3012if (src->type == VK_IMAGE_TYPE_3D) {3013compute_blit_3d_layers(region->srcOffsets,3014&min_src_layer, &max_src_layer,3015&src_mirror_z);3016} else {3017min_src_layer = region->srcSubresource.baseArrayLayer;3018max_src_layer = min_src_layer + region->srcSubresource.layerCount;3019}30203021/* No Z scaling for 3D images (for non-3D images both src and dst must3022* have the same layerCount).3023*/3024if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)3025return false;30263027const uint32_t layer_count = max_dst_layer - min_dst_layer;3028const uint32_t src_mip_level = region->srcSubresource.mipLevel;3029for (uint32_t i = 0; i < layer_count; i++) {3030/* Since the TFU path doesn't handle scaling, Z mirroring for 3D images3031* only involves reversing the order of the slices.3032*/3033const uint32_t dst_layer =3034dst_mirror_z ? max_dst_layer - i - 1: min_dst_layer + i;3035const uint32_t src_layer =3036src_mirror_z ? max_src_layer - i - 1: min_src_layer + i;3037v3dv_X(cmd_buffer->device, cmd_buffer_emit_tfu_job)3038(cmd_buffer, dst, dst_mip_level, dst_layer,3039src, src_mip_level, src_layer,3040dst_width, dst_height, format);3041}30423043return true;3044}30453046static bool3047format_needs_software_int_clamp(VkFormat format)3048{3049switch (format) {3050case VK_FORMAT_A2R10G10B10_UINT_PACK32:3051case VK_FORMAT_A2R10G10B10_SINT_PACK32:3052case VK_FORMAT_A2B10G10R10_UINT_PACK32:3053case VK_FORMAT_A2B10G10R10_SINT_PACK32:3054return true;3055default:3056return false;3057};3058}30593060static void3061get_blit_pipeline_cache_key(VkFormat dst_format,3062VkFormat src_format,3063VkColorComponentFlags cmask,3064VkSampleCountFlagBits dst_samples,3065VkSampleCountFlagBits src_samples,3066uint8_t *key)3067{3068memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);30693070uint32_t *p = (uint32_t *) key;30713072*p = dst_format;3073p++;30743075/* Generally, when blitting from a larger format to a smaller format3076* the hardware takes care of clamping the source to the RT range.3077* Specifically, for integer formats, this is done by using3078* V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this3079* clamps to the bit-size of the render type, and some formats, such as3080* rgb10a2_uint have a 16-bit type, so it won't do what we need and we3081* require to clamp in software. In these cases, we need to amend the blit3082* shader with clamp code that depends on both the src and dst formats, so3083* we need the src format to be part of the key.3084*/3085*p = format_needs_software_int_clamp(dst_format) ? src_format : 0;3086p++;30873088*p = cmask;3089p++;30903091*p = (dst_samples << 8) | src_samples;3092p++;30933094assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);3095}30963097static bool3098create_blit_render_pass(struct v3dv_device *device,3099VkFormat dst_format,3100VkFormat src_format,3101VkRenderPass *pass_load,3102VkRenderPass *pass_no_load)3103{3104const bool is_color_blit = vk_format_is_color(dst_format);31053106/* Attachment load operation is specified below */3107VkAttachmentDescription att = {3108.format = dst_format,3109.samples = VK_SAMPLE_COUNT_1_BIT,3110.storeOp = VK_ATTACHMENT_STORE_OP_STORE,3111.initialLayout = VK_IMAGE_LAYOUT_GENERAL,3112.finalLayout = VK_IMAGE_LAYOUT_GENERAL,3113};31143115VkAttachmentReference att_ref = {3116.attachment = 0,3117.layout = VK_IMAGE_LAYOUT_GENERAL,3118};31193120VkSubpassDescription subpass = {3121.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,3122.inputAttachmentCount = 0,3123.colorAttachmentCount = is_color_blit ? 1 : 0,3124.pColorAttachments = is_color_blit ? &att_ref : NULL,3125.pResolveAttachments = NULL,3126.pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,3127.preserveAttachmentCount = 0,3128.pPreserveAttachments = NULL,3129};31303131VkRenderPassCreateInfo info = {3132.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,3133.attachmentCount = 1,3134.pAttachments = &att,3135.subpassCount = 1,3136.pSubpasses = &subpass,3137.dependencyCount = 0,3138.pDependencies = NULL,3139};31403141VkResult result;3142att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;3143result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),3144&info, &device->vk.alloc, pass_load);3145if (result != VK_SUCCESS)3146return false;31473148att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;3149result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),3150&info, &device->vk.alloc, pass_no_load);3151return result == VK_SUCCESS;3152}31533154static nir_ssa_def *3155gen_rect_vertices(nir_builder *b)3156{3157nir_ssa_def *vertex_id = nir_load_vertex_id(b);31583159/* vertex 0: -1.0, -1.03160* vertex 1: -1.0, 1.03161* vertex 2: 1.0, -1.03162* vertex 3: 1.0, 1.03163*3164* so:3165*3166* channel 0 is vertex_id < 2 ? -1.0 : 1.03167* channel 1 is vertex id & 1 ? 1.0 : -1.03168*/31693170nir_ssa_def *one = nir_imm_int(b, 1);3171nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));3172nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);31733174nir_ssa_def *comp[4];3175comp[0] = nir_bcsel(b, c0cmp,3176nir_imm_float(b, -1.0f),3177nir_imm_float(b, 1.0f));31783179comp[1] = nir_bcsel(b, c1cmp,3180nir_imm_float(b, 1.0f),3181nir_imm_float(b, -1.0f));3182comp[2] = nir_imm_float(b, 0.0f);3183comp[3] = nir_imm_float(b, 1.0f);3184return nir_vec(b, comp, 4);3185}31863187static nir_ssa_def *3188gen_tex_coords(nir_builder *b)3189{3190nir_ssa_def *tex_box =3191nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);31923193nir_ssa_def *tex_z =3194nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);31953196nir_ssa_def *vertex_id = nir_load_vertex_id(b);31973198/* vertex 0: src0_x, src0_y3199* vertex 1: src0_x, src1_y3200* vertex 2: src1_x, src0_y3201* vertex 3: src1_x, src1_y3202*3203* So:3204*3205* channel 0 is vertex_id < 2 ? src0_x : src1_x3206* channel 1 is vertex id & 1 ? src1_y : src0_y3207*/32083209nir_ssa_def *one = nir_imm_int(b, 1);3210nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));3211nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);32123213nir_ssa_def *comp[4];3214comp[0] = nir_bcsel(b, c0cmp,3215nir_channel(b, tex_box, 0),3216nir_channel(b, tex_box, 2));32173218comp[1] = nir_bcsel(b, c1cmp,3219nir_channel(b, tex_box, 3),3220nir_channel(b, tex_box, 1));3221comp[2] = tex_z;3222comp[3] = nir_imm_float(b, 1.0f);3223return nir_vec(b, comp, 4);3224}32253226static nir_ssa_def *3227build_nir_tex_op_read(struct nir_builder *b,3228nir_ssa_def *tex_pos,3229enum glsl_base_type tex_type,3230enum glsl_sampler_dim dim)3231{3232assert(dim != GLSL_SAMPLER_DIM_MS);32333234const struct glsl_type *sampler_type =3235glsl_sampler_type(dim, false, false, tex_type);3236nir_variable *sampler =3237nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");3238sampler->data.descriptor_set = 0;3239sampler->data.binding = 0;32403241nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;3242nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);3243tex->sampler_dim = dim;3244tex->op = nir_texop_tex;3245tex->src[0].src_type = nir_tex_src_coord;3246tex->src[0].src = nir_src_for_ssa(tex_pos);3247tex->src[1].src_type = nir_tex_src_texture_deref;3248tex->src[1].src = nir_src_for_ssa(tex_deref);3249tex->src[2].src_type = nir_tex_src_sampler_deref;3250tex->src[2].src = nir_src_for_ssa(tex_deref);3251tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);3252tex->is_array = glsl_sampler_type_is_array(sampler_type);3253tex->coord_components = tex_pos->num_components;32543255nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");3256nir_builder_instr_insert(b, &tex->instr);3257return &tex->dest.ssa;3258}32593260static nir_ssa_def *3261build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,3262nir_variable *sampler,3263nir_ssa_def *tex_deref,3264enum glsl_base_type tex_type,3265nir_ssa_def *tex_pos,3266nir_ssa_def *sample_idx)3267{3268nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);3269tex->sampler_dim = GLSL_SAMPLER_DIM_MS;3270tex->op = nir_texop_txf_ms;3271tex->src[0].src_type = nir_tex_src_coord;3272tex->src[0].src = nir_src_for_ssa(tex_pos);3273tex->src[1].src_type = nir_tex_src_texture_deref;3274tex->src[1].src = nir_src_for_ssa(tex_deref);3275tex->src[2].src_type = nir_tex_src_sampler_deref;3276tex->src[2].src = nir_src_for_ssa(tex_deref);3277tex->src[3].src_type = nir_tex_src_ms_index;3278tex->src[3].src = nir_src_for_ssa(sample_idx);3279tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);3280tex->is_array = false;3281tex->coord_components = tex_pos->num_components;32823283nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");3284nir_builder_instr_insert(b, &tex->instr);3285return &tex->dest.ssa;3286}32873288/* Fetches all samples at the given position and averages them */3289static nir_ssa_def *3290build_nir_tex_op_ms_resolve(struct nir_builder *b,3291nir_ssa_def *tex_pos,3292enum glsl_base_type tex_type,3293VkSampleCountFlagBits src_samples)3294{3295assert(src_samples > VK_SAMPLE_COUNT_1_BIT);3296const struct glsl_type *sampler_type =3297glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);3298nir_variable *sampler =3299nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");3300sampler->data.descriptor_set = 0;3301sampler->data.binding = 0;33023303const bool is_int = glsl_base_type_is_integer(tex_type);33043305nir_ssa_def *tmp = NULL;3306nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;3307for (uint32_t i = 0; i < src_samples; i++) {3308nir_ssa_def *s =3309build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,3310tex_type, tex_pos,3311nir_imm_int(b, i));33123313/* For integer formats, the multisample resolve operation is expected to3314* return one of the samples, we just return the first one.3315*/3316if (is_int)3317return s;33183319tmp = i == 0 ? s : nir_fadd(b, tmp, s);3320}33213322assert(!is_int);3323return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));3324}33253326/* Fetches the current sample (gl_SampleID) at the given position */3327static nir_ssa_def *3328build_nir_tex_op_ms_read(struct nir_builder *b,3329nir_ssa_def *tex_pos,3330enum glsl_base_type tex_type)3331{3332const struct glsl_type *sampler_type =3333glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);3334nir_variable *sampler =3335nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");3336sampler->data.descriptor_set = 0;3337sampler->data.binding = 0;33383339nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;33403341return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,3342tex_type, tex_pos,3343nir_load_sample_id(b));3344}33453346static nir_ssa_def *3347build_nir_tex_op(struct nir_builder *b,3348struct v3dv_device *device,3349nir_ssa_def *tex_pos,3350enum glsl_base_type tex_type,3351VkSampleCountFlagBits dst_samples,3352VkSampleCountFlagBits src_samples,3353enum glsl_sampler_dim dim)3354{3355switch (dim) {3356case GLSL_SAMPLER_DIM_MS:3357assert(src_samples == VK_SAMPLE_COUNT_4_BIT);3358/* For multisampled texture sources we need to use fetching instead of3359* normalized texture coordinates. We already configured our blit3360* coordinates to be in texel units, but here we still need to convert3361* them from floating point to integer.3362*/3363tex_pos = nir_f2i32(b, tex_pos);33643365if (dst_samples == VK_SAMPLE_COUNT_1_BIT)3366return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);3367else3368return build_nir_tex_op_ms_read(b, tex_pos, tex_type);3369default:3370assert(src_samples == VK_SAMPLE_COUNT_1_BIT);3371return build_nir_tex_op_read(b, tex_pos, tex_type, dim);3372}3373}33743375static nir_shader *3376get_blit_vs()3377{3378const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();3379nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,3380"meta blit vs");33813382const struct glsl_type *vec4 = glsl_vec4_type();33833384nir_variable *vs_out_pos =3385nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");3386vs_out_pos->data.location = VARYING_SLOT_POS;33873388nir_variable *vs_out_tex_coord =3389nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");3390vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;3391vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;33923393nir_ssa_def *pos = gen_rect_vertices(&b);3394nir_store_var(&b, vs_out_pos, pos, 0xf);33953396nir_ssa_def *tex_coord = gen_tex_coords(&b);3397nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);33983399return b.shader;3400}34013402static uint32_t3403get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)3404{3405switch (sampler_dim) {3406case GLSL_SAMPLER_DIM_1D: return 0x1;3407case GLSL_SAMPLER_DIM_2D: return 0x3;3408case GLSL_SAMPLER_DIM_MS: return 0x3;3409case GLSL_SAMPLER_DIM_3D: return 0x7;3410default:3411unreachable("invalid sampler dim");3412};3413}34143415static nir_shader *3416get_color_blit_fs(struct v3dv_device *device,3417VkFormat dst_format,3418VkFormat src_format,3419VkSampleCountFlagBits dst_samples,3420VkSampleCountFlagBits src_samples,3421enum glsl_sampler_dim sampler_dim)3422{3423const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();3424nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,3425"meta blit fs");34263427const struct glsl_type *vec4 = glsl_vec4_type();34283429nir_variable *fs_in_tex_coord =3430nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");3431fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;34323433const struct glsl_type *fs_out_type =3434vk_format_is_sint(dst_format) ? glsl_ivec4_type() :3435vk_format_is_uint(dst_format) ? glsl_uvec4_type() :3436glsl_vec4_type();34373438enum glsl_base_type src_base_type =3439vk_format_is_sint(src_format) ? GLSL_TYPE_INT :3440vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :3441GLSL_TYPE_FLOAT;34423443nir_variable *fs_out_color =3444nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");3445fs_out_color->data.location = FRAG_RESULT_DATA0;34463447nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);3448const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);3449tex_coord = nir_channels(&b, tex_coord, channel_mask);34503451nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,3452dst_samples, src_samples, sampler_dim);34533454/* For integer textures, if the bit-size of the destination is too small to3455* hold source value, Vulkan (CTS) expects the implementation to clamp to the3456* maximum value the destination can hold. The hardware can clamp to the3457* render target type, which usually matches the component bit-size, but3458* there are some cases that won't match, such as rgb10a2, which has a 16-bit3459* render target type, so in these cases we need to clamp manually.3460*/3461if (format_needs_software_int_clamp(dst_format)) {3462assert(vk_format_is_int(dst_format));3463enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);3464enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);34653466nir_ssa_def *c[4];3467for (uint32_t i = 0; i < 4; i++) {3468c[i] = nir_channel(&b, color, i);34693470const uint32_t src_bit_size =3471util_format_get_component_bits(src_pformat,3472UTIL_FORMAT_COLORSPACE_RGB,3473i);3474const uint32_t dst_bit_size =3475util_format_get_component_bits(dst_pformat,3476UTIL_FORMAT_COLORSPACE_RGB,3477i);34783479if (dst_bit_size >= src_bit_size)3480continue;34813482assert(dst_bit_size > 0);3483if (util_format_is_pure_uint(dst_pformat)) {3484nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);3485c[i] = nir_umin(&b, c[i], max);3486} else {3487nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);3488nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));3489c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);3490}3491}34923493color = nir_vec4(&b, c[0], c[1], c[2], c[3]);3494}34953496nir_store_var(&b, fs_out_color, color, 0xf);34973498return b.shader;3499}35003501static bool3502create_pipeline(struct v3dv_device *device,3503struct v3dv_render_pass *pass,3504struct nir_shader *vs_nir,3505struct nir_shader *gs_nir,3506struct nir_shader *fs_nir,3507const VkPipelineVertexInputStateCreateInfo *vi_state,3508const VkPipelineDepthStencilStateCreateInfo *ds_state,3509const VkPipelineColorBlendStateCreateInfo *cb_state,3510const VkPipelineMultisampleStateCreateInfo *ms_state,3511const VkPipelineLayout layout,3512VkPipeline *pipeline)3513{3514struct vk_shader_module vs_m;3515struct vk_shader_module gs_m;3516struct vk_shader_module fs_m;35173518uint32_t num_stages = gs_nir ? 3 : 2;35193520v3dv_shader_module_internal_init(device, &vs_m, vs_nir);3521v3dv_shader_module_internal_init(device, &fs_m, fs_nir);35223523VkPipelineShaderStageCreateInfo stages[3] = {3524{3525.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,3526.stage = VK_SHADER_STAGE_VERTEX_BIT,3527.module = vk_shader_module_to_handle(&vs_m),3528.pName = "main",3529},3530{3531.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,3532.stage = VK_SHADER_STAGE_FRAGMENT_BIT,3533.module = vk_shader_module_to_handle(&fs_m),3534.pName = "main",3535},3536{3537.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,3538.stage = VK_SHADER_STAGE_GEOMETRY_BIT,3539.module = VK_NULL_HANDLE,3540.pName = "main",3541},3542};35433544if (gs_nir) {3545v3dv_shader_module_internal_init(device, &gs_m, gs_nir);3546stages[2].module = vk_shader_module_to_handle(&gs_m);3547}35483549VkGraphicsPipelineCreateInfo info = {3550.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,35513552.stageCount = num_stages,3553.pStages = stages,35543555.pVertexInputState = vi_state,35563557.pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {3558.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,3559.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,3560.primitiveRestartEnable = false,3561},35623563.pViewportState = &(VkPipelineViewportStateCreateInfo) {3564.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,3565.viewportCount = 1,3566.scissorCount = 1,3567},35683569.pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {3570.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,3571.rasterizerDiscardEnable = false,3572.polygonMode = VK_POLYGON_MODE_FILL,3573.cullMode = VK_CULL_MODE_NONE,3574.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,3575.depthBiasEnable = false,3576},35773578.pMultisampleState = ms_state,35793580.pDepthStencilState = ds_state,35813582.pColorBlendState = cb_state,35833584/* The meta clear pipeline declares all state as dynamic.3585* As a consequence, vkCmdBindPipeline writes no dynamic state3586* to the cmd buffer. Therefore, at the end of the meta clear,3587* we need only restore dynamic state that was vkCmdSet.3588*/3589.pDynamicState = &(VkPipelineDynamicStateCreateInfo) {3590.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,3591.dynamicStateCount = 6,3592.pDynamicStates = (VkDynamicState[]) {3593VK_DYNAMIC_STATE_VIEWPORT,3594VK_DYNAMIC_STATE_SCISSOR,3595VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,3596VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,3597VK_DYNAMIC_STATE_STENCIL_REFERENCE,3598VK_DYNAMIC_STATE_BLEND_CONSTANTS,3599VK_DYNAMIC_STATE_DEPTH_BIAS,3600VK_DYNAMIC_STATE_LINE_WIDTH,3601},3602},36033604.flags = 0,3605.layout = layout,3606.renderPass = v3dv_render_pass_to_handle(pass),3607.subpass = 0,3608};36093610VkResult result =3611v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),3612VK_NULL_HANDLE,36131, &info,3614&device->vk.alloc,3615pipeline);36163617ralloc_free(vs_nir);3618ralloc_free(fs_nir);36193620return result == VK_SUCCESS;3621}36223623static enum glsl_sampler_dim3624get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)3625{3626/* From the Vulkan 1.0 spec, VkImageCreateInfo Validu Usage:3627*3628* "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be3629* VK_IMAGE_TYPE_2D, ..."3630*/3631assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);36323633switch (type) {3634case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;3635case VK_IMAGE_TYPE_2D:3636return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :3637GLSL_SAMPLER_DIM_MS;3638case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;3639default:3640unreachable("Invalid image type");3641}3642}36433644static bool3645create_blit_pipeline(struct v3dv_device *device,3646VkFormat dst_format,3647VkFormat src_format,3648VkColorComponentFlags cmask,3649VkImageType src_type,3650VkSampleCountFlagBits dst_samples,3651VkSampleCountFlagBits src_samples,3652VkRenderPass _pass,3653VkPipelineLayout pipeline_layout,3654VkPipeline *pipeline)3655{3656struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);36573658/* We always rewrite depth/stencil blits to compatible color blits */3659assert(vk_format_is_color(dst_format));3660assert(vk_format_is_color(src_format));36613662const enum glsl_sampler_dim sampler_dim =3663get_sampler_dim(src_type, src_samples);36643665nir_shader *vs_nir = get_blit_vs();3666nir_shader *fs_nir =3667get_color_blit_fs(device, dst_format, src_format,3668dst_samples, src_samples, sampler_dim);36693670const VkPipelineVertexInputStateCreateInfo vi_state = {3671.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,3672.vertexBindingDescriptionCount = 0,3673.vertexAttributeDescriptionCount = 0,3674};36753676VkPipelineDepthStencilStateCreateInfo ds_state = {3677.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,3678};36793680VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };3681blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {3682.blendEnable = false,3683.colorWriteMask = cmask,3684};36853686const VkPipelineColorBlendStateCreateInfo cb_state = {3687.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,3688.logicOpEnable = false,3689.attachmentCount = 1,3690.pAttachments = blend_att_state3691};36923693const VkPipelineMultisampleStateCreateInfo ms_state = {3694.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,3695.rasterizationSamples = dst_samples,3696.sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,3697.pSampleMask = NULL,3698.alphaToCoverageEnable = false,3699.alphaToOneEnable = false,3700};37013702return create_pipeline(device,3703pass,3704vs_nir, NULL, fs_nir,3705&vi_state,3706&ds_state,3707&cb_state,3708&ms_state,3709pipeline_layout,3710pipeline);3711}37123713/**3714* Return a pipeline suitable for blitting the requested aspect given the3715* destination and source formats.3716*/3717static bool3718get_blit_pipeline(struct v3dv_device *device,3719VkFormat dst_format,3720VkFormat src_format,3721VkColorComponentFlags cmask,3722VkImageType src_type,3723VkSampleCountFlagBits dst_samples,3724VkSampleCountFlagBits src_samples,3725struct v3dv_meta_blit_pipeline **pipeline)3726{3727bool ok = true;37283729uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];3730get_blit_pipeline_cache_key(dst_format, src_format, cmask,3731dst_samples, src_samples, key);3732mtx_lock(&device->meta.mtx);3733struct hash_entry *entry =3734_mesa_hash_table_search(device->meta.blit.cache[src_type], &key);3735if (entry) {3736mtx_unlock(&device->meta.mtx);3737*pipeline = entry->data;3738return true;3739}37403741*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,3742VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);37433744if (*pipeline == NULL)3745goto fail;37463747ok = create_blit_render_pass(device, dst_format, src_format,3748&(*pipeline)->pass,3749&(*pipeline)->pass_no_load);3750if (!ok)3751goto fail;37523753/* Create the pipeline using one of the render passes, they are both3754* compatible, so we don't care which one we use here.3755*/3756ok = create_blit_pipeline(device,3757dst_format,3758src_format,3759cmask,3760src_type,3761dst_samples,3762src_samples,3763(*pipeline)->pass,3764device->meta.blit.p_layout,3765&(*pipeline)->pipeline);3766if (!ok)3767goto fail;37683769memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));3770_mesa_hash_table_insert(device->meta.blit.cache[src_type],3771&(*pipeline)->key, *pipeline);37723773mtx_unlock(&device->meta.mtx);3774return true;37753776fail:3777mtx_unlock(&device->meta.mtx);37783779VkDevice _device = v3dv_device_to_handle(device);3780if (*pipeline) {3781if ((*pipeline)->pass)3782v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);3783if ((*pipeline)->pass_no_load)3784v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);3785if ((*pipeline)->pipeline)3786v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);3787vk_free(&device->vk.alloc, *pipeline);3788*pipeline = NULL;3789}37903791return false;3792}37933794static void3795compute_blit_box(const VkOffset3D *offsets,3796uint32_t image_w, uint32_t image_h,3797uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,3798bool *mirror_x, bool *mirror_y)3799{3800if (offsets[1].x >= offsets[0].x) {3801*mirror_x = false;3802*x = MIN2(offsets[0].x, image_w - 1);3803*w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);3804} else {3805*mirror_x = true;3806*x = MIN2(offsets[1].x, image_w - 1);3807*w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);3808}3809if (offsets[1].y >= offsets[0].y) {3810*mirror_y = false;3811*y = MIN2(offsets[0].y, image_h - 1);3812*h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);3813} else {3814*mirror_y = true;3815*y = MIN2(offsets[1].y, image_h - 1);3816*h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);3817}3818}38193820static void3821compute_blit_3d_layers(const VkOffset3D *offsets,3822uint32_t *min_layer, uint32_t *max_layer,3823bool *mirror_z)3824{3825if (offsets[1].z >= offsets[0].z) {3826*mirror_z = false;3827*min_layer = offsets[0].z;3828*max_layer = offsets[1].z;3829} else {3830*mirror_z = true;3831*min_layer = offsets[1].z;3832*max_layer = offsets[0].z;3833}3834}38353836static VkResult3837create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)3838{3839/* If this is not the first pool we create for this command buffer3840* size it based on the size of the currently exhausted pool.3841*/3842uint32_t descriptor_count = 64;3843if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {3844struct v3dv_descriptor_pool *exhausted_pool =3845v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);3846descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);3847}38483849/* Create the descriptor pool */3850cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;3851VkDescriptorPoolSize pool_size = {3852.type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,3853.descriptorCount = descriptor_count,3854};3855VkDescriptorPoolCreateInfo info = {3856.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,3857.maxSets = descriptor_count,3858.poolSizeCount = 1,3859.pPoolSizes = &pool_size,3860.flags = 0,3861};3862VkResult result =3863v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),3864&info,3865&cmd_buffer->device->vk.alloc,3866&cmd_buffer->meta.blit.dspool);38673868if (result == VK_SUCCESS) {3869assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);3870const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;38713872v3dv_cmd_buffer_add_private_obj(3873cmd_buffer, (uintptr_t) _pool,3874(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);38753876struct v3dv_descriptor_pool *pool =3877v3dv_descriptor_pool_from_handle(_pool);3878pool->is_driver_internal = true;3879}38803881return result;3882}38833884static VkResult3885allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,3886VkDescriptorSet *set)3887{3888/* Make sure we have a descriptor pool */3889VkResult result;3890if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {3891result = create_blit_descriptor_pool(cmd_buffer);3892if (result != VK_SUCCESS)3893return result;3894}3895assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);38963897/* Allocate descriptor set */3898struct v3dv_device *device = cmd_buffer->device;3899VkDevice _device = v3dv_device_to_handle(device);3900VkDescriptorSetAllocateInfo info = {3901.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,3902.descriptorPool = cmd_buffer->meta.blit.dspool,3903.descriptorSetCount = 1,3904.pSetLayouts = &device->meta.blit.ds_layout,3905};3906result = v3dv_AllocateDescriptorSets(_device, &info, set);39073908/* If we ran out of pool space, grow the pool and try again */3909if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {3910result = create_blit_descriptor_pool(cmd_buffer);3911if (result == VK_SUCCESS) {3912info.descriptorPool = cmd_buffer->meta.blit.dspool;3913result = v3dv_AllocateDescriptorSets(_device, &info, set);3914}3915}39163917return result;3918}39193920/**3921* Returns true if the implementation supports the requested operation (even if3922* it failed to process it, for example, due to an out-of-memory error).3923*3924* The caller can specify the channels on the destination to be written via the3925* cmask parameter (which can be 0 to default to all channels), as well as a3926* swizzle to apply to the source via the cswizzle parameter (which can be NULL3927* to use the default identity swizzle).3928*/3929static bool3930blit_shader(struct v3dv_cmd_buffer *cmd_buffer,3931struct v3dv_image *dst,3932VkFormat dst_format,3933struct v3dv_image *src,3934VkFormat src_format,3935VkColorComponentFlags cmask,3936VkComponentMapping *cswizzle,3937const VkImageBlit2KHR *_region,3938VkFilter filter,3939bool dst_is_padded_image)3940{3941bool handled = true;3942VkResult result;3943uint32_t dirty_dynamic_state = 0;39443945/* We don't support rendering to linear depth/stencil, this should have3946* been rewritten to a compatible color blit by the caller.3947*/3948assert(dst->tiling != VK_IMAGE_TILING_LINEAR ||3949!vk_format_is_depth_or_stencil(dst_format));39503951/* Can't sample from linear images */3952if (src->tiling == VK_IMAGE_TILING_LINEAR && src->type != VK_IMAGE_TYPE_1D)3953return false;39543955VkImageBlit2KHR region = *_region;3956/* Rewrite combined D/S blits to compatible color blits */3957if (vk_format_is_depth_or_stencil(dst_format)) {3958assert(src_format == dst_format);3959assert(cmask == 0);3960switch(dst_format) {3961case VK_FORMAT_D16_UNORM:3962dst_format = VK_FORMAT_R16_UINT;3963break;3964case VK_FORMAT_D32_SFLOAT:3965dst_format = VK_FORMAT_R32_UINT;3966break;3967case VK_FORMAT_X8_D24_UNORM_PACK32:3968case VK_FORMAT_D24_UNORM_S8_UINT:3969if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {3970cmask |= VK_COLOR_COMPONENT_G_BIT |3971VK_COLOR_COMPONENT_B_BIT |3972VK_COLOR_COMPONENT_A_BIT;3973}3974if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {3975assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);3976cmask |= VK_COLOR_COMPONENT_R_BIT;3977}3978dst_format = VK_FORMAT_R8G8B8A8_UINT;3979break;3980default:3981unreachable("Unsupported depth/stencil format");3982};3983src_format = dst_format;3984region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;3985region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;3986}39873988const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |3989VK_COLOR_COMPONENT_G_BIT |3990VK_COLOR_COMPONENT_B_BIT |3991VK_COLOR_COMPONENT_A_BIT;3992if (cmask == 0)3993cmask = full_cmask;39943995VkComponentMapping ident_swizzle = {3996.r = VK_COMPONENT_SWIZZLE_IDENTITY,3997.g = VK_COMPONENT_SWIZZLE_IDENTITY,3998.b = VK_COMPONENT_SWIZZLE_IDENTITY,3999.a = VK_COMPONENT_SWIZZLE_IDENTITY,4000};4001if (!cswizzle)4002cswizzle = &ident_swizzle;40034004/* When we get here from a copy between compressed / uncompressed images4005* we choose to specify the destination blit region based on the size4006* semantics of the source image of the copy (see copy_image_blit), so we4007* need to apply those same semantics here when we compute the size of the4008* destination image level.4009*/4010const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);4011const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);4012const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);4013const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);4014const uint32_t dst_level_w =4015u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w),4016region.dstSubresource.mipLevel);4017const uint32_t dst_level_h =4018u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h),4019region.dstSubresource.mipLevel);40204021const uint32_t src_level_w =4022u_minify(src->extent.width, region.srcSubresource.mipLevel);4023const uint32_t src_level_h =4024u_minify(src->extent.height, region.srcSubresource.mipLevel);4025const uint32_t src_level_d =4026u_minify(src->extent.depth, region.srcSubresource.mipLevel);40274028uint32_t dst_x, dst_y, dst_w, dst_h;4029bool dst_mirror_x, dst_mirror_y;4030compute_blit_box(region.dstOffsets,4031dst_level_w, dst_level_h,4032&dst_x, &dst_y, &dst_w, &dst_h,4033&dst_mirror_x, &dst_mirror_y);40344035uint32_t src_x, src_y, src_w, src_h;4036bool src_mirror_x, src_mirror_y;4037compute_blit_box(region.srcOffsets,4038src_level_w, src_level_h,4039&src_x, &src_y, &src_w, &src_h,4040&src_mirror_x, &src_mirror_y);40414042uint32_t min_dst_layer;4043uint32_t max_dst_layer;4044bool dst_mirror_z = false;4045if (dst->type != VK_IMAGE_TYPE_3D) {4046min_dst_layer = region.dstSubresource.baseArrayLayer;4047max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;4048} else {4049compute_blit_3d_layers(region.dstOffsets,4050&min_dst_layer, &max_dst_layer,4051&dst_mirror_z);4052}40534054uint32_t min_src_layer;4055uint32_t max_src_layer;4056bool src_mirror_z = false;4057if (src->type != VK_IMAGE_TYPE_3D) {4058min_src_layer = region.srcSubresource.baseArrayLayer;4059max_src_layer = min_src_layer + region.srcSubresource.layerCount;4060} else {4061compute_blit_3d_layers(region.srcOffsets,4062&min_src_layer, &max_src_layer,4063&src_mirror_z);4064}40654066uint32_t layer_count = max_dst_layer - min_dst_layer;40674068/* Translate source blit coordinates to normalized texture coordinates for4069* single sampled textures. For multisampled textures we require4070* unnormalized coordinates, since we can only do texelFetch on them.4071*/4072float coords[4] = {4073(float)src_x,4074(float)src_y,4075(float)(src_x + src_w),4076(float)(src_y + src_h),4077};40784079if (src->samples == VK_SAMPLE_COUNT_1_BIT) {4080coords[0] /= (float)src_level_w;4081coords[1] /= (float)src_level_h;4082coords[2] /= (float)src_level_w;4083coords[3] /= (float)src_level_h;4084}40854086/* Handle mirroring */4087const bool mirror_x = dst_mirror_x != src_mirror_x;4088const bool mirror_y = dst_mirror_y != src_mirror_y;4089const bool mirror_z = dst_mirror_z != src_mirror_z;4090float tex_coords[5] = {4091!mirror_x ? coords[0] : coords[2],4092!mirror_y ? coords[1] : coords[3],4093!mirror_x ? coords[2] : coords[0],4094!mirror_y ? coords[3] : coords[1],4095/* Z coordinate for 3D blit sources, to be filled for each4096* destination layer4097*/40980.0f4099};41004101/* For blits from 3D images we also need to compute the slice coordinate to4102* sample from, which will change for each layer in the destination.4103* Compute the step we should increase for each iteration.4104*/4105const float src_z_step =4106(float)(max_src_layer - min_src_layer) / (float)layer_count;41074108/* Get the blit pipeline */4109struct v3dv_meta_blit_pipeline *pipeline = NULL;4110bool ok = get_blit_pipeline(cmd_buffer->device,4111dst_format, src_format, cmask, src->type,4112dst->samples, src->samples,4113&pipeline);4114if (!ok)4115return handled;4116assert(pipeline && pipeline->pipeline &&4117pipeline->pass && pipeline->pass_no_load);41184119struct v3dv_device *device = cmd_buffer->device;4120assert(device->meta.blit.ds_layout);41214122VkDevice _device = v3dv_device_to_handle(device);4123VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);41244125/* Create sampler for blit source image */4126VkSamplerCreateInfo sampler_info = {4127.sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,4128.magFilter = filter,4129.minFilter = filter,4130.addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,4131.addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,4132.addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,4133.mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,4134};4135VkSampler sampler;4136result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,4137&sampler);4138if (result != VK_SUCCESS)4139goto fail;41404141v3dv_cmd_buffer_add_private_obj(4142cmd_buffer, (uintptr_t)sampler,4143(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);41444145/* Push command buffer state before starting meta operation */4146v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);41474148/* Push state that is common for all layers */4149v3dv_CmdBindPipeline(_cmd_buffer,4150VK_PIPELINE_BIND_POINT_GRAPHICS,4151pipeline->pipeline);41524153const VkViewport viewport = {4154.x = dst_x,4155.y = dst_y,4156.width = dst_w,4157.height = dst_h,4158.minDepth = 0.0f,4159.maxDepth = 1.0f4160};4161v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);41624163const VkRect2D scissor = {4164.offset = { dst_x, dst_y },4165.extent = { dst_w, dst_h }4166};4167v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);41684169bool can_skip_tlb_load = false;4170const VkRect2D render_area = {4171.offset = { dst_x, dst_y },4172.extent = { dst_w, dst_h },4173};41744175/* Record per-layer commands */4176VkImageAspectFlags aspects = region.dstSubresource.aspectMask;4177for (uint32_t i = 0; i < layer_count; i++) {4178/* Setup framebuffer */4179VkImageViewCreateInfo dst_image_view_info = {4180.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,4181.image = v3dv_image_to_handle(dst),4182.viewType = v3dv_image_type_to_view_type(dst->type),4183.format = dst_format,4184.subresourceRange = {4185.aspectMask = aspects,4186.baseMipLevel = region.dstSubresource.mipLevel,4187.levelCount = 1,4188.baseArrayLayer = min_dst_layer + i,4189.layerCount = 14190},4191};4192VkImageView dst_image_view;4193result = v3dv_CreateImageView(_device, &dst_image_view_info,4194&device->vk.alloc, &dst_image_view);4195if (result != VK_SUCCESS)4196goto fail;41974198v3dv_cmd_buffer_add_private_obj(4199cmd_buffer, (uintptr_t)dst_image_view,4200(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);42014202VkFramebufferCreateInfo fb_info = {4203.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,4204.renderPass = pipeline->pass,4205.attachmentCount = 1,4206.pAttachments = &dst_image_view,4207.width = dst_x + dst_w,4208.height = dst_y + dst_h,4209.layers = 1,4210};42114212VkFramebuffer fb;4213result = v3dv_CreateFramebuffer(_device, &fb_info,4214&cmd_buffer->device->vk.alloc, &fb);4215if (result != VK_SUCCESS)4216goto fail;42174218struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);4219framebuffer->has_edge_padding = fb_info.width == dst_level_w &&4220fb_info.height == dst_level_h &&4221dst_is_padded_image;42224223v3dv_cmd_buffer_add_private_obj(4224cmd_buffer, (uintptr_t)fb,4225(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);42264227/* Setup descriptor set for blit source texture. We don't have to4228* register the descriptor as a private command buffer object since4229* all descriptors will be freed automatically with the descriptor4230* pool.4231*/4232VkDescriptorSet set;4233result = allocate_blit_source_descriptor_set(cmd_buffer, &set);4234if (result != VK_SUCCESS)4235goto fail;42364237VkImageViewCreateInfo src_image_view_info = {4238.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,4239.image = v3dv_image_to_handle(src),4240.viewType = v3dv_image_type_to_view_type(src->type),4241.format = src_format,4242.components = *cswizzle,4243.subresourceRange = {4244.aspectMask = aspects,4245.baseMipLevel = region.srcSubresource.mipLevel,4246.levelCount = 1,4247.baseArrayLayer =4248src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,4249.layerCount = 14250},4251};4252VkImageView src_image_view;4253result = v3dv_CreateImageView(_device, &src_image_view_info,4254&device->vk.alloc, &src_image_view);4255if (result != VK_SUCCESS)4256goto fail;42574258v3dv_cmd_buffer_add_private_obj(4259cmd_buffer, (uintptr_t)src_image_view,4260(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);42614262VkDescriptorImageInfo image_info = {4263.sampler = sampler,4264.imageView = src_image_view,4265.imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,4266};4267VkWriteDescriptorSet write = {4268.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,4269.dstSet = set,4270.dstBinding = 0,4271.dstArrayElement = 0,4272.descriptorCount = 1,4273.descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,4274.pImageInfo = &image_info,4275};4276v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);42774278v3dv_CmdBindDescriptorSets(_cmd_buffer,4279VK_PIPELINE_BIND_POINT_GRAPHICS,4280device->meta.blit.p_layout,42810, 1, &set,42820, NULL);42834284/* If the region we are about to blit is tile-aligned, then we can4285* use the render pass version that won't pre-load the tile buffer4286* with the dst image contents before the blit. The exception is when we4287* don't have a full color mask, since in that case we need to preserve4288* the original value of some of the color components.4289*4290* Since all layers have the same area, we only need to compute this for4291* the first.4292*/4293if (i == 0) {4294struct v3dv_render_pass *pipeline_pass =4295v3dv_render_pass_from_handle(pipeline->pass);4296can_skip_tlb_load =4297cmask == full_cmask &&4298v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,4299framebuffer, pipeline_pass, 0);4300}43014302/* Record blit */4303VkRenderPassBeginInfo rp_info = {4304.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,4305.renderPass = can_skip_tlb_load ? pipeline->pass_no_load :4306pipeline->pass,4307.framebuffer = fb,4308.renderArea = render_area,4309.clearValueCount = 0,4310};43114312v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);4313struct v3dv_job *job = cmd_buffer->state.job;4314if (!job)4315goto fail;43164317/* For 3D blits we need to compute the source slice to blit from (the Z4318* coordinate of the source sample operation). We want to choose this4319* based on the ratio of the depth of the source and the destination4320* images, picking the coordinate in the middle of each step.4321*/4322if (src->type == VK_IMAGE_TYPE_3D) {4323tex_coords[4] =4324!mirror_z ?4325(min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :4326(max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;4327}43284329v3dv_CmdPushConstants(_cmd_buffer,4330device->meta.blit.p_layout,4331VK_SHADER_STAGE_VERTEX_BIT, 0, 20,4332&tex_coords);43334334v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);43354336v3dv_CmdEndRenderPass(_cmd_buffer);4337dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;4338}43394340fail:4341v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);43424343return handled;4344}43454346VKAPI_ATTR void VKAPI_CALL4347v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,4348const VkBlitImageInfo2KHR *pBlitImageInfo)4349{4350V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);4351V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);4352V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);43534354/* This command can only happen outside a render pass */4355assert(cmd_buffer->state.pass == NULL);4356assert(cmd_buffer->state.job == NULL);43574358/* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */4359assert(dst->samples == VK_SAMPLE_COUNT_1_BIT &&4360src->samples == VK_SAMPLE_COUNT_1_BIT);43614362/* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */4363assert(!vk_format_is_compressed(dst->vk_format));43644365for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {4366if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))4367continue;4368if (blit_shader(cmd_buffer,4369dst, dst->vk_format,4370src, src->vk_format,43710, NULL,4372&pBlitImageInfo->pRegions[i],4373pBlitImageInfo->filter, true)) {4374continue;4375}4376unreachable("Unsupported blit operation");4377}4378}43794380static bool4381resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,4382struct v3dv_image *dst,4383struct v3dv_image *src,4384const VkImageResolve2KHR *region)4385{4386if (!can_use_tlb(src, ®ion->srcOffset, NULL) ||4387!can_use_tlb(dst, ®ion->dstOffset, NULL)) {4388return false;4389}43904391if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))4392return false;43934394const VkFormat fb_format = src->vk_format;43954396uint32_t num_layers;4397if (dst->type != VK_IMAGE_TYPE_3D)4398num_layers = region->dstSubresource.layerCount;4399else4400num_layers = region->extent.depth;4401assert(num_layers > 0);44024403struct v3dv_job *job =4404v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);4405if (!job)4406return true;44074408const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);4409const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);4410const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);4411const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);44124413uint32_t internal_type, internal_bpp;4414v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)4415(fb_format, region->srcSubresource.aspectMask,4416&internal_type, &internal_bpp);44174418v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, true);44194420struct framebuffer_data framebuffer;4421v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,4422&job->frame_tiling);44234424v3dv_X(job->device, job_emit_binning_flush)(job);4425v3dv_X(job->device, job_emit_resolve_image_rcl)(job, dst, src, &framebuffer, region);44264427v3dv_cmd_buffer_finish_job(cmd_buffer);4428return true;4429}44304431static bool4432resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,4433struct v3dv_image *dst,4434struct v3dv_image *src,4435const VkImageResolve2KHR *region)4436{4437const VkImageBlit2KHR blit_region = {4438.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,4439.srcSubresource = region->srcSubresource,4440.srcOffsets = {4441region->srcOffset,4442{4443region->srcOffset.x + region->extent.width,4444region->srcOffset.y + region->extent.height,4445}4446},4447.dstSubresource = region->dstSubresource,4448.dstOffsets = {4449region->dstOffset,4450{4451region->dstOffset.x + region->extent.width,4452region->dstOffset.y + region->extent.height,4453}4454},4455};4456return blit_shader(cmd_buffer,4457dst, dst->vk_format,4458src, src->vk_format,44590, NULL,4460&blit_region, VK_FILTER_NEAREST, true);4461}44624463VKAPI_ATTR void VKAPI_CALL4464v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,4465const VkResolveImageInfo2KHR *info)44664467{4468V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);4469V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);4470V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);44714472/* This command can only happen outside a render pass */4473assert(cmd_buffer->state.pass == NULL);4474assert(cmd_buffer->state.job == NULL);44754476assert(src->samples == VK_SAMPLE_COUNT_4_BIT);4477assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);44784479for (uint32_t i = 0; i < info->regionCount; i++) {4480if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))4481continue;4482if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))4483continue;4484unreachable("Unsupported multismaple resolve operation");4485}4486}448744884489