GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dv_meta_copy.c

/*
 * Copyright © 2019 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "v3dv_meta_copy.h"

#include "compiler/nir/nir_builder.h"
#include "vk_format_info.h"
#include "util/u_pack_color.h"
#include "vulkan/util/vk_common_entrypoints.h"

static uint32_t
meta_blit_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_BLIT_CACHE_KEY_SIZE);
}

static bool
meta_blit_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_BLIT_CACHE_KEY_SIZE) == 0;
}

static bool
create_blit_pipeline_layout(struct v3dv_device *device,
                            VkDescriptorSetLayout *descriptor_set_layout,
                            VkPipelineLayout *pipeline_layout)
{
   VkResult result;

   if (*descriptor_set_layout == 0) {
      VkDescriptorSetLayoutBinding descriptor_set_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo descriptor_set_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &descriptor_set_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &descriptor_set_layout_info,
                                        &device->vk.alloc,
                                        descriptor_set_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*pipeline_layout == 0);
   VkPipelineLayoutCreateInfo pipeline_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = descriptor_set_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges =
         &(VkPushConstantRange) { VK_SHADER_STAGE_VERTEX_BIT, 0, 20 },
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &pipeline_layout_info,
                                &device->vk.alloc,
                                pipeline_layout);
   return result == VK_SUCCESS;
}

void
v3dv_meta_blit_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.blit.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_blit_key_hash,
                                 meta_blit_key_compare);
   }

   create_blit_pipeline_layout(device,
                               &device->meta.blit.ds_layout,
                               &device->meta.blit.p_layout);
}
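
/* Note (not from the original source): the three per-device caches above
 * store blit pipelines keyed by a V3DV_META_BLIT_CACHE_KEY_SIZE-byte key,
 * hashed and compared verbatim by the helpers at the top of this file. The
 * 20-byte vertex-stage push constant range set up in
 * create_blit_pipeline_layout() presumably carries the source coordinates
 * consumed by the meta blit vertex shader defined later in this file (not
 * included in this excerpt); the exact layout lives with that shader code.
 */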

void
v3dv_meta_blit_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.blit.cache[i], entry) {
         struct v3dv_meta_blit_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.blit.cache[i], NULL);
   }

   if (device->meta.blit.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.blit.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.blit.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.blit.ds_layout,
                                      &device->vk.alloc);
   }
}

static uint32_t
meta_texel_buffer_copy_key_hash(const void *key)
{
   return _mesa_hash_data(key, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
}

static bool
meta_texel_buffer_copy_key_compare(const void *key1, const void *key2)
{
   return memcmp(key1, key2, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE) == 0;
}

static bool
create_texel_buffer_copy_pipeline_layout(struct v3dv_device *device,
                                         VkDescriptorSetLayout *ds_layout,
                                         VkPipelineLayout *p_layout)
{
   VkResult result;

   if (*ds_layout == 0) {
      VkDescriptorSetLayoutBinding ds_layout_binding = {
         .binding = 0,
         .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
         .descriptorCount = 1,
         .stageFlags = VK_SHADER_STAGE_FRAGMENT_BIT,
      };
      VkDescriptorSetLayoutCreateInfo ds_layout_info = {
         .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
         .bindingCount = 1,
         .pBindings = &ds_layout_binding,
      };
      result =
         v3dv_CreateDescriptorSetLayout(v3dv_device_to_handle(device),
                                        &ds_layout_info,
                                        &device->vk.alloc,
                                        ds_layout);
      if (result != VK_SUCCESS)
         return false;
   }

   assert(*p_layout == 0);
   /* FIXME: this is abusing the API a bit, since not all of our copy
    * pipelines have a geometry shader. We could create 2 different pipeline
    * layouts, but this works for us for now.
    */
#define TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET     0
#define TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET 16
#define TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET 20
#define TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET  24
   VkPushConstantRange ranges[2] = {
      { VK_SHADER_STAGE_FRAGMENT_BIT, 0, 24 },
      { VK_SHADER_STAGE_GEOMETRY_BIT, 24, 4 },
   };

   VkPipelineLayoutCreateInfo p_layout_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = ds_layout,
      .pushConstantRangeCount = 2,
      .pPushConstantRanges = ranges,
   };

   result =
      v3dv_CreatePipelineLayout(v3dv_device_to_handle(device),
                                &p_layout_info,
                                &device->vk.alloc,
                                p_layout);
   return result == VK_SUCCESS;
}

void
v3dv_meta_texel_buffer_copy_init(struct v3dv_device *device)
{
   for (uint32_t i = 0; i < 3; i++) {
      device->meta.texel_buffer_copy.cache[i] =
         _mesa_hash_table_create(NULL,
                                 meta_texel_buffer_copy_key_hash,
                                 meta_texel_buffer_copy_key_compare);
   }

   create_texel_buffer_copy_pipeline_layout(
      device,
      &device->meta.texel_buffer_copy.ds_layout,
      &device->meta.texel_buffer_copy.p_layout);
}

void
v3dv_meta_texel_buffer_copy_finish(struct v3dv_device *device)
{
   VkDevice _device = v3dv_device_to_handle(device);

   for (uint32_t i = 0; i < 3; i++) {
      hash_table_foreach(device->meta.texel_buffer_copy.cache[i], entry) {
         struct v3dv_meta_texel_buffer_copy_pipeline *item = entry->data;
         v3dv_DestroyPipeline(_device, item->pipeline, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass, &device->vk.alloc);
         v3dv_DestroyRenderPass(_device, item->pass_no_load, &device->vk.alloc);
         vk_free(&device->vk.alloc, item);
      }
      _mesa_hash_table_destroy(device->meta.texel_buffer_copy.cache[i], NULL);
   }

   if (device->meta.texel_buffer_copy.p_layout) {
      v3dv_DestroyPipelineLayout(_device, device->meta.texel_buffer_copy.p_layout,
                                 &device->vk.alloc);
   }

   if (device->meta.texel_buffer_copy.ds_layout) {
      v3dv_DestroyDescriptorSetLayout(_device, device->meta.texel_buffer_copy.ds_layout,
                                      &device->vk.alloc);
   }
}

static inline bool
can_use_tlb(struct v3dv_image *image,
            const VkOffset3D *offset,
            VkFormat *compat_format);

/* Implements a copy using the TLB.
 *
 * This only works if we are copying from offset (0,0), since a TLB store for
 * tile (x,y) will be written at the same tile offset into the destination.
 * When this requirement is not met, we need to use a blit instead.
 *
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_buffer *buffer,
                         struct v3dv_image *image,
                         const VkBufferImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy from compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);

   struct framebuffer_data framebuffer;
   v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,
                                               &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, job_emit_copy_image_to_buffer_rcl)
      (job, buffer, image, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}
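
/* Note (not from the original source): the TLB path above is expressed as a
 * regular render job: start a CL job, size the frame to the copied region
 * (in block units when the source is compressed), set up framebuffer state
 * for the chosen compatible format, emit the binning flush and then an RCL
 * that stores the tiles to the destination buffer. Failing to start the job
 * still returns true: the operation is supported, it just could not be
 * processed, which matches the contract documented for these helpers.
 */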

static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2KHR *region,
            VkFilter filter,
            bool dst_is_padded_image);

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_to_buffer_blit(struct v3dv_cmd_buffer *cmd_buffer,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          const VkBufferImageCopy2KHR *region)
{
   bool handled = false;

   /* Generally, the bpp of the data in the buffer matches that of the
    * source image. The exception is the case where we are copying
    * stencil (8bpp) to a combined d24s8 image (32bpp).
    */
   uint32_t buffer_bpp = image->cpp;

   VkImageAspectFlags copy_aspect = region->imageSubresource.aspectMask;

   /* Because we are going to implement the copy as a blit, we need to create
    * a linear image from the destination buffer and we also want our blit
    * source and destination formats to be the same (to avoid any format
    * conversions), so we choose a canonical format that matches the
    * source image bpp.
    *
    * The exception to the above is copying from combined depth/stencil images
    * because we are copying only one aspect of the image, so we need to setup
    * our formats, color write mask and source swizzle mask to match that.
    */
   VkFormat dst_format;
   VkFormat src_format;
   VkColorComponentFlags cmask = 0; /* All components */
   VkComponentMapping cswizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   switch (buffer_bpp) {
   case 16:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R32G32B32A32_UINT;
      src_format = dst_format;
      break;
   case 8:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R16G16B16A16_UINT;
      src_format = dst_format;
      break;
   case 4:
      switch (copy_aspect) {
      case VK_IMAGE_ASPECT_COLOR_BIT:
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      case VK_IMAGE_ASPECT_DEPTH_BIT:
         assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
                image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
                image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
         if (image->vk_format == VK_FORMAT_D32_SFLOAT) {
            src_format = VK_FORMAT_R32_UINT;
            dst_format = VK_FORMAT_R32_UINT;
         } else {
            /* We want to write depth in the buffer in the first 24-bits,
             * however, the hardware has depth in bits 8-31, so swizzle the
             * source components to match what we want. Also, we don't
             * want to write bits 24-31 in the destination.
             */
            src_format = VK_FORMAT_R8G8B8A8_UINT;
            dst_format = VK_FORMAT_R8G8B8A8_UINT;
            cmask = VK_COLOR_COMPONENT_R_BIT |
                    VK_COLOR_COMPONENT_G_BIT |
                    VK_COLOR_COMPONENT_B_BIT;
            cswizzle.r = VK_COMPONENT_SWIZZLE_G;
            cswizzle.g = VK_COMPONENT_SWIZZLE_B;
            cswizzle.b = VK_COMPONENT_SWIZZLE_A;
            cswizzle.a = VK_COMPONENT_SWIZZLE_ZERO;
         }
         break;
      case VK_IMAGE_ASPECT_STENCIL_BIT:
         assert(copy_aspect == VK_IMAGE_ASPECT_STENCIL_BIT);
         assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
         /* Copying from S8D24. We want to write 8-bit stencil values only,
          * so adjust the buffer bpp for that. Since the hardware stores stencil
          * in the LSB, we can just do a RGBA8UI to R8UI blit.
          */
         src_format = VK_FORMAT_R8G8B8A8_UINT;
         dst_format = VK_FORMAT_R8_UINT;
         buffer_bpp = 1;
         break;
      default:
         unreachable("unsupported aspect");
         return handled;
      };
      break;
   case 2:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT ||
             copy_aspect == VK_IMAGE_ASPECT_DEPTH_BIT);
      dst_format = VK_FORMAT_R16_UINT;
      src_format = dst_format;
      break;
   case 1:
      assert(copy_aspect == VK_IMAGE_ASPECT_COLOR_BIT);
      dst_format = VK_FORMAT_R8_UINT;
      src_format = dst_format;
      break;
   default:
      unreachable("unsupported bit-size");
      return handled;
   };

   /* The hardware doesn't support linear depth/stencil stores, so we
    * implement copies of depth/stencil aspect as color copies using a
    * compatible color format.
    */
   assert(vk_format_is_color(src_format));
   assert(vk_format_is_color(dst_format));
   copy_aspect = VK_IMAGE_ASPECT_COLOR_BIT;

   /* We should be able to handle the blit if we got this far */
   handled = true;

   /* Obtain the 2D buffer region spec */
   uint32_t buf_width, buf_height;
   if (region->bufferRowLength == 0)
      buf_width = region->imageExtent.width;
   else
      buf_width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      buf_height = region->imageExtent.height;
   else
      buf_height = region->bufferImageHeight;

   /* If the image is compressed, the bpp refers to blocks, not pixels */
   uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
   uint32_t block_height = vk_format_get_blockheight(image->vk_format);
   buf_width = buf_width / block_width;
   buf_height = buf_height / block_height;

   /* Compute layers to copy */
   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   /* Our blit interface can see the real format of the images to detect
    * copies between compressed and uncompressed images and adapt the
    * blit region accordingly. Here we are just doing a raw copy of
    * compressed data, but we are passing an uncompressed view of the
    * buffer for the blit destination image (since compressed formats are
    * not renderable), so we also want to provide an uncompressed view of
    * the source image.
    */
   VkResult result;
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   if (vk_format_is_compressed(image->vk_format)) {
      VkImage uiview;
      VkImageCreateInfo uiview_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_3D,
         .format = dst_format,
         .extent = { buf_width, buf_height, image->extent.depth },
         .mipLevels = image->levels,
         .arrayLayers = image->array_size,
         .samples = image->samples,
         .tiling = image->tiling,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };
      result = v3dv_CreateImage(_device, &uiview_info, &device->vk.alloc, &uiview);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)uiview,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      result =
         vk_common_BindImageMemory(_device, uiview,
                                   v3dv_device_memory_to_handle(image->mem),
                                   image->mem_offset);
      if (result != VK_SUCCESS)
         return handled;

      image = v3dv_image_from_handle(uiview);
   }

   /* Copy requested layers */
   for (uint32_t i = 0; i < num_layers; i++) {
      /* Create the destination blit image from the destination buffer */
      VkImageCreateInfo image_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
         .imageType = VK_IMAGE_TYPE_2D,
         .format = dst_format,
         .extent = { buf_width, buf_height, 1 },
         .mipLevels = 1,
         .arrayLayers = 1,
         .samples = VK_SAMPLE_COUNT_1_BIT,
         .tiling = VK_IMAGE_TILING_LINEAR,
         .usage = VK_IMAGE_USAGE_TRANSFER_DST_BIT,
         .sharingMode = VK_SHARING_MODE_EXCLUSIVE,
         .queueFamilyIndexCount = 0,
         .initialLayout = VK_IMAGE_LAYOUT_GENERAL,
      };

      VkImage buffer_image;
      result =
         v3dv_CreateImage(_device, &image_info, &device->vk.alloc, &buffer_image);
      if (result != VK_SUCCESS)
         return handled;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)buffer_image,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);

      /* Bind the buffer memory to the image */
      VkDeviceSize buffer_offset = buffer->mem_offset + region->bufferOffset +
         i * buf_width * buf_height * buffer_bpp;
      result =
         vk_common_BindImageMemory(_device, buffer_image,
                                   v3dv_device_memory_to_handle(buffer->mem),
                                   buffer_offset);
      if (result != VK_SUCCESS)
         return handled;

      /* Blit-copy the requested image extent.
       *
       * Since we are copying, the blit must use the same format on the
       * destination and source images to avoid format conversions. The
       * only exception is copying stencil, which we upload to a R8UI source
       * image, but that we need to blit to a S8D24 destination (the only
       * stencil format we support).
       */
      const VkImageBlit2KHR blit_region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
         .srcSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = region->imageSubresource.mipLevel,
            .baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
            .layerCount = 1,
         },
         .srcOffsets = {
            {
               DIV_ROUND_UP(region->imageOffset.x, block_width),
               DIV_ROUND_UP(region->imageOffset.y, block_height),
               region->imageOffset.z + i,
            },
            {
               DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
                            block_width),
               DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
                            block_height),
               region->imageOffset.z + i + 1,
            },
         },
         .dstSubresource = {
            .aspectMask = copy_aspect,
            .mipLevel = 0,
            .baseArrayLayer = 0,
            .layerCount = 1,
         },
         .dstOffsets = {
            { 0, 0, 0 },
            {
               DIV_ROUND_UP(region->imageExtent.width, block_width),
               DIV_ROUND_UP(region->imageExtent.height, block_height),
               1
            },
         },
      };

      handled = blit_shader(cmd_buffer,
                            v3dv_image_from_handle(buffer_image), dst_format,
                            image, src_format,
                            cmask, &cswizzle,
                            &blit_region, VK_FILTER_NEAREST, false);
      if (!handled) {
         /* This is unexpected, we should have a supported blit spec */
         unreachable("Unable to blit buffer to destination image");
         return false;
      }
   }

   assert(handled);
   return true;
}
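
/* Note (not from the original source): worked example of the depth/stencil
 * handling above. Copying the depth aspect of a D24S8 image uses RGBA8UI
 * views on both ends. The hardware keeps depth in bits 8-31 of each 32-bit
 * word, so the source swizzle (R,G,B,A) = (G,B,A,ZERO) moves the three
 * depth bytes down to bits 0-23 of each buffer texel, and the RGB-only
 * color write mask keeps the blit from touching bits 24-31 of the
 * destination, as the comment in the bpp switch explains.
 */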

static VkFormat
get_compatible_tlb_format(VkFormat format)
{
   switch (format) {
   case VK_FORMAT_R8G8B8A8_SNORM:
      return VK_FORMAT_R8G8B8A8_UINT;

   case VK_FORMAT_R8G8_SNORM:
      return VK_FORMAT_R8G8_UINT;

   case VK_FORMAT_R8_SNORM:
      return VK_FORMAT_R8_UINT;

   case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
      return VK_FORMAT_A8B8G8R8_UINT_PACK32;

   case VK_FORMAT_R16_UNORM:
   case VK_FORMAT_R16_SNORM:
      return VK_FORMAT_R16_UINT;

   case VK_FORMAT_R16G16_UNORM:
   case VK_FORMAT_R16G16_SNORM:
      return VK_FORMAT_R16G16_UINT;

   case VK_FORMAT_R16G16B16A16_UNORM:
   case VK_FORMAT_R16G16B16A16_SNORM:
      return VK_FORMAT_R16G16B16A16_UINT;

   case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
      return VK_FORMAT_R32_SFLOAT;

   /* We can't render to compressed formats using the TLB so instead we use
    * a compatible format with the same bpp as the compressed format. Because
    * the compressed format's bpp is for a full block (i.e. 4x4 pixels in the
    * case of ETC), when we implement copies with the compatible format we
    * will have to divide offsets and dimensions on the compressed image by
    * the compressed block size.
    */
   case VK_FORMAT_ETC2_R8G8B8A8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11G11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11G11_SNORM_BLOCK:
   case VK_FORMAT_BC2_UNORM_BLOCK:
   case VK_FORMAT_BC2_SRGB_BLOCK:
   case VK_FORMAT_BC3_SRGB_BLOCK:
   case VK_FORMAT_BC3_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_4x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x4_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x4_SRGB_BLOCK:
   case VK_FORMAT_ASTC_5x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_5x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_6x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_6x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_8x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_8x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x5_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x5_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x6_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x6_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x8_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x8_SRGB_BLOCK:
   case VK_FORMAT_ASTC_10x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_10x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x10_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x10_SRGB_BLOCK:
   case VK_FORMAT_ASTC_12x12_UNORM_BLOCK:
   case VK_FORMAT_ASTC_12x12_SRGB_BLOCK:
      return VK_FORMAT_R32G32B32A32_UINT;

   case VK_FORMAT_ETC2_R8G8B8A1_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8A1_SRGB_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_UNORM_BLOCK:
   case VK_FORMAT_ETC2_R8G8B8_SRGB_BLOCK:
   case VK_FORMAT_EAC_R11_UNORM_BLOCK:
   case VK_FORMAT_EAC_R11_SNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGB_SRGB_BLOCK:
   case VK_FORMAT_BC1_RGBA_UNORM_BLOCK:
   case VK_FORMAT_BC1_RGBA_SRGB_BLOCK:
      return VK_FORMAT_R16G16B16A16_UINT;

   default:
      return VK_FORMAT_UNDEFINED;
   }
}
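
/* Note (not from the original source): example of the block-size handling
 * described above. ETC2_R8G8B8A8 uses 128-bit 4x4 blocks, so a WxH image in
 * that format is treated as a ceil(W/4) x ceil(H/4) image of
 * VK_FORMAT_R32G32B32A32_UINT (also 16 bytes per element), while the
 * 64-bit-per-block formats (ETC2 RGB8, EAC R11, BC1) map to
 * VK_FORMAT_R16G16B16A16_UINT. Callers divide offsets and dimensions by the
 * block size accordingly.
 */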

static inline bool
can_use_tlb(struct v3dv_image *image,
            const VkOffset3D *offset,
            VkFormat *compat_format)
{
   if (offset->x != 0 || offset->y != 0)
      return false;

   if (image->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO) {
      if (compat_format)
         *compat_format = image->vk_format;
      return true;
   }

   /* If the image format is not TLB-supported, then check if we can use
    * a compatible format instead.
    */
   if (compat_format) {
      *compat_format = get_compatible_tlb_format(image->vk_format);
      if (*compat_format != VK_FORMAT_UNDEFINED)
         return true;
   }

   return false;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImageToBuffer2KHR(VkCommandBuffer commandBuffer,
                              const VkCopyImageToBufferInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->dstBuffer);

   assert(image->samples == VK_SAMPLE_COUNT_1_BIT);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_to_buffer_tlb(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      if (copy_image_to_buffer_blit(cmd_buffer, buffer, image, &info->pRegions[i]))
         continue;
      unreachable("Unsupported image to buffer copy.");
   }
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   /* Destination can't be raster format */
   if (dst->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can only do full copies, so if the format is D24S8 both aspects need
    * to be copied. We only need to check the dst format because the spec
    * states that depth/stencil formats must match exactly.
    */
   if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
                                            VK_IMAGE_ASPECT_STENCIL_BIT;
      if (region->dstSubresource.aspectMask != ds_aspects)
         return false;
   }

   /* Don't handle copies between uncompressed and compressed formats for now.
    *
    * FIXME: we should be able to handle these easily but there is no coverage
    * in CTS at the moment that makes such copies with full images (which we
    * require here), only partial copies. Also, in that case the code below that
    * checks for "dst image complete" requires some changes, since it is
    * checking against the region dimensions, which are in units of the source
    * image format.
    */
   if (vk_format_is_compressed(dst->vk_format) !=
       vk_format_is_compressed(src->vk_format)) {
      return false;
   }

   /* Source region must start at (0,0) */
   if (region->srcOffset.x != 0 || region->srcOffset.y != 0)
      return false;

   /* Destination image must be complete */
   if (region->dstOffset.x != 0 || region->dstOffset.y != 0)
      return false;

   const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
   uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
   uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
   if (region->extent.width != dst_width || region->extent.height != dst_height)
      return false;

   /* From vkCmdCopyImage:
    *
    *   "When copying between compressed and uncompressed formats the extent
    *    members represent the texel dimensions of the source image and not
    *    the destination."
    */
   const uint32_t block_w = vk_format_get_blockwidth(src->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(src->vk_format);
   uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   /* Account for sample count */
   assert(dst->samples == src->samples);
   if (dst->samples > VK_SAMPLE_COUNT_1_BIT) {
      assert(dst->samples == VK_SAMPLE_COUNT_4_BIT);
      width *= 2;
      height *= 2;
   }

   /* The TFU unit doesn't handle format conversions so we need the formats to
    * match. On the other hand, vkCmdCopyImage allows different color formats
    * on the source and destination images, but only if they are texel
    * compatible. For us, this means that we can effectively ignore different
    * formats and just make the copy using either of them, since we are just
    * moving raw data and not making any conversions.
    *
    * Also, the formats supported by the TFU unit are limited, but again, since
    * we are only doing raw copies here without interpreting or converting
    * the underlying pixel data according to its format, we can always choose
    * to use compatible formats that are supported with the TFU unit.
    */
   assert(dst->cpp == src->cpp);
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     dst->cpp, NULL);

   /* Emit a TFU job for each layer to blit */
   const uint32_t layer_count = dst->type != VK_IMAGE_TYPE_3D ?
                                region->dstSubresource.layerCount :
                                region->extent.depth;
   const uint32_t src_mip_level = region->srcSubresource.mipLevel;

   const uint32_t base_src_layer = src->type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer : region->srcOffset.z;
   const uint32_t base_dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer : region->dstOffset.z;
   for (uint32_t i = 0; i < layer_count; i++) {
      v3dv_X(cmd_buffer->device, cmd_buffer_emit_tfu_job)
         (cmd_buffer, dst, dst_mip_level, base_dst_layer + i,
          src, src_mip_level, base_src_layer + i,
          width, height, format);
   }

   return true;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
               struct v3dv_image *dst,
               struct v3dv_image *src,
               const VkImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!can_use_tlb(src, &region->srcOffset, &fb_format) ||
       !can_use_tlb(dst, &region->dstOffset, &fb_format)) {
      return false;
   }

   /* From the Vulkan spec, VkImageCopy valid usage:
    *
    *   "If neither the calling command’s srcImage nor the calling command’s
    *    dstImage has a multi-planar image format then the aspectMask member
    *    of srcSubresource and dstSubresource must match."
    */
   assert(region->dstSubresource.aspectMask ==
          region->srcSubresource.aspectMask);
   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->dstSubresource.aspectMask,
       &internal_type, &internal_bpp);

   /* From the Vulkan spec with VK_KHR_maintenance1, VkImageCopy valid usage:
    *
    *   "The number of slices of the extent (for 3D) or layers of the
    *    srcSubresource (for non-3D) must match the number of slices of the
    *    extent (for 3D) or layers of the dstSubresource (for non-3D)."
    */
   assert((src->type != VK_IMAGE_TYPE_3D ?
           region->srcSubresource.layerCount : region->extent.depth) ==
          (dst->type != VK_IMAGE_TYPE_3D ?
           region->dstSubresource.layerCount : region->extent.depth));
   uint32_t num_layers;
   if (dst->type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy to compressed image using compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp,
                        src->samples > VK_SAMPLE_COUNT_1_BIT);

   struct framebuffer_data framebuffer;
   v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,
                                               &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, job_emit_copy_image_rcl)(job, dst, src, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

/**
 * Takes the image provided as argument and creates a new image that has
 * the same specification and aliases the same memory storage, except that:
 *
 *   - It has the uncompressed format passed in.
 *   - Its original width/height are scaled by the factors passed in.
 *
 * This is useful to implement copies from compressed images using the blit
 * path. The idea is that we create uncompressed "image views" of both the
 * source and destination images using the uncompressed format and then we
 * define the copy blit in terms of that format.
 */
static struct v3dv_image *
create_image_alias(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *src,
                   float width_scale,
                   float height_scale,
                   VkFormat format)
{
   assert(!vk_format_is_compressed(format));

   VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);

   VkImageCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
      .imageType = src->type,
      .format = format,
      .extent = {
         .width = src->extent.width * width_scale,
         .height = src->extent.height * height_scale,
         .depth = src->extent.depth,
      },
      .mipLevels = src->levels,
      .arrayLayers = src->array_size,
      .samples = src->samples,
      .tiling = src->tiling,
      .usage = src->usage,
   };

   VkImage _image;
   VkResult result =
      v3dv_CreateImage(_device, &info, &cmd_buffer->device->vk.alloc, &_image);
   if (result != VK_SUCCESS) {
      v3dv_flag_oom(cmd_buffer, NULL);
      return NULL;
   }

   struct v3dv_image *image = v3dv_image_from_handle(_image);
   image->mem = src->mem;
   image->mem_offset = src->mem_offset;
   return image;
}

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *dst,
                struct v3dv_image *src,
                const VkImageCopy2KHR *region)
{
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
   const float block_scale_w = (float)src_block_w / (float)dst_block_w;
   const float block_scale_h = (float)src_block_h / (float)dst_block_h;

   /* We need to choose a single format for the blit to ensure that this is
    * really a copy and there are no format conversions going on. Since we
    * are going to blit, we need to make sure that the selected format can be
    * both rendered to and textured from.
    */
   VkFormat format;
   float src_scale_w = 1.0f;
   float src_scale_h = 1.0f;
   float dst_scale_w = block_scale_w;
   float dst_scale_h = block_scale_h;
   if (vk_format_is_compressed(src->vk_format)) {
      /* If we are copying from a compressed format we should be aware that we
       * are going to texture from the source image, and the texture setup
       * knows the actual size of the image, so we need to choose a format
       * that has a per-texel (not per-block) bpp that is compatible for that
       * image size. For example, for a source image with size Bw*WxBh*H
       * and format ETC2_RGBA8_UNORM copied to a WxH image of format RGBA32UI,
       * each of the Bw*WxBh*H texels in the compressed source image is 8-bit
       * (which translates to a 128-bit 4x4 RGBA32 block when uncompressed),
       * so we could specify a blit with size Bw*WxBh*H and a format with
       * a bpp of 8-bit per texel (R8_UINT).
       *
       * Unfortunately, when copying from a format like ETC2_RGB8A1_UNORM,
       * which is 64-bit per texel, then we would need a 4-bit format, which
       * we don't have, so instead we still choose an 8-bit format, but we
       * apply a divisor to the row dimensions of the blit, since we are
       * copying two texels per item.
       *
       * Generally, we can choose any format so long as we compute appropriate
       * divisors for the width and height depending on the source image's
       * bpp.
       */
      assert(src->cpp == dst->cpp);

      format = VK_FORMAT_R32G32_UINT;
      switch (src->cpp) {
      case 16:
         format = VK_FORMAT_R32G32B32A32_UINT;
         break;
      case 8:
         format = VK_FORMAT_R16G16B16A16_UINT;
         break;
      default:
         unreachable("Unsupported compressed format");
      }

      /* Create image views of the src/dst images that we can interpret in
       * terms of the canonical format.
       */
      src_scale_w /= src_block_w;
      src_scale_h /= src_block_h;
      dst_scale_w /= src_block_w;
      dst_scale_h /= src_block_h;

      src = create_image_alias(cmd_buffer, src,
                               src_scale_w, src_scale_h, format);

      dst = create_image_alias(cmd_buffer, dst,
                               dst_scale_w, dst_scale_h, format);
   } else {
      format = src->format->rt_type != V3D_OUTPUT_IMAGE_FORMAT_NO ?
         src->vk_format : get_compatible_tlb_format(src->vk_format);
      if (format == VK_FORMAT_UNDEFINED)
         return false;

      const struct v3dv_format *f = v3dv_X(cmd_buffer->device, get_format)(format);
      if (!f->supported || f->tex_type == TEXTURE_DATA_FORMAT_NO)
         return false;
   }

   /* Given an uncompressed image with size WxH, if we copy it to a compressed
    * image, it will result in an image with size W*bWxH*bH, where bW and bH
    * are the compressed format's block width and height. This means that
    * copies between compressed and uncompressed images involve different
    * image sizes, and therefore, we need to take that into account when
    * setting up the source and destination blit regions below, so they are
    * consistent from the point of view of the single compatible format
    * selected for the copy.
    *
    * We should take into account that the dimensions of the region provided
    * to the copy command are specified in terms of the source image. With that
    * in mind, below we adjust the blit destination region to be consistent with
    * the source region for the compatible format, so basically, we apply
    * the block scale factor to the destination offset provided by the copy
    * command (because it is specified in terms of the destination image, not
    * the source), and then we just add the region copy dimensions to that
    * (since the region dimensions are already specified in terms of the source
    * image).
    */
   const VkOffset3D src_start = {
      region->srcOffset.x * src_scale_w,
      region->srcOffset.y * src_scale_h,
      region->srcOffset.z,
   };
   const VkOffset3D src_end = {
      src_start.x + region->extent.width * src_scale_w,
      src_start.y + region->extent.height * src_scale_h,
      src_start.z + region->extent.depth,
   };

   const VkOffset3D dst_start = {
      region->dstOffset.x * dst_scale_w,
      region->dstOffset.y * dst_scale_h,
      region->dstOffset.z,
   };
   const VkOffset3D dst_end = {
      dst_start.x + region->extent.width * src_scale_w,
      dst_start.y + region->extent.height * src_scale_h,
      dst_start.z + region->extent.depth,
   };

   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = { src_start, src_end },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = { dst_start, dst_end },
   };
   bool handled = blit_shader(cmd_buffer,
                              dst, format,
                              src, format,
                              0, NULL,
                              &blit_region, VK_FILTER_NEAREST, true);

   /* We should have selected formats that we can blit */
   assert(handled);
   return handled;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyImage2KHR(VkCommandBuffer commandBuffer,
                      const VkCopyImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   assert(src->samples == dst->samples);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (copy_image_tfu(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (copy_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Image copy not supported");
   }
}
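
/* Note (not from the original source): vkCmdCopyImage2KHR tries the copy
 * paths from most to least constrained: the TFU path (full-surface raw
 * copies between tiled images), then the TLB path (regions starting at
 * offset (0,0) in a renderable or TLB-compatible format), and finally the
 * shader blit fallback. Each helper returns false only when it cannot
 * express the requested region at all, so the unreachable() is never
 * expected to fire.
 */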

static void
get_hw_clear_color(struct v3dv_device *device,
                   const VkClearColorValue *color,
                   VkFormat fb_format,
                   VkFormat image_format,
                   uint32_t internal_type,
                   uint32_t internal_bpp,
                   uint32_t *hw_color)
{
   const uint32_t internal_size = 4 << internal_bpp;

   /* If the image format doesn't match the framebuffer format, then we are
    * trying to clear an unsupported tlb format using a compatible
    * format for the framebuffer. In this case, we want to make sure that
    * we pack the clear value according to the original format semantics,
    * not the compatible format.
    */
   if (fb_format == image_format) {
      v3dv_X(device, get_hw_clear_color)(color, internal_type, internal_size, hw_color);
   } else {
      union util_color uc;
      enum pipe_format pipe_image_format =
         vk_format_to_pipe_format(image_format);
      util_pack_color(color->float32, pipe_image_format, &uc);
      memcpy(hw_color, uc.ui, internal_size);
   }
}
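
/* Note (not from the original source): for example, clearing a
 * VK_FORMAT_R8G8B8A8_SNORM image goes through the compatible
 * VK_FORMAT_R8G8B8A8_UINT framebuffer format (see
 * get_compatible_tlb_format()), so fb_format != image_format and the clear
 * color is packed with util_pack_color() using the original SNORM format
 * semantics instead of the per-version hardware packing for the UINT view.
 */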

/* Returns true if the implementation is able to handle the case, false
 * otherwise.
 */
static bool
clear_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                struct v3dv_image *image,
                const VkClearValue *clear_value,
                const VkImageSubresourceRange *range)
{
   const VkOffset3D origin = { 0, 0, 0 };
   VkFormat fb_format;
   if (!can_use_tlb(image, &origin, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, range->aspectMask,
       &internal_type, &internal_bpp);

   union v3dv_clear_value hw_clear_value = { 0 };
   if (range->aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
      get_hw_clear_color(cmd_buffer->device, &clear_value->color, fb_format,
                         image->vk_format, internal_type, internal_bpp,
                         &hw_clear_value.color[0]);
   } else {
      assert((range->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) ||
             (range->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT));
      hw_clear_value.z = clear_value->depthStencil.depth;
      hw_clear_value.s = clear_value->depthStencil.stencil;
   }

   uint32_t level_count = range->levelCount == VK_REMAINING_MIP_LEVELS ?
                          image->levels - range->baseMipLevel :
                          range->levelCount;
   uint32_t min_level = range->baseMipLevel;
   uint32_t max_level = range->baseMipLevel + level_count;

   /* For 3D images baseArrayLayer and layerCount must be 0 and 1 respectively.
    * Instead, we need to consider the full depth dimension of the image, which
    * goes from 0 up to the level's depth extent.
    */
   uint32_t min_layer;
   uint32_t max_layer;
   if (image->type != VK_IMAGE_TYPE_3D) {
      uint32_t layer_count = range->layerCount == VK_REMAINING_ARRAY_LAYERS ?
                             image->array_size - range->baseArrayLayer :
                             range->layerCount;
      min_layer = range->baseArrayLayer;
      max_layer = range->baseArrayLayer + layer_count;
   } else {
      min_layer = 0;
      max_layer = 0;
   }

   for (uint32_t level = min_level; level < max_level; level++) {
      if (image->type == VK_IMAGE_TYPE_3D)
         max_layer = u_minify(image->extent.depth, level);
      for (uint32_t layer = min_layer; layer < max_layer; layer++) {
         uint32_t width = u_minify(image->extent.width, level);
         uint32_t height = u_minify(image->extent.height, level);

         struct v3dv_job *job =
            v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);

         if (!job)
            return true;

         /* We start a new job for each layer so the frame "depth" is 1 */
         v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp,
                              image->samples > VK_SAMPLE_COUNT_1_BIT);

         struct framebuffer_data framebuffer;
         v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,
                                                     &job->frame_tiling);

         v3dv_X(job->device, job_emit_binning_flush)(job);

         /* If this triggers it is an application bug: the spec requires
          * that any aspects to clear are present in the image.
          */
         assert(range->aspectMask & image->aspects);

         v3dv_X(job->device, job_emit_clear_image_rcl)
            (job, image, &framebuffer, &hw_clear_value,
             range->aspectMask, layer, level);

         v3dv_cmd_buffer_finish_job(cmd_buffer);
      }
   }

   return true;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearColorImage(VkCommandBuffer commandBuffer,
                        VkImage _image,
                        VkImageLayout imageLayout,
                        const VkClearColorValue *pColor,
                        uint32_t rangeCount,
                        const VkImageSubresourceRange *pRanges)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, _image);

   const VkClearValue clear_value = {
      .color = *pColor,
   };

   for (uint32_t i = 0; i < rangeCount; i++) {
      if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
         continue;
      unreachable("Unsupported color clear.");
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                               VkImage _image,
                               VkImageLayout imageLayout,
                               const VkClearDepthStencilValue *pDepthStencil,
                               uint32_t rangeCount,
                               const VkImageSubresourceRange *pRanges)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, image, _image);

   const VkClearValue clear_value = {
      .depthStencil = *pDepthStencil,
   };

   for (uint32_t i = 0; i < rangeCount; i++) {
      if (clear_image_tlb(cmd_buffer, image, &clear_value, &pRanges[i]))
         continue;
      unreachable("Unsupported depth/stencil clear.");
   }
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdCopyBuffer2KHR(VkCommandBuffer commandBuffer,
                       const VkCopyBufferInfo2KHR *pCopyBufferInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, src_buffer, pCopyBufferInfo->srcBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, pCopyBufferInfo->dstBuffer);

   for (uint32_t i = 0; i < pCopyBufferInfo->regionCount; i++) {
      v3dv_X(cmd_buffer->device, cmd_buffer_copy_buffer)
         (cmd_buffer,
          dst_buffer->mem->bo, dst_buffer->mem_offset,
          src_buffer->mem->bo, src_buffer->mem_offset,
          &pCopyBufferInfo->pRegions[i]);
   }
}

static void
destroy_update_buffer_cb(VkDevice _device,
                         uint64_t pobj,
                         VkAllocationCallbacks *alloc)
{
   V3DV_FROM_HANDLE(v3dv_device, device, _device);
   struct v3dv_bo *bo = (struct v3dv_bo *)((uintptr_t) pobj);
   v3dv_bo_free(device, bo);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                     VkBuffer dstBuffer,
                     VkDeviceSize dstOffset,
                     VkDeviceSize dataSize,
                     const void *pData)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

   struct v3dv_bo *src_bo =
      v3dv_bo_alloc(cmd_buffer->device, dataSize, "vkCmdUpdateBuffer", true);
   if (!src_bo) {
      fprintf(stderr, "Failed to allocate BO for vkCmdUpdateBuffer.\n");
      return;
   }

   bool ok = v3dv_bo_map(cmd_buffer->device, src_bo, src_bo->size);
   if (!ok) {
      fprintf(stderr, "Failed to map BO for vkCmdUpdateBuffer.\n");
      /* Don't leak the BO we just allocated if we can't map it */
      v3dv_bo_free(cmd_buffer->device, src_bo);
      return;
   }

   memcpy(src_bo->map, pData, dataSize);

   v3dv_bo_unmap(cmd_buffer->device, src_bo);

   VkBufferCopy2KHR region = {
      .sType = VK_STRUCTURE_TYPE_BUFFER_COPY_2_KHR,
      .srcOffset = 0,
      .dstOffset = dstOffset,
      .size = dataSize,
   };
   struct v3dv_job *copy_job =
      v3dv_X(cmd_buffer->device, cmd_buffer_copy_buffer)
      (cmd_buffer, dst_buffer->mem->bo, dst_buffer->mem_offset,
       src_bo, 0, &region);

   if (!copy_job)
      return;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uint64_t)(uintptr_t)src_bo, destroy_update_buffer_cb);
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdFillBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize size,
                   uint32_t data)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_buffer, dst_buffer, dstBuffer);

   struct v3dv_bo *bo = dst_buffer->mem->bo;

   /* From the Vulkan spec:
    *
    *   "If VK_WHOLE_SIZE is used and the remaining size of the buffer is not
    *    a multiple of 4, then the nearest smaller multiple is used."
    */
   if (size == VK_WHOLE_SIZE) {
      size = dst_buffer->size - dstOffset;
      size -= size % 4;
   }

   v3dv_X(cmd_buffer->device, cmd_buffer_fill_buffer)
      (cmd_buffer, bo, dstOffset, size, data);
}
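
/* Note (not from the original source): worked example of the VK_WHOLE_SIZE
 * clamping above. Filling a 1030-byte buffer from dstOffset 4 leaves 1026
 * remaining bytes, which is rounded down to 1024, the nearest smaller
 * multiple of 4, before being passed to the per-version fill implementation.
 */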

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tfu(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   assert(image->samples == VK_SAMPLE_COUNT_1_BIT);

   /* Destination can't be raster format */
   if (image->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* We can't copy D24S8 because buffer to image copies only copy one aspect
    * at a time, and the TFU copies full images. Also, V3D depth bits for
    * both D24S8 and D24X8 are stored in the 24-bit MSB of each 32-bit word,
    * but the Vulkan spec has the buffer data specified the other way around,
    * so it is not a straight copy, we would have to swizzle the channels,
    * which the TFU can't do.
    */
   if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
       image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
      return false;
   }

   /* Region must include full slice */
   const uint32_t offset_x = region->imageOffset.x;
   const uint32_t offset_y = region->imageOffset.y;
   if (offset_x != 0 || offset_y != 0)
      return false;

   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   if (width != image->extent.width || height != image->extent.height)
      return false;

   /* Handle region semantics for compressed images */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
   width = DIV_ROUND_UP(width, block_w);
   height = DIV_ROUND_UP(height, block_h);

   /* Format must be supported for texturing via the TFU. Since we are just
    * copying raw data and not converting between pixel formats, we can ignore
    * the image's format and choose a compatible TFU format for the image
    * texel size instead, which expands the list of formats we can handle here.
    */
   const struct v3dv_format *format =
      v3dv_get_compatible_tfu_format(cmd_buffer->device,
                                     image->cpp, NULL);

   const uint32_t mip_level = region->imageSubresource.mipLevel;
   const struct v3d_resource_slice *slice = &image->slices[mip_level];

   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   assert(image->mem && image->mem->bo);
   const struct v3dv_bo *dst_bo = image->mem->bo;

   assert(buffer->mem && buffer->mem->bo);
   const struct v3dv_bo *src_bo = buffer->mem->bo;

   /* Emit a TFU job per layer to copy */
   const uint32_t buffer_stride = width * image->cpp;
   for (int i = 0; i < num_layers; i++) {
      uint32_t layer;
      if (image->type != VK_IMAGE_TYPE_3D)
         layer = region->imageSubresource.baseArrayLayer + i;
      else
         layer = region->imageOffset.z + i;

      struct drm_v3d_submit_tfu tfu = {
         .ios = (height << 16) | width,
         .bo_handles = {
            dst_bo->handle,
            src_bo->handle != dst_bo->handle ? src_bo->handle : 0
         },
      };

      const uint32_t buffer_offset =
         buffer->mem_offset + region->bufferOffset +
         height * buffer_stride * i;

      const uint32_t src_offset = src_bo->offset + buffer_offset;
      tfu.iia |= src_offset;
      tfu.icfg |= V3D_TFU_ICFG_FORMAT_RASTER << V3D_TFU_ICFG_FORMAT_SHIFT;
      tfu.iis |= width;

      const uint32_t dst_offset =
         dst_bo->offset + v3dv_layer_offset(image, mip_level, layer);
      tfu.ioa |= dst_offset;

      tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
                  (slice->tiling - V3D_TILING_LINEARTILE)) <<
                   V3D_TFU_IOA_FORMAT_SHIFT;
      tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;

      /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
       * OPAD field for the destination (how many extra UIF blocks beyond
       * those necessary to cover the height).
       */
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         uint32_t uif_block_h = 2 * v3d_utile_height(image->cpp);
         uint32_t implicit_padded_height = align(height, uif_block_h);
         uint32_t icfg =
            (slice->padded_height - implicit_padded_height) / uif_block_h;
         tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
      }

      v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
   }

   return true;
}
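
/* Note (not from the original source): in the TFU jobs above the input is
 * described as raster order (V3D_TFU_ICFG_FORMAT_RASTER) while the output
 * format is derived from the destination slice's tiling mode, so the TFU
 * performs the linear-to-tiled conversion as part of the copy. One job is
 * submitted per layer, with the per-layer source offset advancing by
 * height * width * cpp bytes, and for UIF tilings the OPAD field accounts
 * for the slice's extra padded height.
 */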

/**
 * Returns true if the implementation supports the requested operation (even if
 * it failed to process it, for example, due to an out-of-memory error).
 */
static bool
copy_buffer_to_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                         struct v3dv_image *image,
                         struct v3dv_buffer *buffer,
                         const VkBufferImageCopy2KHR *region)
{
   VkFormat fb_format;
   if (!can_use_tlb(image, &region->imageOffset, &fb_format))
      return false;

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->imageSubresource.aspectMask,
       &internal_type, &internal_bpp);

   uint32_t num_layers;
   if (image->type != VK_IMAGE_TYPE_3D)
      num_layers = region->imageSubresource.layerCount;
   else
      num_layers = region->imageExtent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   /* Handle copy to compressed format using a compatible format */
   const uint32_t block_w = vk_format_get_blockwidth(image->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(image->vk_format);
   const uint32_t width = DIV_ROUND_UP(region->imageExtent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->imageExtent.height, block_h);

   v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, false);

   struct framebuffer_data framebuffer;
   v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format, internal_type,
                                               &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, job_emit_copy_buffer_to_image_rcl)
      (job, image, buffer, &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);

   return true;
}

static bool
create_tiled_image_from_buffer(struct v3dv_cmd_buffer *cmd_buffer,
                               struct v3dv_image *image,
                               struct v3dv_buffer *buffer,
                               const VkBufferImageCopy2KHR *region)
{
   if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, region))
      return true;
   if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, region))
      return true;
   return false;
}
1615
1616
static VkResult
1617
create_texel_buffer_copy_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
1618
{
1619
/* If this is not the first pool we create for this command buffer,
1620
* size it based on the size of the currently exhausted pool.
1621
*/
1622
uint32_t descriptor_count = 64;
1623
if (cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE) {
1624
struct v3dv_descriptor_pool *exhausted_pool =
1625
v3dv_descriptor_pool_from_handle(cmd_buffer->meta.texel_buffer_copy.dspool);
1626
descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
1627
}
1628
1629
/* Create the descriptor pool */
1630
cmd_buffer->meta.texel_buffer_copy.dspool = VK_NULL_HANDLE;
1631
VkDescriptorPoolSize pool_size = {
1632
.type = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1633
.descriptorCount = descriptor_count,
1634
};
1635
VkDescriptorPoolCreateInfo info = {
1636
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
1637
.maxSets = descriptor_count,
1638
.poolSizeCount = 1,
1639
.pPoolSizes = &pool_size,
1640
.flags = 0,
1641
};
1642
VkResult result =
1643
v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
1644
&info,
1645
&cmd_buffer->device->vk.alloc,
1646
&cmd_buffer->meta.texel_buffer_copy.dspool);
1647
1648
if (result == VK_SUCCESS) {
1649
assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
1650
const VkDescriptorPool _pool = cmd_buffer->meta.texel_buffer_copy.dspool;
1651
1652
v3dv_cmd_buffer_add_private_obj(
1653
cmd_buffer, (uintptr_t) _pool,
1654
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);
1655
1656
struct v3dv_descriptor_pool *pool =
1657
v3dv_descriptor_pool_from_handle(_pool);
1658
pool->is_driver_internal = true;
1659
}
1660
1661
return result;
1662
}
1663
1664
static VkResult
1665
allocate_texel_buffer_copy_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
1666
VkDescriptorSet *set)
1667
{
1668
/* Make sure we have a descriptor pool */
1669
VkResult result;
1670
if (cmd_buffer->meta.texel_buffer_copy.dspool == VK_NULL_HANDLE) {
1671
result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
1672
if (result != VK_SUCCESS)
1673
return result;
1674
}
1675
assert(cmd_buffer->meta.texel_buffer_copy.dspool != VK_NULL_HANDLE);
1676
1677
/* Allocate descriptor set */
1678
struct v3dv_device *device = cmd_buffer->device;
1679
VkDevice _device = v3dv_device_to_handle(device);
1680
VkDescriptorSetAllocateInfo info = {
1681
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
1682
.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool,
1683
.descriptorSetCount = 1,
1684
.pSetLayouts = &device->meta.texel_buffer_copy.ds_layout,
1685
};
1686
result = v3dv_AllocateDescriptorSets(_device, &info, set);
1687
1688
/* If we ran out of pool space, grow the pool and try again */
1689
if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
1690
result = create_texel_buffer_copy_descriptor_pool(cmd_buffer);
1691
if (result == VK_SUCCESS) {
1692
info.descriptorPool = cmd_buffer->meta.texel_buffer_copy.dspool;
1693
result = v3dv_AllocateDescriptorSets(_device, &info, set);
1694
}
1695
}
1696
1697
return result;
1698
}
1699
1700
static void
1701
get_texel_buffer_copy_pipeline_cache_key(VkFormat format,
1702
VkColorComponentFlags cmask,
1703
VkComponentMapping *cswizzle,
1704
bool is_layered,
1705
uint8_t *key)
1706
{
1707
memset(key, 0, V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
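/* Key layout sketch (one 32-bit word each unless noted): format, cmask,
* is_layered flag, then the four words of the VkComponentMapping; this must
* add up to V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE, as asserted below.
*/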
1708
1709
uint32_t *p = (uint32_t *) key;
1710
1711
*p = format;
1712
p++;
1713
1714
*p = cmask;
1715
p++;
1716
1717
/* Note that we are using a single byte for this, so we could pack
1718
* more data into this 32-bit slot in the future.
1719
*/
1720
*p = is_layered ? 1 : 0;
1721
p++;
1722
1723
memcpy(p, cswizzle, sizeof(VkComponentMapping));
1724
p += sizeof(VkComponentMapping) / sizeof(uint32_t);
1725
1726
assert(((uint8_t*)p - key) == V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE);
1727
}
1728
1729
static bool
1730
create_blit_render_pass(struct v3dv_device *device,
1731
VkFormat dst_format,
1732
VkFormat src_format,
1733
VkRenderPass *pass_load,
1734
VkRenderPass *pass_no_load);
1735
1736
static nir_ssa_def *gen_rect_vertices(nir_builder *b);
1737
1738
static bool
1739
create_pipeline(struct v3dv_device *device,
1740
struct v3dv_render_pass *pass,
1741
struct nir_shader *vs_nir,
1742
struct nir_shader *gs_nir,
1743
struct nir_shader *fs_nir,
1744
const VkPipelineVertexInputStateCreateInfo *vi_state,
1745
const VkPipelineDepthStencilStateCreateInfo *ds_state,
1746
const VkPipelineColorBlendStateCreateInfo *cb_state,
1747
const VkPipelineMultisampleStateCreateInfo *ms_state,
1748
const VkPipelineLayout layout,
1749
VkPipeline *pipeline);
1750
1751
static nir_shader *
1752
get_texel_buffer_copy_vs()
1753
{
1754
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1755
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
1756
"meta texel buffer copy vs");
1757
nir_variable *vs_out_pos =
1758
nir_variable_create(b.shader, nir_var_shader_out,
1759
glsl_vec4_type(), "gl_Position");
1760
vs_out_pos->data.location = VARYING_SLOT_POS;
1761
1762
nir_ssa_def *pos = gen_rect_vertices(&b);
1763
nir_store_var(&b, vs_out_pos, pos, 0xf);
1764
1765
return b.shader;
1766
}
1767
1768
static nir_shader *
1769
get_texel_buffer_copy_gs()
1770
{
1771
/* FIXME: this creates a geometry shader that takes the index of a single
1772
* layer to clear from push constants, so we need to emit a draw call for
1773
* each layer that we want to clear. We could actually do better and have it
1774
* take a range of layers; however, if we were to do this, we would need to
1775
* be careful not to exceed the maximum number of output vertices allowed in
1776
* a geometry shader.
1777
*/
1778
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1779
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_GEOMETRY, options,
1780
"meta texel buffer copy gs");
1781
nir_shader *nir = b.shader;
1782
nir->info.inputs_read = 1ull << VARYING_SLOT_POS;
1783
nir->info.outputs_written = (1ull << VARYING_SLOT_POS) |
1784
(1ull << VARYING_SLOT_LAYER);
1785
nir->info.gs.input_primitive = GL_TRIANGLES;
1786
nir->info.gs.output_primitive = GL_TRIANGLE_STRIP;
1787
nir->info.gs.vertices_in = 3;
1788
nir->info.gs.vertices_out = 3;
1789
nir->info.gs.invocations = 1;
1790
nir->info.gs.active_stream_mask = 0x1;
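/* This is effectively a pass-through GS: it re-emits the incoming triangle
* unchanged and only adds a gl_Layer output taken from push constants.
*/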
1791
1792
/* in vec4 gl_Position[3] */
1793
nir_variable *gs_in_pos =
1794
nir_variable_create(b.shader, nir_var_shader_in,
1795
glsl_array_type(glsl_vec4_type(), 3, 0),
1796
"in_gl_Position");
1797
gs_in_pos->data.location = VARYING_SLOT_POS;
1798
1799
/* out vec4 gl_Position */
1800
nir_variable *gs_out_pos =
1801
nir_variable_create(b.shader, nir_var_shader_out, glsl_vec4_type(),
1802
"out_gl_Position");
1803
gs_out_pos->data.location = VARYING_SLOT_POS;
1804
1805
/* out float gl_Layer */
1806
nir_variable *gs_out_layer =
1807
nir_variable_create(b.shader, nir_var_shader_out, glsl_float_type(),
1808
"out_gl_Layer");
1809
gs_out_layer->data.location = VARYING_SLOT_LAYER;
1810
1811
/* Emit output triangle */
1812
for (uint32_t i = 0; i < 3; i++) {
1813
/* gl_Position from shader input */
1814
nir_deref_instr *in_pos_i =
1815
nir_build_deref_array_imm(&b, nir_build_deref_var(&b, gs_in_pos), i);
1816
nir_copy_deref(&b, nir_build_deref_var(&b, gs_out_pos), in_pos_i);
1817
1818
/* gl_Layer from push constants */
1819
nir_ssa_def *layer =
1820
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1821
.base = TEXEL_BUFFER_COPY_GS_LAYER_PC_OFFSET,
1822
.range = 4);
1823
nir_store_var(&b, gs_out_layer, layer, 0x1);
1824
1825
nir_emit_vertex(&b, 0);
1826
}
1827
1828
nir_end_primitive(&b, 0);
1829
1830
return nir;
1831
}
1832
1833
static nir_ssa_def *
1834
load_frag_coord(nir_builder *b)
1835
{
1836
nir_foreach_shader_in_variable(var, b->shader) {
1837
if (var->data.location == VARYING_SLOT_POS)
1838
return nir_load_var(b, var);
1839
}
1840
nir_variable *pos = nir_variable_create(b->shader, nir_var_shader_in,
1841
glsl_vec4_type(), NULL);
1842
pos->data.location = VARYING_SLOT_POS;
1843
return nir_load_var(b, pos);
1844
}
1845
1846
static uint32_t
1847
component_swizzle_to_nir_swizzle(VkComponentSwizzle comp, VkComponentSwizzle swz)
1848
{
1849
if (swz == VK_COMPONENT_SWIZZLE_IDENTITY)
1850
swz = comp;
1851
1852
switch (swz) {
1853
case VK_COMPONENT_SWIZZLE_R:
1854
return 0;
1855
case VK_COMPONENT_SWIZZLE_G:
1856
return 1;
1857
case VK_COMPONENT_SWIZZLE_B:
1858
return 2;
1859
case VK_COMPONENT_SWIZZLE_A:
1860
return 3;
1861
default:
1862
unreachable("Invalid swizzle");
1863
};
1864
}
1865
1866
static nir_shader *
1867
get_texel_buffer_copy_fs(struct v3dv_device *device, VkFormat format,
1868
VkComponentMapping *cswizzle)
1869
{
1870
const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
1871
nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
1872
"meta texel buffer copy fs");
1873
1874
/* We only use the copy from texel buffer shader to implement
1875
* copy_buffer_to_image_shader, which always selects a compatible integer
1876
* format for the copy.
1877
*/
1878
assert(vk_format_is_int(format));
1879
1880
/* Fragment shader output color */
1881
nir_variable *fs_out_color =
1882
nir_variable_create(b.shader, nir_var_shader_out,
1883
glsl_uvec4_type(), "out_color");
1884
fs_out_color->data.location = FRAG_RESULT_DATA0;
1885
1886
/* Texel buffer input */
1887
const struct glsl_type *sampler_type =
1888
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_UINT);
1889
nir_variable *sampler =
1890
nir_variable_create(b.shader, nir_var_uniform, sampler_type, "texel_buf");
1891
sampler->data.descriptor_set = 0;
1892
sampler->data.binding = 0;
1893
1894
/* Load the box describing the pixel region we want to copy from the
1895
* texel buffer.
1896
*/
1897
nir_ssa_def *box =
1898
nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0),
1899
.base = TEXEL_BUFFER_COPY_FS_BOX_PC_OFFSET,
1900
.range = 16);
1901
1902
/* Load the buffer stride (this comes in texel units) */
1903
nir_ssa_def *stride =
1904
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1905
.base = TEXEL_BUFFER_COPY_FS_STRIDE_PC_OFFSET,
1906
.range = 4);
1907
1908
/* Load the buffer offset (this comes in texel units) */
1909
nir_ssa_def *offset =
1910
nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 0),
1911
.base = TEXEL_BUFFER_COPY_FS_OFFSET_PC_OFFSET,
1912
.range = 4);
1913
1914
nir_ssa_def *coord = nir_f2i32(&b, load_frag_coord(&b));
1915
1916
/* Load pixel data from texel buffer based on the x,y offset of the pixel
1917
* within the box. Texel buffers are 1D arrays of texels.
1918
*
1919
* Notice that we already make sure that we only generate fragments that are
1920
* inside the box through the scissor/viewport state, so our offset into the
1921
* texel buffer should always be within its bounds and we don't need
1922
* to add a check for that here.
1923
*/
1924
nir_ssa_def *x_offset =
1925
nir_isub(&b, nir_channel(&b, coord, 0),
1926
nir_channel(&b, box, 0));
1927
nir_ssa_def *y_offset =
1928
nir_isub(&b, nir_channel(&b, coord, 1),
1929
nir_channel(&b, box, 1));
1930
nir_ssa_def *texel_offset =
1931
nir_iadd(&b, nir_iadd(&b, offset, x_offset),
1932
nir_imul(&b, y_offset, stride));
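/* In other words, texel = offset + (frag.x - box.x) + (frag.y - box.y) * stride,
* a row-major walk over the 2D region stored in the texel buffer.
*/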
1933
1934
nir_ssa_def *tex_deref = &nir_build_deref_var(&b, sampler)->dest.ssa;
1935
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 2);
1936
tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
1937
tex->op = nir_texop_txf;
1938
tex->src[0].src_type = nir_tex_src_coord;
1939
tex->src[0].src = nir_src_for_ssa(texel_offset);
1940
tex->src[1].src_type = nir_tex_src_texture_deref;
1941
tex->src[1].src = nir_src_for_ssa(tex_deref);
1942
tex->dest_type = nir_type_uint32;
1943
tex->is_array = false;
1944
tex->coord_components = 1;
1945
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "texel buffer result");
1946
nir_builder_instr_insert(&b, &tex->instr);
1947
1948
uint32_t swiz[4];
1949
swiz[0] =
1950
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_R, cswizzle->r);
1951
swiz[1] =
1952
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_G, cswizzle->g);
1953
swiz[2] =
1954
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_B, cswizzle->b);
1955
swiz[3] =
1956
component_swizzle_to_nir_swizzle(VK_COMPONENT_SWIZZLE_A, cswizzle->a);
1957
nir_ssa_def *s = nir_swizzle(&b, &tex->dest.ssa, swiz, 4);
1958
nir_store_var(&b, fs_out_color, s, 0xf);
1959
1960
return b.shader;
1961
}
1962
1963
static bool
1964
create_texel_buffer_copy_pipeline(struct v3dv_device *device,
1965
VkFormat format,
1966
VkColorComponentFlags cmask,
1967
VkComponentMapping *cswizzle,
1968
bool is_layered,
1969
VkRenderPass _pass,
1970
VkPipelineLayout pipeline_layout,
1971
VkPipeline *pipeline)
1972
{
1973
struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);
1974
1975
assert(vk_format_is_color(format));
1976
1977
nir_shader *vs_nir = get_texel_buffer_copy_vs();
1978
nir_shader *fs_nir = get_texel_buffer_copy_fs(device, format, cswizzle);
1979
nir_shader *gs_nir = is_layered ? get_texel_buffer_copy_gs() : NULL;
1980
1981
const VkPipelineVertexInputStateCreateInfo vi_state = {
1982
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
1983
.vertexBindingDescriptionCount = 0,
1984
.vertexAttributeDescriptionCount = 0,
1985
};
1986
1987
VkPipelineDepthStencilStateCreateInfo ds_state = {
1988
.sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
1989
};
1990
1991
VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
1992
blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
1993
.blendEnable = false,
1994
.colorWriteMask = cmask,
1995
};
1996
1997
const VkPipelineColorBlendStateCreateInfo cb_state = {
1998
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
1999
.logicOpEnable = false,
2000
.attachmentCount = 1,
2001
.pAttachments = blend_att_state
2002
};
2003
2004
const VkPipelineMultisampleStateCreateInfo ms_state = {
2005
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
2006
.rasterizationSamples = VK_SAMPLE_COUNT_1_BIT,
2007
.sampleShadingEnable = false,
2008
.pSampleMask = NULL,
2009
.alphaToCoverageEnable = false,
2010
.alphaToOneEnable = false,
2011
};
2012
2013
return create_pipeline(device,
2014
pass,
2015
vs_nir, gs_nir, fs_nir,
2016
&vi_state,
2017
&ds_state,
2018
&cb_state,
2019
&ms_state,
2020
pipeline_layout,
2021
pipeline);
2022
}
2023
2024
static bool
2025
get_copy_texel_buffer_pipeline(
2026
struct v3dv_device *device,
2027
VkFormat format,
2028
VkColorComponentFlags cmask,
2029
VkComponentMapping *cswizzle,
2030
VkImageType image_type,
2031
bool is_layered,
2032
struct v3dv_meta_texel_buffer_copy_pipeline **pipeline)
2033
{
2034
bool ok = true;
2035
2036
uint8_t key[V3DV_META_TEXEL_BUFFER_COPY_CACHE_KEY_SIZE];
2037
get_texel_buffer_copy_pipeline_cache_key(format, cmask, cswizzle, is_layered,
2038
key);
2039
2040
mtx_lock(&device->meta.mtx);
2041
struct hash_entry *entry =
2042
_mesa_hash_table_search(device->meta.texel_buffer_copy.cache[image_type],
2043
&key);
2044
if (entry) {
2045
mtx_unlock(&device->meta.mtx);
2046
*pipeline = entry->data;
2047
return true;
2048
}
2049
2050
*pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
2051
VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
2052
2053
if (*pipeline == NULL)
2054
goto fail;
2055
2056
/* The blit render pass is compatible with the texel buffer copy pipeline */
2057
ok = create_blit_render_pass(device, format, format,
2058
&(*pipeline)->pass,
2059
&(*pipeline)->pass_no_load);
2060
if (!ok)
2061
goto fail;
2062
2063
ok =
2064
create_texel_buffer_copy_pipeline(device,
2065
format, cmask, cswizzle, is_layered,
2066
(*pipeline)->pass,
2067
device->meta.texel_buffer_copy.p_layout,
2068
&(*pipeline)->pipeline);
2069
if (!ok)
2070
goto fail;
2071
2072
_mesa_hash_table_insert(device->meta.texel_buffer_copy.cache[image_type],
2073
&key, *pipeline);
2074
2075
mtx_unlock(&device->meta.mtx);
2076
return true;
2077
2078
fail:
2079
mtx_unlock(&device->meta.mtx);
2080
2081
VkDevice _device = v3dv_device_to_handle(device);
2082
if (*pipeline) {
2083
if ((*pipeline)->pass)
2084
v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
2085
if ((*pipeline)->pipeline)
2086
v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
2087
vk_free(&device->vk.alloc, *pipeline);
2088
*pipeline = NULL;
2089
}
2090
2091
return false;
2092
}
2093
2094
static bool
2095
texel_buffer_shader_copy(struct v3dv_cmd_buffer *cmd_buffer,
2096
VkImageAspectFlags aspect,
2097
struct v3dv_image *image,
2098
VkFormat dst_format,
2099
VkFormat src_format,
2100
struct v3dv_buffer *buffer,
2101
uint32_t buffer_bpp,
2102
VkColorComponentFlags cmask,
2103
VkComponentMapping *cswizzle,
2104
uint32_t region_count,
2105
const VkBufferImageCopy2KHR *regions)
2106
{
2107
VkResult result;
2108
bool handled = false;
2109
2110
assert(cswizzle);
2111
2112
/* This is a copy path, so we don't handle format conversions. The only
2113
* exceptions are stencil to D24S8 copies, which are handled as a color
2114
* masked R8->RGBA8 copy.
2115
*/
2116
assert(src_format == dst_format ||
2117
(dst_format == VK_FORMAT_R8G8B8A8_UINT &&
2118
src_format == VK_FORMAT_R8_UINT &&
2119
cmask == VK_COLOR_COMPONENT_R_BIT));
2120
2121
/* We only handle color copies. Callers can copy D/S aspects by using
2122
* a compatible color format and maybe a cmask/cswizzle for D24 formats.
2123
*/
2124
if (aspect != VK_IMAGE_ASPECT_COLOR_BIT)
2125
return handled;
2126
2127
/* FIXME: we only handle uncompressed images for now. */
2128
if (vk_format_is_compressed(image->vk_format))
2129
return handled;
2130
2131
const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
2132
VK_COLOR_COMPONENT_G_BIT |
2133
VK_COLOR_COMPONENT_B_BIT |
2134
VK_COLOR_COMPONENT_A_BIT;
2135
if (cmask == 0)
2136
cmask = full_cmask;
2137
2138
/* The buffer needs to have VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT
2139
* so we can bind it as a texel buffer. Otherwise, the buffer view
2140
* we create below won't set up the texture state that we need for this.
2141
*/
2142
if (!(buffer->usage & VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT)) {
2143
if (v3dv_buffer_format_supports_features(
2144
cmd_buffer->device, src_format,
2145
VK_FORMAT_FEATURE_UNIFORM_TEXEL_BUFFER_BIT)) {
2146
buffer->usage |= VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT;
2147
} else {
2148
return handled;
2149
}
2150
}
2151
2152
/* At this point we should be able to handle the copy unless an unexpected
2153
* error occurs, such as an OOM.
2154
*/
2155
handled = true;
2156
2157
2158
/* Compute the number of layers to copy.
2159
*
2160
* If we are batching (region_count > 1) all our regions have the same
2161
* image subresource so we can take this from the first region. For 3D
2162
* images we require the same depth extent.
2163
*/
2164
const VkImageSubresourceLayers *resource = &regions[0].imageSubresource;
2165
uint32_t num_layers;
2166
if (image->type != VK_IMAGE_TYPE_3D) {
2167
num_layers = resource->layerCount;
2168
} else {
2169
assert(region_count == 1);
2170
num_layers = regions[0].imageExtent.depth;
2171
}
2172
assert(num_layers > 0);
2173
2174
/* Get the texel buffer copy pipeline */
2175
struct v3dv_meta_texel_buffer_copy_pipeline *pipeline = NULL;
2176
bool ok = get_copy_texel_buffer_pipeline(cmd_buffer->device,
2177
dst_format, cmask, cswizzle,
2178
image->type, num_layers > 1,
2179
&pipeline);
2180
if (!ok)
2181
return handled;
2182
assert(pipeline && pipeline->pipeline && pipeline->pass);
2183
2184
/* Set up the descriptor set for the source texel buffer. We don't have to
2185
* register the descriptor as a private command buffer object since
2186
* all descriptors will be freed automatically with the descriptor
2187
* pool.
2188
*/
2189
VkDescriptorSet set;
2190
result = allocate_texel_buffer_copy_descriptor_set(cmd_buffer, &set);
2191
if (result != VK_SUCCESS)
2192
return handled;
2193
2194
/* FIXME: for some reason passing region->bufferOffset here for the
2195
* offset field doesn't work, making the following CTS tests fail:
2196
*
2197
* dEQP-VK.api.copy_and_blit.core.buffer_to_image.*buffer_offset*
2198
*
2199
* So instead we pass 0 here and we pass the offset in texels as a push
2200
* constant to the shader, which seems to work correctly.
2201
*/
2202
VkDevice _device = v3dv_device_to_handle(cmd_buffer->device);
2203
VkBufferViewCreateInfo buffer_view_info = {
2204
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
2205
.buffer = v3dv_buffer_to_handle(buffer),
2206
.format = src_format,
2207
.offset = 0,
2208
.range = VK_WHOLE_SIZE,
2209
};
2210
2211
VkBufferView texel_buffer_view;
2212
result = v3dv_CreateBufferView(_device, &buffer_view_info,
2213
&cmd_buffer->device->vk.alloc,
2214
&texel_buffer_view);
2215
if (result != VK_SUCCESS)
2216
return handled;
2217
2218
v3dv_cmd_buffer_add_private_obj(
2219
cmd_buffer, (uintptr_t)texel_buffer_view,
2220
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyBufferView);
2221
2222
VkWriteDescriptorSet write = {
2223
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
2224
.dstSet = set,
2225
.dstBinding = 0,
2226
.dstArrayElement = 0,
2227
.descriptorCount = 1,
2228
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
2229
.pTexelBufferView = &texel_buffer_view,
2230
};
2231
v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);
2232
2233
/* Push command buffer state before starting meta operation */
2234
v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);
2235
uint32_t dirty_dynamic_state = 0;
2236
2237
/* Bind common state for all layers and regions */
2238
VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);
2239
v3dv_CmdBindPipeline(_cmd_buffer,
2240
VK_PIPELINE_BIND_POINT_GRAPHICS,
2241
pipeline->pipeline);
2242
2243
v3dv_CmdBindDescriptorSets(_cmd_buffer,
2244
VK_PIPELINE_BIND_POINT_GRAPHICS,
2245
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2246
0, 1, &set,
2247
0, NULL);
2248
2249
/* Set up the framebuffer.
2250
*
2251
* For 3D images, this creates a layered framebuffer with a number of
2252
* layers matching the depth extent of the 3D image.
2253
*/
2254
uint32_t fb_width = u_minify(image->extent.width, resource->mipLevel);
2255
uint32_t fb_height = u_minify(image->extent.height, resource->mipLevel);
2256
VkImageViewCreateInfo image_view_info = {
2257
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
2258
.image = v3dv_image_to_handle(image),
2259
.viewType = v3dv_image_type_to_view_type(image->type),
2260
.format = dst_format,
2261
.subresourceRange = {
2262
.aspectMask = aspect,
2263
.baseMipLevel = resource->mipLevel,
2264
.levelCount = 1,
2265
.baseArrayLayer = resource->baseArrayLayer,
2266
.layerCount = num_layers,
2267
},
2268
};
2269
VkImageView image_view;
2270
result = v3dv_CreateImageView(_device, &image_view_info,
2271
&cmd_buffer->device->vk.alloc, &image_view);
2272
if (result != VK_SUCCESS)
2273
goto fail;
2274
2275
v3dv_cmd_buffer_add_private_obj(
2276
cmd_buffer, (uintptr_t)image_view,
2277
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);
2278
2279
VkFramebufferCreateInfo fb_info = {
2280
.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
2281
.renderPass = pipeline->pass,
2282
.attachmentCount = 1,
2283
.pAttachments = &image_view,
2284
.width = fb_width,
2285
.height = fb_height,
2286
.layers = num_layers,
2287
};
2288
2289
VkFramebuffer fb;
2290
result = v3dv_CreateFramebuffer(_device, &fb_info,
2291
&cmd_buffer->device->vk.alloc, &fb);
2292
if (result != VK_SUCCESS)
2293
goto fail;
2294
2295
v3dv_cmd_buffer_add_private_obj(
2296
cmd_buffer, (uintptr_t)fb,
2297
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);
2298
2299
/* For each layer */
2300
for (uint32_t l = 0; l < num_layers; l++) {
2301
/* Start render pass for this layer.
2302
*
2303
* If we only have one region to copy, then we might be able to
2304
* skip the TLB load if it is aligned to tile boundaries. All layers
2305
* copy the same area, so we only need to check this once.
2306
*/
2307
bool can_skip_tlb_load = false;
2308
VkRect2D render_area;
2309
if (region_count == 1) {
2310
render_area.offset.x = regions[0].imageOffset.x;
2311
render_area.offset.y = regions[0].imageOffset.y;
2312
render_area.extent.width = regions[0].imageExtent.width;
2313
render_area.extent.height = regions[0].imageExtent.height;
2314
2315
if (l == 0) {
2316
struct v3dv_render_pass *pipeline_pass =
2317
v3dv_render_pass_from_handle(pipeline->pass);
2318
can_skip_tlb_load =
2319
cmask == full_cmask &&
2320
v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
2321
v3dv_framebuffer_from_handle(fb),
2322
pipeline_pass, 0);
2323
}
2324
} else {
2325
render_area.offset.x = 0;
2326
render_area.offset.y = 0;
2327
render_area.extent.width = fb_width;
2328
render_area.extent.height = fb_height;
2329
}
2330
2331
VkRenderPassBeginInfo rp_info = {
2332
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
2333
.renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
2334
pipeline->pass,
2335
.framebuffer = fb,
2336
.renderArea = render_area,
2337
.clearValueCount = 0,
2338
};
2339
2340
v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
2341
struct v3dv_job *job = cmd_buffer->state.job;
2342
if (!job)
2343
goto fail;
2344
2345
/* If we are using a layered copy we need to specify the layer for the
2346
* Geometry Shader.
2347
*/
2348
if (num_layers > 1) {
2349
uint32_t layer = resource->baseArrayLayer + l;
2350
v3dv_CmdPushConstants(_cmd_buffer,
2351
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2352
VK_SHADER_STAGE_GEOMETRY_BIT,
2353
24, 4, &layer);
2354
}
2355
2356
/* For each region */
2357
dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
2358
for (uint32_t r = 0; r < region_count; r++) {
2359
const VkBufferImageCopy2KHR *region = &regions[r];
2360
2361
/* Obtain the 2D buffer region spec */
2362
uint32_t buf_width, buf_height;
2363
if (region->bufferRowLength == 0)
2364
buf_width = region->imageExtent.width;
2365
else
2366
buf_width = region->bufferRowLength;
2367
2368
if (region->bufferImageHeight == 0)
2369
buf_height = region->imageExtent.height;
2370
else
2371
buf_height = region->bufferImageHeight;
2372
2373
const VkViewport viewport = {
2374
.x = region->imageOffset.x,
2375
.y = region->imageOffset.y,
2376
.width = region->imageExtent.width,
2377
.height = region->imageExtent.height,
2378
.minDepth = 0.0f,
2379
.maxDepth = 1.0f
2380
};
2381
v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);
2382
const VkRect2D scissor = {
2383
.offset = { region->imageOffset.x, region->imageOffset.y },
2384
.extent = { region->imageExtent.width, region->imageExtent.height }
2385
};
2386
v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);
2387
2388
const VkDeviceSize buf_offset =
2389
region->bufferOffset / buffer_bpp + l * buf_height * buf_width;
2390
uint32_t push_data[6] = {
2391
region->imageOffset.x,
2392
region->imageOffset.y,
2393
region->imageOffset.x + region->imageExtent.width - 1,
2394
region->imageOffset.y + region->imageExtent.height - 1,
2395
buf_width,
2396
buf_offset,
2397
};
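/* Sketch of the push constant layout consumed by the copy shaders: these six
* words (box x0/y0/x1/y1 in pixels, buffer stride and buffer offset in
* texels) are pushed at offset 0 for the fragment shader, while the layer
* index for the geometry shader is pushed separately at offset 24 (see above).
*/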
2398
2399
v3dv_CmdPushConstants(_cmd_buffer,
2400
cmd_buffer->device->meta.texel_buffer_copy.p_layout,
2401
VK_SHADER_STAGE_FRAGMENT_BIT,
2402
0, sizeof(push_data), &push_data);
2403
2404
v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);
2405
} /* For each region */
2406
2407
v3dv_CmdEndRenderPass(_cmd_buffer);
2408
} /* For each layer */
2409
2410
fail:
2411
v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);
2412
return handled;
2413
}
2414
2415
/**
2416
* Returns true if the implementation supports the requested operation (even if
2417
* it failed to process it, for example, due to an out-of-memory error).
2418
*/
2419
static bool
2420
copy_buffer_to_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
2421
VkImageAspectFlags aspect,
2422
struct v3dv_image *image,
2423
VkFormat dst_format,
2424
VkFormat src_format,
2425
struct v3dv_buffer *buffer,
2426
uint32_t buffer_bpp,
2427
VkColorComponentFlags cmask,
2428
VkComponentMapping *cswizzle,
2429
uint32_t region_count,
2430
const VkBufferImageCopy2KHR *regions)
2431
{
2432
/* Since we can't sample linear images we need to upload the linear
2433
* buffer to a tiled image that we can use as a blit source, which
2434
* is slow.
2435
*/
2436
perf_debug("Falling back to blit path for buffer to image copy.\n");
2437
2438
struct v3dv_device *device = cmd_buffer->device;
2439
VkDevice _device = v3dv_device_to_handle(device);
2440
bool handled = true;
2441
2442
/* Allocate memory for the tiled image. Since we copy layer by layer
2443
* we allocate memory to hold a full layer, which is the worst case.
2444
* For that we create a dummy image with that spec, get memory requirements
2445
* for it and use that information to create the memory allocation.
2446
* We will then reuse this memory store for all the regions we want to
2447
* copy.
2448
*/
2449
VkImage dummy_image;
2450
VkImageCreateInfo dummy_info = {
2451
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2452
.imageType = VK_IMAGE_TYPE_2D,
2453
.format = src_format,
2454
.extent = { image->extent.width, image->extent.height, 1 },
2455
.mipLevels = 1,
2456
.arrayLayers = 1,
2457
.samples = VK_SAMPLE_COUNT_1_BIT,
2458
.tiling = VK_IMAGE_TILING_OPTIMAL,
2459
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2460
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2461
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2462
.queueFamilyIndexCount = 0,
2463
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2464
};
2465
VkResult result =
2466
v3dv_CreateImage(_device, &dummy_info, &device->vk.alloc, &dummy_image);
2467
if (result != VK_SUCCESS)
2468
return handled;
2469
2470
VkMemoryRequirements reqs;
2471
vk_common_GetImageMemoryRequirements(_device, dummy_image, &reqs);
2472
v3dv_DestroyImage(_device, dummy_image, &device->vk.alloc);
2473
2474
VkDeviceMemory mem;
2475
VkMemoryAllocateInfo alloc_info = {
2476
.sType = VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO,
2477
.allocationSize = reqs.size,
2478
.memoryTypeIndex = 0,
2479
};
2480
result = v3dv_AllocateMemory(_device, &alloc_info, &device->vk.alloc, &mem);
2481
if (result != VK_SUCCESS)
2482
return handled;
2483
2484
v3dv_cmd_buffer_add_private_obj(
2485
cmd_buffer, (uintptr_t)mem,
2486
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_FreeMemory);
2487
2488
/* Obtain the layer count.
2489
*
2490
* If we are batching (region_count > 1) all our regions have the same
2491
* image subresource so we can take this from the first region.
2492
*/
2493
uint32_t num_layers;
2494
if (image->type != VK_IMAGE_TYPE_3D)
2495
num_layers = regions[0].imageSubresource.layerCount;
2496
else
2497
num_layers = regions[0].imageExtent.depth;
2498
assert(num_layers > 0);
2499
2500
/* Sanity check: we can only batch multiple regions together if they have
2501
* the same framebuffer (so the same layer).
2502
*/
2503
assert(num_layers == 1 || region_count == 1);
2504
2505
const uint32_t block_width = vk_format_get_blockwidth(image->vk_format);
2506
const uint32_t block_height = vk_format_get_blockheight(image->vk_format);
2507
2508
/* Copy regions by uploading each region to a temporary tiled image using
2509
* the memory we have just allocated as storage.
2510
*/
2511
for (uint32_t r = 0; r < region_count; r++) {
2512
const VkBufferImageCopy2KHR *region = &regions[r];
2513
2514
/* Obtain the 2D buffer region spec */
2515
uint32_t buf_width, buf_height;
2516
if (region->bufferRowLength == 0)
2517
buf_width = region->imageExtent.width;
2518
else
2519
buf_width = region->bufferRowLength;
2520
2521
if (region->bufferImageHeight == 0)
2522
buf_height = region->imageExtent.height;
2523
else
2524
buf_height = region->bufferImageHeight;
2525
2526
/* If the image is compressed, the bpp refers to blocks, not pixels */
2527
buf_width = buf_width / block_width;
2528
buf_height = buf_height / block_height;
2529
2530
for (uint32_t i = 0; i < num_layers; i++) {
2531
/* Create the tiled image */
2532
VkImageCreateInfo image_info = {
2533
.sType = VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,
2534
.imageType = VK_IMAGE_TYPE_2D,
2535
.format = src_format,
2536
.extent = { buf_width, buf_height, 1 },
2537
.mipLevels = 1,
2538
.arrayLayers = 1,
2539
.samples = VK_SAMPLE_COUNT_1_BIT,
2540
.tiling = VK_IMAGE_TILING_OPTIMAL,
2541
.usage = VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
2542
VK_IMAGE_USAGE_TRANSFER_DST_BIT,
2543
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
2544
.queueFamilyIndexCount = 0,
2545
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
2546
};
2547
2548
VkImage buffer_image;
2549
VkResult result =
2550
v3dv_CreateImage(_device, &image_info, &device->vk.alloc,
2551
&buffer_image);
2552
if (result != VK_SUCCESS)
2553
return handled;
2554
2555
v3dv_cmd_buffer_add_private_obj(
2556
cmd_buffer, (uintptr_t)buffer_image,
2557
(v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImage);
2558
2559
result = vk_common_BindImageMemory(_device, buffer_image, mem, 0);
2560
if (result != VK_SUCCESS)
2561
return handled;
2562
2563
/* Upload buffer contents for the selected layer */
2564
const VkDeviceSize buf_offset_bytes =
2565
region->bufferOffset + i * buf_height * buf_width * buffer_bpp;
2566
const VkBufferImageCopy2KHR buffer_image_copy = {
2567
.sType = VK_STRUCTURE_TYPE_BUFFER_IMAGE_COPY_2_KHR,
2568
.bufferOffset = buf_offset_bytes,
2569
.bufferRowLength = region->bufferRowLength / block_width,
2570
.bufferImageHeight = region->bufferImageHeight / block_height,
2571
.imageSubresource = {
2572
.aspectMask = aspect,
2573
.mipLevel = 0,
2574
.baseArrayLayer = 0,
2575
.layerCount = 1,
2576
},
2577
.imageOffset = { 0, 0, 0 },
2578
.imageExtent = { buf_width, buf_height, 1 }
2579
};
2580
handled =
2581
create_tiled_image_from_buffer(cmd_buffer,
2582
v3dv_image_from_handle(buffer_image),
2583
buffer, &buffer_image_copy);
2584
if (!handled) {
2585
/* This is unexpected, we should have set up the upload to be
2586
* compatible with a TFU or TLB copy.
2587
*/
2588
unreachable("Unable to copy buffer to image through TLB");
2589
return false;
2590
}
2591
2592
/* Blit-copy the requested image extent from the buffer image to the
2593
* destination image.
2594
*
2595
* Since we are copying, the blit must use the same format on the
2596
* destination and source images to avoid format conversions. The
2597
* only exception is copying stencil, which we upload to an R8UI source
2598
* image, but which we need to blit to an S8D24 destination (the only
2599
* stencil format we support).
2600
*/
2601
const VkImageBlit2KHR blit_region = {
2602
.sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
2603
.srcSubresource = {
2604
.aspectMask = aspect,
2605
.mipLevel = 0,
2606
.baseArrayLayer = 0,
2607
.layerCount = 1,
2608
},
2609
.srcOffsets = {
2610
{ 0, 0, 0 },
2611
{ region->imageExtent.width, region->imageExtent.height, 1 },
2612
},
2613
.dstSubresource = {
2614
.aspectMask = aspect,
2615
.mipLevel = region->imageSubresource.mipLevel,
2616
.baseArrayLayer = region->imageSubresource.baseArrayLayer + i,
2617
.layerCount = 1,
2618
},
2619
.dstOffsets = {
2620
{
2621
DIV_ROUND_UP(region->imageOffset.x, block_width),
2622
DIV_ROUND_UP(region->imageOffset.y, block_height),
2623
region->imageOffset.z + i,
2624
},
2625
{
2626
DIV_ROUND_UP(region->imageOffset.x + region->imageExtent.width,
2627
block_width),
2628
DIV_ROUND_UP(region->imageOffset.y + region->imageExtent.height,
2629
block_height),
2630
region->imageOffset.z + i + 1,
2631
},
2632
},
2633
};
2634
2635
handled = blit_shader(cmd_buffer,
2636
image, dst_format,
2637
v3dv_image_from_handle(buffer_image), src_format,
2638
cmask, cswizzle,
2639
&blit_region, VK_FILTER_NEAREST, true);
2640
if (!handled) {
2641
/* This is unexpected, we should have a supported blit spec */
2642
unreachable("Unable to blit buffer to destination image");
2643
return false;
2644
}
2645
}
2646
}
2647
2648
return handled;
2649
}
2650
2651
/**
2652
* Returns true if the implementation supports the requested operation (even if
2653
* it failed to process it, for example, due to an out-of-memory error).
2654
*/
2655
static bool
2656
copy_buffer_to_image_shader(struct v3dv_cmd_buffer *cmd_buffer,
2657
struct v3dv_image *image,
2658
struct v3dv_buffer *buffer,
2659
uint32_t region_count,
2660
const VkBufferImageCopy2KHR *regions,
2661
bool use_texel_buffer)
2662
{
2663
/* We can only call this with region_count > 1 if we can batch the regions
2664
* together, in which case they share the same image subresource, and so
2665
* the same aspect.
2666
*/
2667
VkImageAspectFlags aspect = regions[0].imageSubresource.aspectMask;
2668
2669
/* Generally, the bpp of the data in the buffer matches that of the
2670
* destination image. The exception is the case where we are uploading
2671
* stencil (8bpp) to a combined d24s8 image (32bpp).
2672
*/
2673
uint32_t buf_bpp = image->cpp;
2674
2675
/* We are about to upload the buffer data to an image so we can then
2676
* blit that to our destination region. Because we are going to implement
2677
* the copy as a blit, we want our blit source and destination formats to be
2678
* the same (to avoid any format conversions), so we choose a canonical
2679
* format that matches the destination image bpp.
2680
*/
2681
VkComponentMapping ident_swizzle = {
2682
.r = VK_COMPONENT_SWIZZLE_IDENTITY,
2683
.g = VK_COMPONENT_SWIZZLE_IDENTITY,
2684
.b = VK_COMPONENT_SWIZZLE_IDENTITY,
2685
.a = VK_COMPONENT_SWIZZLE_IDENTITY,
2686
};
2687
2688
VkComponentMapping cswizzle = ident_swizzle;
2689
VkColorComponentFlags cmask = 0; /* Write all components */
2690
VkFormat src_format;
2691
VkFormat dst_format;
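/* Canonical UINT formats chosen below, per buffer bpp: 16 -> R32G32B32A32,
* 8 -> R16G16B16A16, 4 -> R8G8B8A8 (or R8 -> RGBA8 for stencil uploads),
* 2 -> R16, 1 -> R8.
*/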
2692
switch (buf_bpp) {
2693
case 16:
2694
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2695
src_format = VK_FORMAT_R32G32B32A32_UINT;
2696
dst_format = src_format;
2697
break;
2698
case 8:
2699
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2700
src_format = VK_FORMAT_R16G16B16A16_UINT;
2701
dst_format = src_format;
2702
break;
2703
case 4:
2704
switch (aspect) {
2705
case VK_IMAGE_ASPECT_COLOR_BIT:
2706
src_format = VK_FORMAT_R8G8B8A8_UINT;
2707
dst_format = src_format;
2708
break;
2709
case VK_IMAGE_ASPECT_DEPTH_BIT:
2710
assert(image->vk_format == VK_FORMAT_D32_SFLOAT ||
2711
image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
2712
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32);
2713
src_format = VK_FORMAT_R8G8B8A8_UINT;
2714
dst_format = src_format;
2715
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2716
2717
/* For D24 formats, the Vulkan spec states that the depth component
2718
* in the buffer is stored in the 24-LSB, but V3D wants it in the
2719
* 24-MSB.
2720
*/
2721
if (image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
2722
image->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) {
2723
cmask = VK_COLOR_COMPONENT_G_BIT |
2724
VK_COLOR_COMPONENT_B_BIT |
2725
VK_COLOR_COMPONENT_A_BIT;
2726
cswizzle.r = VK_COMPONENT_SWIZZLE_R;
2727
cswizzle.g = VK_COMPONENT_SWIZZLE_R;
2728
cswizzle.b = VK_COMPONENT_SWIZZLE_G;
2729
cswizzle.a = VK_COMPONENT_SWIZZLE_B;
2730
}
2731
break;
2732
case VK_IMAGE_ASPECT_STENCIL_BIT:
2733
/* Since we don't support separate stencil images, this is always a
2734
* stencil copy to a combined depth/stencil image. We interpret the
2735
* buffer data as a
2736
* color R8UI image, and implement the blit as a compatible color
2737
* blit to an RGBA8UI destination masking out writes to components
2738
* GBA (which map to the D24 component of a S8D24 image).
2739
*/
2740
assert(image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT);
2741
buf_bpp = 1;
2742
src_format = VK_FORMAT_R8_UINT;
2743
dst_format = VK_FORMAT_R8G8B8A8_UINT;
2744
cmask = VK_COLOR_COMPONENT_R_BIT;
2745
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2746
break;
2747
default:
2748
unreachable("unsupported aspect");
2749
return false;
2750
};
2751
break;
2752
case 2:
2753
aspect = VK_IMAGE_ASPECT_COLOR_BIT;
2754
src_format = VK_FORMAT_R16_UINT;
2755
dst_format = src_format;
2756
break;
2757
case 1:
2758
assert(aspect == VK_IMAGE_ASPECT_COLOR_BIT);
2759
src_format = VK_FORMAT_R8_UINT;
2760
dst_format = src_format;
2761
break;
2762
default:
2763
unreachable("unsupported bit-size");
2764
return false;
2765
}
2766
2767
if (use_texel_buffer) {
2768
return texel_buffer_shader_copy(cmd_buffer, aspect, image,
2769
dst_format, src_format,
2770
buffer, buf_bpp,
2771
cmask, &cswizzle,
2772
region_count, regions);
2773
} else {
2774
return copy_buffer_to_image_blit(cmd_buffer, aspect, image,
2775
dst_format, src_format,
2776
buffer, buf_bpp,
2777
cmask, &cswizzle,
2778
region_count, regions);
2779
}
2780
}
2781
2782
/**
2783
* Returns true if the implementation supports the requested operation (even if
2784
* it failed to process it, for example, due to an out-of-memory error).
2785
*/
2786
static bool
2787
copy_buffer_to_image_cpu(struct v3dv_cmd_buffer *cmd_buffer,
2788
struct v3dv_image *image,
2789
struct v3dv_buffer *buffer,
2790
const VkBufferImageCopy2KHR *region)
2791
{
2792
/* FIXME */
2793
if (vk_format_is_depth_or_stencil(image->vk_format))
2794
return false;
2795
2796
if (vk_format_is_compressed(image->vk_format))
2797
return false;
2798
2799
if (image->tiling == VK_IMAGE_TILING_LINEAR)
2800
return false;
2801
2802
uint32_t buffer_width, buffer_height;
2803
if (region->bufferRowLength == 0)
2804
buffer_width = region->imageExtent.width;
2805
else
2806
buffer_width = region->bufferRowLength;
2807
2808
if (region->bufferImageHeight == 0)
2809
buffer_height = region->imageExtent.height;
2810
else
2811
buffer_height = region->bufferImageHeight;
2812
2813
uint32_t buffer_stride = buffer_width * image->cpp;
2814
uint32_t buffer_layer_stride = buffer_stride * buffer_height;
2815
2816
uint32_t num_layers;
2817
if (image->type != VK_IMAGE_TYPE_3D)
2818
num_layers = region->imageSubresource.layerCount;
2819
else
2820
num_layers = region->imageExtent.depth;
2821
assert(num_layers > 0);
2822
2823
struct v3dv_job *job =
2824
v3dv_cmd_buffer_create_cpu_job(cmd_buffer->device,
2825
V3DV_JOB_TYPE_CPU_COPY_BUFFER_TO_IMAGE,
2826
cmd_buffer, -1);
2827
if (!job)
2828
return true;
2829
2830
job->cpu.copy_buffer_to_image.image = image;
2831
job->cpu.copy_buffer_to_image.buffer = buffer;
2832
job->cpu.copy_buffer_to_image.buffer_stride = buffer_stride;
2833
job->cpu.copy_buffer_to_image.buffer_layer_stride = buffer_layer_stride;
2834
job->cpu.copy_buffer_to_image.buffer_offset = region->bufferOffset;
2835
job->cpu.copy_buffer_to_image.image_extent = region->imageExtent;
2836
job->cpu.copy_buffer_to_image.image_offset = region->imageOffset;
2837
job->cpu.copy_buffer_to_image.mip_level =
2838
region->imageSubresource.mipLevel;
2839
job->cpu.copy_buffer_to_image.base_layer =
2840
region->imageSubresource.baseArrayLayer;
2841
job->cpu.copy_buffer_to_image.layer_count = num_layers;
2842
2843
list_addtail(&job->list_link, &cmd_buffer->jobs);
2844
2845
return true;
2846
}
2847
2848
VKAPI_ATTR void VKAPI_CALL
2849
v3dv_CmdCopyBufferToImage2KHR(VkCommandBuffer commandBuffer,
2850
const VkCopyBufferToImageInfo2KHR *info)
2851
{
2852
V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
2853
V3DV_FROM_HANDLE(v3dv_buffer, buffer, info->srcBuffer);
2854
V3DV_FROM_HANDLE(v3dv_image, image, info->dstImage);
2855
2856
assert(image->samples == VK_SAMPLE_COUNT_1_BIT);
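/* Copy strategy: try the TFU and TLB hardware paths first (single regions
* starting at the origin), then fall back to the texel buffer shader path
* (batching compatible regions where possible), then the CPU path, and
* finally the blit shader path.
*/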
2857
2858
uint32_t r = 0;
2859
while (r < info->regionCount) {
2860
/* The TFU and TLB paths can only copy one region at a time and the region
2861
* needs to start at the origin. We try these first for the common case
2862
* where we are copying full images, since they should be the fastest.
2863
*/
2864
uint32_t batch_size = 1;
2865
if (copy_buffer_to_image_tfu(cmd_buffer, image, buffer, &info->pRegions[r]))
2866
goto handled;
2867
2868
if (copy_buffer_to_image_tlb(cmd_buffer, image, buffer, &info->pRegions[r]))
2869
goto handled;
2870
2871
/* Otherwise, we are copying subrects, so we fall back to copying
2872
* via shader and texel buffers and we try to batch the regions
2873
* if possible. We can only batch copies if they have the same
2874
* framebuffer spec, which is mostly determined by the image
2875
* subresource of the region.
2876
*/
2877
const VkImageSubresourceLayers *rsc = &info->pRegions[r].imageSubresource;
2878
for (uint32_t s = r + 1; s < info->regionCount; s++) {
2879
const VkImageSubresourceLayers *rsc_s =
2880
&info->pRegions[s].imageSubresource;
2881
2882
if (memcmp(rsc, rsc_s, sizeof(VkImageSubresourceLayers)) != 0)
2883
break;
2884
2885
/* For 3D images we also need to check the depth extent */
2886
if (image->type == VK_IMAGE_TYPE_3D &&
2887
info->pRegions[s].imageExtent.depth !=
2888
info->pRegions[r].imageExtent.depth) {
2889
break;
2890
}
2891
2892
batch_size++;
2893
}
2894
2895
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
2896
batch_size, &info->pRegions[r], true)) {
2897
goto handled;
2898
}
2899
2900
/* If we still could not copy, fall back to slower paths.
2901
*
2902
* FIXME: we could try to batch these too, but since they are bound to be
2903
* slow it might not be worth it and we should instead put more effort
2904
* in handling more cases with the other paths.
2905
*/
2906
if (copy_buffer_to_image_cpu(cmd_buffer, image, buffer,
2907
&info->pRegions[r])) {
2908
batch_size = 1;
2909
goto handled;
2910
}
2911
2912
if (copy_buffer_to_image_shader(cmd_buffer, image, buffer,
2913
batch_size, &info->pRegions[r], false)) {
2914
goto handled;
2915
}
2916
2917
unreachable("Unsupported buffer to image copy.");
2918
2919
handled:
2920
r += batch_size;
2921
}
2922
}
2923
2924
static void
2925
compute_blit_3d_layers(const VkOffset3D *offsets,
2926
uint32_t *min_layer, uint32_t *max_layer,
2927
bool *mirror_z);
2928
2929
/**
2930
* Returns true if the implementation supports the requested operation (even if
2931
* it failed to process it, for example, due to an out-of-memory error).
2932
*
2933
* The TFU blit path doesn't handle scaling, so the blit filter parameter can
2934
* be ignored.
2935
*/
2936
static bool
2937
blit_tfu(struct v3dv_cmd_buffer *cmd_buffer,
2938
struct v3dv_image *dst,
2939
struct v3dv_image *src,
2940
const VkImageBlit2KHR *region)
2941
{
2942
assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);
2943
assert(src->samples == VK_SAMPLE_COUNT_1_BIT);
2944
2945
/* Format must match */
2946
if (src->vk_format != dst->vk_format)
2947
return false;
2948
2949
/* Destination can't be raster format */
2950
if (dst->tiling == VK_IMAGE_TILING_LINEAR)
2951
return false;
2952
2953
/* Source region must start at (0,0) */
2954
if (region->srcOffsets[0].x != 0 || region->srcOffsets[0].y != 0)
2955
return false;
2956
2957
/* Destination image must be complete */
2958
if (region->dstOffsets[0].x != 0 || region->dstOffsets[0].y != 0)
2959
return false;
2960
2961
const uint32_t dst_mip_level = region->dstSubresource.mipLevel;
2962
const uint32_t dst_width = u_minify(dst->extent.width, dst_mip_level);
2963
const uint32_t dst_height = u_minify(dst->extent.height, dst_mip_level);
2964
if (region->dstOffsets[1].x < dst_width - 1 ||
2965
region->dstOffsets[1].y < dst_height - 1) {
2966
return false;
2967
}
2968
2969
/* No XY scaling */
2970
if (region->srcOffsets[1].x != region->dstOffsets[1].x ||
2971
region->srcOffsets[1].y != region->dstOffsets[1].y) {
2972
return false;
2973
}
2974
2975
/* If the format is D24S8 both aspects need to be copied, since the TFU
2976
* can't be programmed to copy only one aspect of the image.
2977
*/
2978
if (dst->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
2979
const VkImageAspectFlags ds_aspects = VK_IMAGE_ASPECT_DEPTH_BIT |
2980
VK_IMAGE_ASPECT_STENCIL_BIT;
2981
if (region->dstSubresource.aspectMask != ds_aspects)
2982
return false;
2983
}
2984
2985
/* Our TFU blits only handle exact copies (they require the same formats
2986
* on input and output, no scaling, etc.), so there are no pixel format
2987
* conversions and we can rewrite the format to use one that is TFU
2988
* compatible based on its texel size.
2989
*/
2990
const struct v3dv_format *format =
2991
v3dv_get_compatible_tfu_format(cmd_buffer->device,
2992
dst->cpp, NULL);
2993
2994
/* Emit a TFU job for each layer to blit */
2995
assert(region->dstSubresource.layerCount ==
2996
region->srcSubresource.layerCount);
2997
2998
uint32_t min_dst_layer;
2999
uint32_t max_dst_layer;
3000
bool dst_mirror_z = false;
3001
if (dst->type == VK_IMAGE_TYPE_3D) {
3002
compute_blit_3d_layers(region->dstOffsets,
3003
&min_dst_layer, &max_dst_layer,
3004
&dst_mirror_z);
3005
} else {
3006
min_dst_layer = region->dstSubresource.baseArrayLayer;
3007
max_dst_layer = min_dst_layer + region->dstSubresource.layerCount;
3008
}
3009
3010
uint32_t min_src_layer;
3011
uint32_t max_src_layer;
3012
bool src_mirror_z = false;
3013
if (src->type == VK_IMAGE_TYPE_3D) {
3014
compute_blit_3d_layers(region->srcOffsets,
3015
&min_src_layer, &max_src_layer,
3016
&src_mirror_z);
3017
} else {
3018
min_src_layer = region->srcSubresource.baseArrayLayer;
3019
max_src_layer = min_src_layer + region->srcSubresource.layerCount;
3020
}
3021
3022
/* No Z scaling for 3D images (for non-3D images both src and dst must
3023
* have the same layerCount).
3024
*/
3025
if (max_dst_layer - min_dst_layer != max_src_layer - min_src_layer)
3026
return false;
3027
3028
const uint32_t layer_count = max_dst_layer - min_dst_layer;
3029
const uint32_t src_mip_level = region->srcSubresource.mipLevel;
3030
for (uint32_t i = 0; i < layer_count; i++) {
3031
/* Since the TFU path doesn't handle scaling, Z mirroring for 3D images
3032
* only involves reversing the order of the slices.
3033
*/
3034
const uint32_t dst_layer =
3035
dst_mirror_z ? max_dst_layer - i - 1 : min_dst_layer + i;
3036
const uint32_t src_layer =
3037
src_mirror_z ? max_src_layer - i - 1 : min_src_layer + i;
3038
v3dv_X(cmd_buffer->device, cmd_buffer_emit_tfu_job)
3039
(cmd_buffer, dst, dst_mip_level, dst_layer,
3040
src, src_mip_level, src_layer,
3041
dst_width, dst_height, format);
3042
}
3043
3044
return true;
3045
}
3046
3047
static bool
3048
format_needs_software_int_clamp(VkFormat format)
3049
{
3050
switch (format) {
3051
case VK_FORMAT_A2R10G10B10_UINT_PACK32:
3052
case VK_FORMAT_A2R10G10B10_SINT_PACK32:
3053
case VK_FORMAT_A2B10G10R10_UINT_PACK32:
3054
case VK_FORMAT_A2B10G10R10_SINT_PACK32:
3055
return true;
3056
default:
3057
return false;
3058
};
3059
}
3060
3061
static void
3062
get_blit_pipeline_cache_key(VkFormat dst_format,
3063
VkFormat src_format,
3064
VkColorComponentFlags cmask,
3065
VkSampleCountFlagBits dst_samples,
3066
VkSampleCountFlagBits src_samples,
3067
uint8_t *key)
3068
{
3069
memset(key, 0, V3DV_META_BLIT_CACHE_KEY_SIZE);
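/* Key layout sketch (one 32-bit word each): dst_format, src_format (only
* when software clamping is required, 0 otherwise), cmask, and the packed
* sample counts (dst_samples << 8 | src_samples).
*/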
3070
3071
uint32_t *p = (uint32_t *) key;
3072
3073
*p = dst_format;
3074
p++;
3075
3076
/* Generally, when blitting from a larger format to a smaller format
3077
* the hardware takes care of clamping the source to the RT range.
3078
* Specifically, for integer formats, this is done by using
3079
* V3D_RENDER_TARGET_CLAMP_INT in the render target setup, however, this
3080
* clamps to the bit-size of the render type, and some formats, such as
3081
* rgb10a2_uint have a 16-bit type, so it won't do what we need and we
3082
* require to clamp in software. In these cases, we need to amend the blit
3083
* shader with clamp code that depends on both the src and dst formats, so
3084
* we need the src format to be part of the key.
3085
*/
3086
*p = format_needs_software_int_clamp(dst_format) ? src_format : 0;
3087
p++;
3088
3089
*p = cmask;
3090
p++;
3091
3092
*p = (dst_samples << 8) | src_samples;
3093
p++;
3094
3095
assert(((uint8_t*)p - key) == V3DV_META_BLIT_CACHE_KEY_SIZE);
3096
}
3097
3098
static bool
3099
create_blit_render_pass(struct v3dv_device *device,
3100
VkFormat dst_format,
3101
VkFormat src_format,
3102
VkRenderPass *pass_load,
3103
VkRenderPass *pass_no_load)
3104
{
3105
const bool is_color_blit = vk_format_is_color(dst_format);
3106
3107
/* Attachment load operation is specified below */
3108
VkAttachmentDescription att = {
3109
.format = dst_format,
3110
.samples = VK_SAMPLE_COUNT_1_BIT,
3111
.storeOp = VK_ATTACHMENT_STORE_OP_STORE,
3112
.initialLayout = VK_IMAGE_LAYOUT_GENERAL,
3113
.finalLayout = VK_IMAGE_LAYOUT_GENERAL,
3114
};
3115
3116
VkAttachmentReference att_ref = {
3117
.attachment = 0,
3118
.layout = VK_IMAGE_LAYOUT_GENERAL,
3119
};
3120
3121
VkSubpassDescription subpass = {
3122
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
3123
.inputAttachmentCount = 0,
3124
.colorAttachmentCount = is_color_blit ? 1 : 0,
3125
.pColorAttachments = is_color_blit ? &att_ref : NULL,
3126
.pResolveAttachments = NULL,
3127
.pDepthStencilAttachment = is_color_blit ? NULL : &att_ref,
3128
.preserveAttachmentCount = 0,
3129
.pPreserveAttachments = NULL,
3130
};
3131
3132
VkRenderPassCreateInfo info = {
3133
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO,
3134
.attachmentCount = 1,
3135
.pAttachments = &att,
3136
.subpassCount = 1,
3137
.pSubpasses = &subpass,
3138
.dependencyCount = 0,
3139
.pDependencies = NULL,
3140
};
3141
3142
VkResult result;
3143
att.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
3144
result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
3145
&info, &device->vk.alloc, pass_load);
3146
if (result != VK_SUCCESS)
3147
return false;
3148
3149
att.loadOp = VK_ATTACHMENT_LOAD_OP_DONT_CARE;
3150
result = v3dv_CreateRenderPass(v3dv_device_to_handle(device),
3151
&info, &device->vk.alloc, pass_no_load);
3152
return result == VK_SUCCESS;
3153
}
3154
3155
static nir_ssa_def *
3156
gen_rect_vertices(nir_builder *b)
3157
{
3158
nir_ssa_def *vertex_id = nir_load_vertex_id(b);
3159
3160
/* vertex 0: -1.0, -1.0
3161
* vertex 1: -1.0, 1.0
3162
* vertex 2: 1.0, -1.0
3163
* vertex 3: 1.0, 1.0
3164
*
3165
* so:
3166
*
3167
* channel 0 is vertex_id < 2 ? -1.0 : 1.0
3168
* channel 1 is vertex_id & 1 ? 1.0 : -1.0
3169
*/
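/* These four vertices are consumed by the meta paths' 4-vertex draws (e.g.
* the v3dv_CmdDraw(..., 4, 1, 0, 0) call in texel_buffer_shader_copy),
* which presumably rasterize them as a triangle strip covering the full
* destination quad.
*/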
3170
3171
nir_ssa_def *one = nir_imm_int(b, 1);
3172
nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
3173
nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3174
3175
nir_ssa_def *comp[4];
3176
comp[0] = nir_bcsel(b, c0cmp,
3177
nir_imm_float(b, -1.0f),
3178
nir_imm_float(b, 1.0f));
3179
3180
comp[1] = nir_bcsel(b, c1cmp,
3181
nir_imm_float(b, 1.0f),
3182
nir_imm_float(b, -1.0f));
3183
comp[2] = nir_imm_float(b, 0.0f);
3184
comp[3] = nir_imm_float(b, 1.0f);
3185
return nir_vec(b, comp, 4);
3186
}
3187
3188
static nir_ssa_def *
3189
gen_tex_coords(nir_builder *b)
3190
{
3191
nir_ssa_def *tex_box =
3192
nir_load_push_constant(b, 4, 32, nir_imm_int(b, 0), .base = 0, .range = 16);
3193
3194
nir_ssa_def *tex_z =
3195
nir_load_push_constant(b, 1, 32, nir_imm_int(b, 0), .base = 16, .range = 4);
3196
3197
nir_ssa_def *vertex_id = nir_load_vertex_id(b);
3198
3199
/* vertex 0: src0_x, src0_y
3200
* vertex 1: src0_x, src1_y
3201
* vertex 2: src1_x, src0_y
3202
* vertex 3: src1_x, src1_y
3203
*
3204
* So:
3205
*
3206
* channel 0 is vertex_id < 2 ? src0_x : src1_x
3207
* channel 1 is vertex_id & 1 ? src1_y : src0_y
3208
*/
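/* The vertex_id -> corner mapping mirrors gen_rect_vertices(), so each corner
* of the destination quad gets the matching source corner and the texture
* coordinates are interpolated across the quad.
*/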
3209
3210
nir_ssa_def *one = nir_imm_int(b, 1);
3211
nir_ssa_def *c0cmp = nir_ilt(b, vertex_id, nir_imm_int(b, 2));
3212
nir_ssa_def *c1cmp = nir_ieq(b, nir_iand(b, vertex_id, one), one);
3213
3214
nir_ssa_def *comp[4];
3215
comp[0] = nir_bcsel(b, c0cmp,
3216
nir_channel(b, tex_box, 0),
3217
nir_channel(b, tex_box, 2));
3218
3219
comp[1] = nir_bcsel(b, c1cmp,
3220
nir_channel(b, tex_box, 3),
3221
nir_channel(b, tex_box, 1));
3222
comp[2] = tex_z;
3223
comp[3] = nir_imm_float(b, 1.0f);
3224
return nir_vec(b, comp, 4);
3225
}

static nir_ssa_def *
build_nir_tex_op_read(struct nir_builder *b,
                      nir_ssa_def *tex_pos,
                      enum glsl_base_type tex_type,
                      enum glsl_sampler_dim dim)
{
   assert(dim != GLSL_SAMPLER_DIM_MS);

   const struct glsl_type *sampler_type =
      glsl_sampler_type(dim, false, false, tex_type);
   nir_variable *sampler =
      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 3);
   tex->sampler_dim = dim;
   tex->op = nir_texop_tex;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(tex_pos);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->src[2].src_type = nir_tex_src_sampler_deref;
   tex->src[2].src = nir_src_for_ssa(tex_deref);
   tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
   tex->is_array = glsl_sampler_type_is_array(sampler_type);
   tex->coord_components = tex_pos->num_components;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(b, &tex->instr);
   return &tex->dest.ssa;
}

static nir_ssa_def *
build_nir_tex_op_ms_fetch_sample(struct nir_builder *b,
                                 nir_variable *sampler,
                                 nir_ssa_def *tex_deref,
                                 enum glsl_base_type tex_type,
                                 nir_ssa_def *tex_pos,
                                 nir_ssa_def *sample_idx)
{
   nir_tex_instr *tex = nir_tex_instr_create(b->shader, 4);
   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
   tex->op = nir_texop_txf_ms;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(tex_pos);
   tex->src[1].src_type = nir_tex_src_texture_deref;
   tex->src[1].src = nir_src_for_ssa(tex_deref);
   tex->src[2].src_type = nir_tex_src_sampler_deref;
   tex->src[2].src = nir_src_for_ssa(tex_deref);
   tex->src[3].src_type = nir_tex_src_ms_index;
   tex->src[3].src = nir_src_for_ssa(sample_idx);
   tex->dest_type = nir_get_nir_type_for_glsl_base_type(tex_type);
   tex->is_array = false;
   tex->coord_components = tex_pos->num_components;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(b, &tex->instr);
   return &tex->dest.ssa;
}

/* Fetches all samples at the given position and averages them */
static nir_ssa_def *
build_nir_tex_op_ms_resolve(struct nir_builder *b,
                            nir_ssa_def *tex_pos,
                            enum glsl_base_type tex_type,
                            VkSampleCountFlagBits src_samples)
{
   assert(src_samples > VK_SAMPLE_COUNT_1_BIT);
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
   nir_variable *sampler =
      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   const bool is_int = glsl_base_type_is_integer(tex_type);

   nir_ssa_def *tmp = NULL;
   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;
   for (uint32_t i = 0; i < src_samples; i++) {
      nir_ssa_def *s =
         build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
                                          tex_type, tex_pos,
                                          nir_imm_int(b, i));

      /* For integer formats, the multisample resolve operation is expected
       * to return one of the samples; we just return the first one.
       */
      if (is_int)
         return s;

      tmp = i == 0 ? s : nir_fadd(b, tmp, s);
   }

   assert(!is_int);
   return nir_fmul(b, tmp, nir_imm_float(b, 1.0f / src_samples));
}
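
/* Example: with a 4x multisampled float source this sums samples 0..3 and
 * scales the result by 1/4; for pure integer formats the loop above returns
 * sample 0 without averaging, which is what the resolve rules allow.
 */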

/* Fetches the current sample (gl_SampleID) at the given position */
static nir_ssa_def *
build_nir_tex_op_ms_read(struct nir_builder *b,
                         nir_ssa_def *tex_pos,
                         enum glsl_base_type tex_type)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, tex_type);
   nir_variable *sampler =
      nir_variable_create(b->shader, nir_var_uniform, sampler_type, "s_tex");
   sampler->data.descriptor_set = 0;
   sampler->data.binding = 0;

   nir_ssa_def *tex_deref = &nir_build_deref_var(b, sampler)->dest.ssa;

   return build_nir_tex_op_ms_fetch_sample(b, sampler, tex_deref,
                                           tex_type, tex_pos,
                                           nir_load_sample_id(b));
}

static nir_ssa_def *
build_nir_tex_op(struct nir_builder *b,
                 struct v3dv_device *device,
                 nir_ssa_def *tex_pos,
                 enum glsl_base_type tex_type,
                 VkSampleCountFlagBits dst_samples,
                 VkSampleCountFlagBits src_samples,
                 enum glsl_sampler_dim dim)
{
   switch (dim) {
   case GLSL_SAMPLER_DIM_MS:
      assert(src_samples == VK_SAMPLE_COUNT_4_BIT);
      /* For multisampled texture sources we need to use fetching instead of
       * normalized texture coordinates. We already configured our blit
       * coordinates to be in texel units, but here we still need to convert
       * them from floating point to integer.
       */
      tex_pos = nir_f2i32(b, tex_pos);

      if (dst_samples == VK_SAMPLE_COUNT_1_BIT)
         return build_nir_tex_op_ms_resolve(b, tex_pos, tex_type, src_samples);
      else
         return build_nir_tex_op_ms_read(b, tex_pos, tex_type);
   default:
      assert(src_samples == VK_SAMPLE_COUNT_1_BIT);
      return build_nir_tex_op_read(b, tex_pos, tex_type, dim);
   }
}

static nir_shader *
get_blit_vs()
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_VERTEX, options,
                                                  "meta blit vs");

   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_variable *vs_out_pos =
      nir_variable_create(b.shader, nir_var_shader_out, vec4, "gl_Position");
   vs_out_pos->data.location = VARYING_SLOT_POS;

   nir_variable *vs_out_tex_coord =
      nir_variable_create(b.shader, nir_var_shader_out, vec4, "out_tex_coord");
   vs_out_tex_coord->data.location = VARYING_SLOT_VAR0;
   vs_out_tex_coord->data.interpolation = INTERP_MODE_SMOOTH;

   nir_ssa_def *pos = gen_rect_vertices(&b);
   nir_store_var(&b, vs_out_pos, pos, 0xf);

   nir_ssa_def *tex_coord = gen_tex_coords(&b);
   nir_store_var(&b, vs_out_tex_coord, tex_coord, 0xf);

   return b.shader;
}

static uint32_t
get_channel_mask_for_sampler_dim(enum glsl_sampler_dim sampler_dim)
{
   switch (sampler_dim) {
   case GLSL_SAMPLER_DIM_1D: return 0x1;
   case GLSL_SAMPLER_DIM_2D: return 0x3;
   case GLSL_SAMPLER_DIM_MS: return 0x3;
   case GLSL_SAMPLER_DIM_3D: return 0x7;
   default:
      unreachable("invalid sampler dim");
   };
}
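
/* These masks select how many components of the vec4 texture coordinate
 * varying are actually used: a 2D (or 2D MS) source keeps only .xy (0x3),
 * while a 3D source also keeps .z (0x7); see the nir_channels() call in
 * get_color_blit_fs() below.
 */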

static nir_shader *
get_color_blit_fs(struct v3dv_device *device,
                  VkFormat dst_format,
                  VkFormat src_format,
                  VkSampleCountFlagBits dst_samples,
                  VkSampleCountFlagBits src_samples,
                  enum glsl_sampler_dim sampler_dim)
{
   const nir_shader_compiler_options *options = v3dv_pipeline_get_nir_options();
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_FRAGMENT, options,
                                                  "meta blit fs");

   const struct glsl_type *vec4 = glsl_vec4_type();

   nir_variable *fs_in_tex_coord =
      nir_variable_create(b.shader, nir_var_shader_in, vec4, "in_tex_coord");
   fs_in_tex_coord->data.location = VARYING_SLOT_VAR0;

   const struct glsl_type *fs_out_type =
      vk_format_is_sint(dst_format) ? glsl_ivec4_type() :
      vk_format_is_uint(dst_format) ? glsl_uvec4_type() :
                                      glsl_vec4_type();

   enum glsl_base_type src_base_type =
      vk_format_is_sint(src_format) ? GLSL_TYPE_INT :
      vk_format_is_uint(src_format) ? GLSL_TYPE_UINT :
                                      GLSL_TYPE_FLOAT;

   nir_variable *fs_out_color =
      nir_variable_create(b.shader, nir_var_shader_out, fs_out_type, "out_color");
   fs_out_color->data.location = FRAG_RESULT_DATA0;

   nir_ssa_def *tex_coord = nir_load_var(&b, fs_in_tex_coord);
   const uint32_t channel_mask = get_channel_mask_for_sampler_dim(sampler_dim);
   tex_coord = nir_channels(&b, tex_coord, channel_mask);

   nir_ssa_def *color = build_nir_tex_op(&b, device, tex_coord, src_base_type,
                                         dst_samples, src_samples, sampler_dim);

   /* For integer textures, if the bit-size of the destination is too small
    * to hold the source value, Vulkan (CTS) expects the implementation to
    * clamp to the maximum value the destination can hold. The hardware can
    * clamp to the render target type, which usually matches the component
    * bit-size, but there are some cases that won't match, such as rgb10a2,
    * which has a 16-bit render target type, so in these cases we need to
    * clamp manually.
    */
   if (format_needs_software_int_clamp(dst_format)) {
      assert(vk_format_is_int(dst_format));
      enum pipe_format src_pformat = vk_format_to_pipe_format(src_format);
      enum pipe_format dst_pformat = vk_format_to_pipe_format(dst_format);

      nir_ssa_def *c[4];
      for (uint32_t i = 0; i < 4; i++) {
         c[i] = nir_channel(&b, color, i);

         const uint32_t src_bit_size =
            util_format_get_component_bits(src_pformat,
                                           UTIL_FORMAT_COLORSPACE_RGB,
                                           i);
         const uint32_t dst_bit_size =
            util_format_get_component_bits(dst_pformat,
                                           UTIL_FORMAT_COLORSPACE_RGB,
                                           i);

         if (dst_bit_size >= src_bit_size)
            continue;

         assert(dst_bit_size > 0);
         if (util_format_is_pure_uint(dst_pformat)) {
            nir_ssa_def *max = nir_imm_int(&b, (1 << dst_bit_size) - 1);
            c[i] = nir_umin(&b, c[i], max);
         } else {
            nir_ssa_def *max = nir_imm_int(&b, (1 << (dst_bit_size - 1)) - 1);
            nir_ssa_def *min = nir_imm_int(&b, -(1 << (dst_bit_size - 1)));
            c[i] = nir_imax(&b, nir_imin(&b, c[i], max), min);
         }
      }

      color = nir_vec4(&b, c[0], c[1], c[2], c[3]);
   }

   nir_store_var(&b, fs_out_color, color, 0xf);

   return b.shader;
}
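
/* Worked example of the software clamp above: blitting an R32_UINT source
 * into a 10-bit unsigned destination component clamps with umin against
 * (1 << 10) - 1 = 1023, while an 8-bit signed destination component is
 * clamped into [-128, 127] with imin/imax.
 */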

static bool
create_pipeline(struct v3dv_device *device,
                struct v3dv_render_pass *pass,
                struct nir_shader *vs_nir,
                struct nir_shader *gs_nir,
                struct nir_shader *fs_nir,
                const VkPipelineVertexInputStateCreateInfo *vi_state,
                const VkPipelineDepthStencilStateCreateInfo *ds_state,
                const VkPipelineColorBlendStateCreateInfo *cb_state,
                const VkPipelineMultisampleStateCreateInfo *ms_state,
                const VkPipelineLayout layout,
                VkPipeline *pipeline)
{
   struct vk_shader_module vs_m;
   struct vk_shader_module gs_m;
   struct vk_shader_module fs_m;

   uint32_t num_stages = gs_nir ? 3 : 2;

   v3dv_shader_module_internal_init(device, &vs_m, vs_nir);
   v3dv_shader_module_internal_init(device, &fs_m, fs_nir);

   VkPipelineShaderStageCreateInfo stages[3] = {
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_VERTEX_BIT,
         .module = vk_shader_module_to_handle(&vs_m),
         .pName = "main",
      },
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_FRAGMENT_BIT,
         .module = vk_shader_module_to_handle(&fs_m),
         .pName = "main",
      },
      {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_GEOMETRY_BIT,
         .module = VK_NULL_HANDLE,
         .pName = "main",
      },
   };

   if (gs_nir) {
      v3dv_shader_module_internal_init(device, &gs_m, gs_nir);
      stages[2].module = vk_shader_module_to_handle(&gs_m);
   }

   VkGraphicsPipelineCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,

      .stageCount = num_stages,
      .pStages = stages,

      .pVertexInputState = vi_state,

      .pInputAssemblyState = &(VkPipelineInputAssemblyStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
         .topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
         .primitiveRestartEnable = false,
      },

      .pViewportState = &(VkPipelineViewportStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
         .viewportCount = 1,
         .scissorCount = 1,
      },

      .pRasterizationState = &(VkPipelineRasterizationStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
         .rasterizerDiscardEnable = false,
         .polygonMode = VK_POLYGON_MODE_FILL,
         .cullMode = VK_CULL_MODE_NONE,
         .frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
         .depthBiasEnable = false,
      },

      .pMultisampleState = ms_state,

      .pDepthStencilState = ds_state,

      .pColorBlendState = cb_state,

      /* The meta pipeline declares all state as dynamic.
       * As a consequence, vkCmdBindPipeline writes no dynamic state
       * to the cmd buffer. Therefore, at the end of the meta operation,
       * we need only restore dynamic state that was vkCmdSet.
       */
      .pDynamicState = &(VkPipelineDynamicStateCreateInfo) {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
         .dynamicStateCount = 6,
         .pDynamicStates = (VkDynamicState[]) {
            VK_DYNAMIC_STATE_VIEWPORT,
            VK_DYNAMIC_STATE_SCISSOR,
            VK_DYNAMIC_STATE_STENCIL_COMPARE_MASK,
            VK_DYNAMIC_STATE_STENCIL_WRITE_MASK,
            VK_DYNAMIC_STATE_STENCIL_REFERENCE,
            VK_DYNAMIC_STATE_BLEND_CONSTANTS,
            VK_DYNAMIC_STATE_DEPTH_BIAS,
            VK_DYNAMIC_STATE_LINE_WIDTH,
         },
      },

      .flags = 0,
      .layout = layout,
      .renderPass = v3dv_render_pass_to_handle(pass),
      .subpass = 0,
   };

   VkResult result =
      v3dv_CreateGraphicsPipelines(v3dv_device_to_handle(device),
                                   VK_NULL_HANDLE,
                                   1, &info,
                                   &device->vk.alloc,
                                   pipeline);

   ralloc_free(vs_nir);
   ralloc_free(fs_nir);

   return result == VK_SUCCESS;
}

static enum glsl_sampler_dim
get_sampler_dim(VkImageType type, VkSampleCountFlagBits src_samples)
{
   /* From the Vulkan 1.0 spec, VkImageCreateInfo Valid Usage:
    *
    *   "If samples is not VK_SAMPLE_COUNT_1_BIT, then imageType must be
    *    VK_IMAGE_TYPE_2D, ..."
    */
   assert(src_samples == VK_SAMPLE_COUNT_1_BIT || type == VK_IMAGE_TYPE_2D);

   switch (type) {
   case VK_IMAGE_TYPE_1D: return GLSL_SAMPLER_DIM_1D;
   case VK_IMAGE_TYPE_2D:
      return src_samples == VK_SAMPLE_COUNT_1_BIT ? GLSL_SAMPLER_DIM_2D :
                                                    GLSL_SAMPLER_DIM_MS;
   case VK_IMAGE_TYPE_3D: return GLSL_SAMPLER_DIM_3D;
   default:
      unreachable("Invalid image type");
   }
}

static bool
create_blit_pipeline(struct v3dv_device *device,
                     VkFormat dst_format,
                     VkFormat src_format,
                     VkColorComponentFlags cmask,
                     VkImageType src_type,
                     VkSampleCountFlagBits dst_samples,
                     VkSampleCountFlagBits src_samples,
                     VkRenderPass _pass,
                     VkPipelineLayout pipeline_layout,
                     VkPipeline *pipeline)
{
   struct v3dv_render_pass *pass = v3dv_render_pass_from_handle(_pass);

   /* We always rewrite depth/stencil blits to compatible color blits */
   assert(vk_format_is_color(dst_format));
   assert(vk_format_is_color(src_format));

   const enum glsl_sampler_dim sampler_dim =
      get_sampler_dim(src_type, src_samples);

   nir_shader *vs_nir = get_blit_vs();
   nir_shader *fs_nir =
      get_color_blit_fs(device, dst_format, src_format,
                        dst_samples, src_samples, sampler_dim);

   const VkPipelineVertexInputStateCreateInfo vi_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
      .vertexBindingDescriptionCount = 0,
      .vertexAttributeDescriptionCount = 0,
   };

   VkPipelineDepthStencilStateCreateInfo ds_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO,
   };

   VkPipelineColorBlendAttachmentState blend_att_state[1] = { 0 };
   blend_att_state[0] = (VkPipelineColorBlendAttachmentState) {
      .blendEnable = false,
      .colorWriteMask = cmask,
   };

   const VkPipelineColorBlendStateCreateInfo cb_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
      .logicOpEnable = false,
      .attachmentCount = 1,
      .pAttachments = blend_att_state
   };

   const VkPipelineMultisampleStateCreateInfo ms_state = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
      .rasterizationSamples = dst_samples,
      .sampleShadingEnable = dst_samples > VK_SAMPLE_COUNT_1_BIT,
      .pSampleMask = NULL,
      .alphaToCoverageEnable = false,
      .alphaToOneEnable = false,
   };

   return create_pipeline(device,
                          pass,
                          vs_nir, NULL, fs_nir,
                          &vi_state,
                          &ds_state,
                          &cb_state,
                          &ms_state,
                          pipeline_layout,
                          pipeline);
}
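
/* Note that the blit pipeline binds no vertex buffers: vi_state above has no
 * bindings or attributes because the vertex shader derives both the position
 * and the texture coordinates from the vertex index and the push constants
 * (see gen_rect_vertices() and gen_tex_coords()).
 */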

/**
 * Return a pipeline suitable for blitting the requested aspect given the
 * destination and source formats.
 */
static bool
get_blit_pipeline(struct v3dv_device *device,
                  VkFormat dst_format,
                  VkFormat src_format,
                  VkColorComponentFlags cmask,
                  VkImageType src_type,
                  VkSampleCountFlagBits dst_samples,
                  VkSampleCountFlagBits src_samples,
                  struct v3dv_meta_blit_pipeline **pipeline)
{
   bool ok = true;

   uint8_t key[V3DV_META_BLIT_CACHE_KEY_SIZE];
   get_blit_pipeline_cache_key(dst_format, src_format, cmask,
                               dst_samples, src_samples, key);
   mtx_lock(&device->meta.mtx);
   struct hash_entry *entry =
      _mesa_hash_table_search(device->meta.blit.cache[src_type], &key);
   if (entry) {
      mtx_unlock(&device->meta.mtx);
      *pipeline = entry->data;
      return true;
   }

   *pipeline = vk_zalloc2(&device->vk.alloc, NULL, sizeof(**pipeline), 8,
                          VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);

   if (*pipeline == NULL)
      goto fail;

   ok = create_blit_render_pass(device, dst_format, src_format,
                                &(*pipeline)->pass,
                                &(*pipeline)->pass_no_load);
   if (!ok)
      goto fail;

   /* Create the pipeline using one of the render passes; they are both
    * compatible, so we don't care which one we use here.
    */
   ok = create_blit_pipeline(device,
                             dst_format,
                             src_format,
                             cmask,
                             src_type,
                             dst_samples,
                             src_samples,
                             (*pipeline)->pass,
                             device->meta.blit.p_layout,
                             &(*pipeline)->pipeline);
   if (!ok)
      goto fail;

   memcpy((*pipeline)->key, key, sizeof((*pipeline)->key));
   _mesa_hash_table_insert(device->meta.blit.cache[src_type],
                           &(*pipeline)->key, *pipeline);

   mtx_unlock(&device->meta.mtx);
   return true;

fail:
   mtx_unlock(&device->meta.mtx);

   VkDevice _device = v3dv_device_to_handle(device);
   if (*pipeline) {
      if ((*pipeline)->pass)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass, &device->vk.alloc);
      if ((*pipeline)->pass_no_load)
         v3dv_DestroyRenderPass(_device, (*pipeline)->pass_no_load, &device->vk.alloc);
      if ((*pipeline)->pipeline)
         v3dv_DestroyPipeline(_device, (*pipeline)->pipeline, &device->vk.alloc);
      vk_free(&device->vk.alloc, *pipeline);
      *pipeline = NULL;
   }

   return false;
}

static void
compute_blit_box(const VkOffset3D *offsets,
                 uint32_t image_w, uint32_t image_h,
                 uint32_t *x, uint32_t *y, uint32_t *w, uint32_t *h,
                 bool *mirror_x, bool *mirror_y)
{
   if (offsets[1].x >= offsets[0].x) {
      *mirror_x = false;
      *x = MIN2(offsets[0].x, image_w - 1);
      *w = MIN2(offsets[1].x - offsets[0].x, image_w - offsets[0].x);
   } else {
      *mirror_x = true;
      *x = MIN2(offsets[1].x, image_w - 1);
      *w = MIN2(offsets[0].x - offsets[1].x, image_w - offsets[1].x);
   }
   if (offsets[1].y >= offsets[0].y) {
      *mirror_y = false;
      *y = MIN2(offsets[0].y, image_h - 1);
      *h = MIN2(offsets[1].y - offsets[0].y, image_h - offsets[0].y);
   } else {
      *mirror_y = true;
      *y = MIN2(offsets[1].y, image_h - 1);
      *h = MIN2(offsets[0].y - offsets[1].y, image_h - offsets[1].y);
   }
}
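
/* Example: offsets with x0 = 10 and x1 = 2 in a 64-pixel wide level yield
 * mirror_x = true, x = 2 and w = 8, i.e. the box is always expressed with a
 * positive extent and the mirroring itself is handled later when the texture
 * coordinates are set up in blit_shader().
 */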

static void
compute_blit_3d_layers(const VkOffset3D *offsets,
                       uint32_t *min_layer, uint32_t *max_layer,
                       bool *mirror_z)
{
   if (offsets[1].z >= offsets[0].z) {
      *mirror_z = false;
      *min_layer = offsets[0].z;
      *max_layer = offsets[1].z;
   } else {
      *mirror_z = true;
      *min_layer = offsets[1].z;
      *max_layer = offsets[0].z;
   }
}

static VkResult
create_blit_descriptor_pool(struct v3dv_cmd_buffer *cmd_buffer)
{
   /* If this is not the first pool we create for this command buffer,
    * size it based on the size of the currently exhausted pool.
    */
   uint32_t descriptor_count = 64;
   if (cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE) {
      struct v3dv_descriptor_pool *exhausted_pool =
         v3dv_descriptor_pool_from_handle(cmd_buffer->meta.blit.dspool);
      descriptor_count = MIN2(exhausted_pool->max_entry_count * 2, 1024);
   }

   /* Create the descriptor pool */
   cmd_buffer->meta.blit.dspool = VK_NULL_HANDLE;
   VkDescriptorPoolSize pool_size = {
      .type = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
      .descriptorCount = descriptor_count,
   };
   VkDescriptorPoolCreateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_POOL_CREATE_INFO,
      .maxSets = descriptor_count,
      .poolSizeCount = 1,
      .pPoolSizes = &pool_size,
      .flags = 0,
   };
   VkResult result =
      v3dv_CreateDescriptorPool(v3dv_device_to_handle(cmd_buffer->device),
                                &info,
                                &cmd_buffer->device->vk.alloc,
                                &cmd_buffer->meta.blit.dspool);

   if (result == VK_SUCCESS) {
      assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);
      const VkDescriptorPool _pool = cmd_buffer->meta.blit.dspool;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t) _pool,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyDescriptorPool);

      struct v3dv_descriptor_pool *pool =
         v3dv_descriptor_pool_from_handle(_pool);
      pool->is_driver_internal = true;
   }

   return result;
}

static VkResult
allocate_blit_source_descriptor_set(struct v3dv_cmd_buffer *cmd_buffer,
                                    VkDescriptorSet *set)
{
   /* Make sure we have a descriptor pool */
   VkResult result;
   if (cmd_buffer->meta.blit.dspool == VK_NULL_HANDLE) {
      result = create_blit_descriptor_pool(cmd_buffer);
      if (result != VK_SUCCESS)
         return result;
   }
   assert(cmd_buffer->meta.blit.dspool != VK_NULL_HANDLE);

   /* Allocate descriptor set */
   struct v3dv_device *device = cmd_buffer->device;
   VkDevice _device = v3dv_device_to_handle(device);
   VkDescriptorSetAllocateInfo info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO,
      .descriptorPool = cmd_buffer->meta.blit.dspool,
      .descriptorSetCount = 1,
      .pSetLayouts = &device->meta.blit.ds_layout,
   };
   result = v3dv_AllocateDescriptorSets(_device, &info, set);

   /* If we ran out of pool space, grow the pool and try again */
   if (result == VK_ERROR_OUT_OF_POOL_MEMORY) {
      result = create_blit_descriptor_pool(cmd_buffer);
      if (result == VK_SUCCESS) {
         info.descriptorPool = cmd_buffer->meta.blit.dspool;
         result = v3dv_AllocateDescriptorSets(_device, &info, set);
      }
   }

   return result;
}

/**
 * Returns true if the implementation supports the requested operation (even
 * if it failed to process it, for example, due to an out-of-memory error).
 *
 * The caller can specify the channels on the destination to be written via
 * the cmask parameter (which can be 0 to default to all channels), as well
 * as a swizzle to apply to the source via the cswizzle parameter (which can
 * be NULL to use the default identity swizzle).
 */
static bool
blit_shader(struct v3dv_cmd_buffer *cmd_buffer,
            struct v3dv_image *dst,
            VkFormat dst_format,
            struct v3dv_image *src,
            VkFormat src_format,
            VkColorComponentFlags cmask,
            VkComponentMapping *cswizzle,
            const VkImageBlit2KHR *_region,
            VkFilter filter,
            bool dst_is_padded_image)
{
   bool handled = true;
   VkResult result;
   uint32_t dirty_dynamic_state = 0;

   /* We don't support rendering to linear depth/stencil; this should have
    * been rewritten to a compatible color blit by the caller.
    */
   assert(dst->tiling != VK_IMAGE_TILING_LINEAR ||
          !vk_format_is_depth_or_stencil(dst_format));

   /* Can't sample from linear images */
   if (src->tiling == VK_IMAGE_TILING_LINEAR && src->type != VK_IMAGE_TYPE_1D)
      return false;

   VkImageBlit2KHR region = *_region;
   /* Rewrite combined D/S blits to compatible color blits */
   if (vk_format_is_depth_or_stencil(dst_format)) {
      assert(src_format == dst_format);
      assert(cmask == 0);
      switch (dst_format) {
      case VK_FORMAT_D16_UNORM:
         dst_format = VK_FORMAT_R16_UINT;
         break;
      case VK_FORMAT_D32_SFLOAT:
         dst_format = VK_FORMAT_R32_UINT;
         break;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
      case VK_FORMAT_D24_UNORM_S8_UINT:
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            cmask |= VK_COLOR_COMPONENT_G_BIT |
                     VK_COLOR_COMPONENT_B_BIT |
                     VK_COLOR_COMPONENT_A_BIT;
         }
         if (region.srcSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            assert(dst_format == VK_FORMAT_D24_UNORM_S8_UINT);
            cmask |= VK_COLOR_COMPONENT_R_BIT;
         }
         dst_format = VK_FORMAT_R8G8B8A8_UINT;
         break;
      default:
         unreachable("Unsupported depth/stencil format");
      };
      src_format = dst_format;
      region.srcSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
      region.dstSubresource.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT;
   }

   const VkColorComponentFlags full_cmask = VK_COLOR_COMPONENT_R_BIT |
                                            VK_COLOR_COMPONENT_G_BIT |
                                            VK_COLOR_COMPONENT_B_BIT |
                                            VK_COLOR_COMPONENT_A_BIT;
   if (cmask == 0)
      cmask = full_cmask;

   VkComponentMapping ident_swizzle = {
      .r = VK_COMPONENT_SWIZZLE_IDENTITY,
      .g = VK_COMPONENT_SWIZZLE_IDENTITY,
      .b = VK_COMPONENT_SWIZZLE_IDENTITY,
      .a = VK_COMPONENT_SWIZZLE_IDENTITY,
   };
   if (!cswizzle)
      cswizzle = &ident_swizzle;

   /* When we get here from a copy between compressed / uncompressed images
    * we choose to specify the destination blit region based on the size
    * semantics of the source image of the copy (see copy_image_blit), so we
    * need to apply those same semantics here when we compute the size of the
    * destination image level.
    */
   const uint32_t dst_block_w = vk_format_get_blockwidth(dst->vk_format);
   const uint32_t dst_block_h = vk_format_get_blockheight(dst->vk_format);
   const uint32_t src_block_w = vk_format_get_blockwidth(src->vk_format);
   const uint32_t src_block_h = vk_format_get_blockheight(src->vk_format);
   const uint32_t dst_level_w =
      u_minify(DIV_ROUND_UP(dst->extent.width * src_block_w, dst_block_w),
               region.dstSubresource.mipLevel);
   const uint32_t dst_level_h =
      u_minify(DIV_ROUND_UP(dst->extent.height * src_block_h, dst_block_h),
               region.dstSubresource.mipLevel);

   const uint32_t src_level_w =
      u_minify(src->extent.width, region.srcSubresource.mipLevel);
   const uint32_t src_level_h =
      u_minify(src->extent.height, region.srcSubresource.mipLevel);
   const uint32_t src_level_d =
      u_minify(src->extent.depth, region.srcSubresource.mipLevel);

   uint32_t dst_x, dst_y, dst_w, dst_h;
   bool dst_mirror_x, dst_mirror_y;
   compute_blit_box(region.dstOffsets,
                    dst_level_w, dst_level_h,
                    &dst_x, &dst_y, &dst_w, &dst_h,
                    &dst_mirror_x, &dst_mirror_y);

   uint32_t src_x, src_y, src_w, src_h;
   bool src_mirror_x, src_mirror_y;
   compute_blit_box(region.srcOffsets,
                    src_level_w, src_level_h,
                    &src_x, &src_y, &src_w, &src_h,
                    &src_mirror_x, &src_mirror_y);

   uint32_t min_dst_layer;
   uint32_t max_dst_layer;
   bool dst_mirror_z = false;
   if (dst->type != VK_IMAGE_TYPE_3D) {
      min_dst_layer = region.dstSubresource.baseArrayLayer;
      max_dst_layer = min_dst_layer + region.dstSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.dstOffsets,
                             &min_dst_layer, &max_dst_layer,
                             &dst_mirror_z);
   }

   uint32_t min_src_layer;
   uint32_t max_src_layer;
   bool src_mirror_z = false;
   if (src->type != VK_IMAGE_TYPE_3D) {
      min_src_layer = region.srcSubresource.baseArrayLayer;
      max_src_layer = min_src_layer + region.srcSubresource.layerCount;
   } else {
      compute_blit_3d_layers(region.srcOffsets,
                             &min_src_layer, &max_src_layer,
                             &src_mirror_z);
   }

   uint32_t layer_count = max_dst_layer - min_dst_layer;

   /* Translate source blit coordinates to normalized texture coordinates for
    * single sampled textures. For multisampled textures we require
    * unnormalized coordinates, since we can only do texelFetch on them.
    */
   float coords[4] = {
      (float)src_x,
      (float)src_y,
      (float)(src_x + src_w),
      (float)(src_y + src_h),
   };

   if (src->samples == VK_SAMPLE_COUNT_1_BIT) {
      coords[0] /= (float)src_level_w;
      coords[1] /= (float)src_level_h;
      coords[2] /= (float)src_level_w;
      coords[3] /= (float)src_level_h;
   }

   /* Handle mirroring */
   const bool mirror_x = dst_mirror_x != src_mirror_x;
   const bool mirror_y = dst_mirror_y != src_mirror_y;
   const bool mirror_z = dst_mirror_z != src_mirror_z;
   float tex_coords[5] = {
      !mirror_x ? coords[0] : coords[2],
      !mirror_y ? coords[1] : coords[3],
      !mirror_x ? coords[2] : coords[0],
      !mirror_y ? coords[3] : coords[1],
      /* Z coordinate for 3D blit sources, to be filled for each
       * destination layer
       */
      0.0f
   };

   /* For blits from 3D images we also need to compute the slice coordinate
    * to sample from, which will change for each layer in the destination.
    * Compute the step we should advance for each iteration.
    */
   const float src_z_step =
      (float)(max_src_layer - min_src_layer) / (float)layer_count;
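
   /* For example, blitting a 16-slice 3D source level into 4 destination
    * layers gives src_z_step = 4, and the per-layer loop below samples the
    * source at z = (min_src_layer + (i + 0.5) * 4) / src_level_d, i.e. the
    * middle of slices 2, 6, 10 and 14.
    */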

   /* Get the blit pipeline */
   struct v3dv_meta_blit_pipeline *pipeline = NULL;
   bool ok = get_blit_pipeline(cmd_buffer->device,
                               dst_format, src_format, cmask, src->type,
                               dst->samples, src->samples,
                               &pipeline);
   if (!ok)
      return handled;
   assert(pipeline && pipeline->pipeline &&
          pipeline->pass && pipeline->pass_no_load);

   struct v3dv_device *device = cmd_buffer->device;
   assert(device->meta.blit.ds_layout);

   VkDevice _device = v3dv_device_to_handle(device);
   VkCommandBuffer _cmd_buffer = v3dv_cmd_buffer_to_handle(cmd_buffer);

   /* Create sampler for blit source image */
   VkSamplerCreateInfo sampler_info = {
      .sType = VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
      .magFilter = filter,
      .minFilter = filter,
      .addressModeU = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeV = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .addressModeW = VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
      .mipmapMode = VK_SAMPLER_MIPMAP_MODE_NEAREST,
   };
   VkSampler sampler;
   result = v3dv_CreateSampler(_device, &sampler_info, &device->vk.alloc,
                               &sampler);
   if (result != VK_SUCCESS)
      goto fail;

   v3dv_cmd_buffer_add_private_obj(
      cmd_buffer, (uintptr_t)sampler,
      (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroySampler);

   /* Push command buffer state before starting meta operation */
   v3dv_cmd_buffer_meta_state_push(cmd_buffer, true);

   /* Push state that is common for all layers */
   v3dv_CmdBindPipeline(_cmd_buffer,
                        VK_PIPELINE_BIND_POINT_GRAPHICS,
                        pipeline->pipeline);

   const VkViewport viewport = {
      .x = dst_x,
      .y = dst_y,
      .width = dst_w,
      .height = dst_h,
      .minDepth = 0.0f,
      .maxDepth = 1.0f
   };
   v3dv_CmdSetViewport(_cmd_buffer, 0, 1, &viewport);

   const VkRect2D scissor = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h }
   };
   v3dv_CmdSetScissor(_cmd_buffer, 0, 1, &scissor);

   bool can_skip_tlb_load = false;
   const VkRect2D render_area = {
      .offset = { dst_x, dst_y },
      .extent = { dst_w, dst_h },
   };

   /* Record per-layer commands */
   VkImageAspectFlags aspects = region.dstSubresource.aspectMask;
   for (uint32_t i = 0; i < layer_count; i++) {
      /* Setup framebuffer */
      VkImageViewCreateInfo dst_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(dst),
         .viewType = v3dv_image_type_to_view_type(dst->type),
         .format = dst_format,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.dstSubresource.mipLevel,
            .levelCount = 1,
            .baseArrayLayer = min_dst_layer + i,
            .layerCount = 1
         },
      };
      VkImageView dst_image_view;
      result = v3dv_CreateImageView(_device, &dst_image_view_info,
                                    &device->vk.alloc, &dst_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)dst_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkFramebufferCreateInfo fb_info = {
         .sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
         .renderPass = pipeline->pass,
         .attachmentCount = 1,
         .pAttachments = &dst_image_view,
         .width = dst_x + dst_w,
         .height = dst_y + dst_h,
         .layers = 1,
      };

      VkFramebuffer fb;
      result = v3dv_CreateFramebuffer(_device, &fb_info,
                                      &cmd_buffer->device->vk.alloc, &fb);
      if (result != VK_SUCCESS)
         goto fail;

      struct v3dv_framebuffer *framebuffer = v3dv_framebuffer_from_handle(fb);
      framebuffer->has_edge_padding = fb_info.width == dst_level_w &&
                                      fb_info.height == dst_level_h &&
                                      dst_is_padded_image;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)fb,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyFramebuffer);

      /* Setup descriptor set for blit source texture. We don't have to
       * register the descriptor as a private command buffer object since
       * all descriptors will be freed automatically with the descriptor
       * pool.
       */
      VkDescriptorSet set;
      result = allocate_blit_source_descriptor_set(cmd_buffer, &set);
      if (result != VK_SUCCESS)
         goto fail;

      VkImageViewCreateInfo src_image_view_info = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
         .image = v3dv_image_to_handle(src),
         .viewType = v3dv_image_type_to_view_type(src->type),
         .format = src_format,
         .components = *cswizzle,
         .subresourceRange = {
            .aspectMask = aspects,
            .baseMipLevel = region.srcSubresource.mipLevel,
            .levelCount = 1,
            .baseArrayLayer =
               src->type == VK_IMAGE_TYPE_3D ? 0 : min_src_layer + i,
            .layerCount = 1
         },
      };
      VkImageView src_image_view;
      result = v3dv_CreateImageView(_device, &src_image_view_info,
                                    &device->vk.alloc, &src_image_view);
      if (result != VK_SUCCESS)
         goto fail;

      v3dv_cmd_buffer_add_private_obj(
         cmd_buffer, (uintptr_t)src_image_view,
         (v3dv_cmd_buffer_private_obj_destroy_cb)v3dv_DestroyImageView);

      VkDescriptorImageInfo image_info = {
         .sampler = sampler,
         .imageView = src_image_view,
         .imageLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
      };
      VkWriteDescriptorSet write = {
         .sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
         .dstSet = set,
         .dstBinding = 0,
         .dstArrayElement = 0,
         .descriptorCount = 1,
         .descriptorType = VK_DESCRIPTOR_TYPE_COMBINED_IMAGE_SAMPLER,
         .pImageInfo = &image_info,
      };
      v3dv_UpdateDescriptorSets(_device, 1, &write, 0, NULL);

      v3dv_CmdBindDescriptorSets(_cmd_buffer,
                                 VK_PIPELINE_BIND_POINT_GRAPHICS,
                                 device->meta.blit.p_layout,
                                 0, 1, &set,
                                 0, NULL);

      /* If the region we are about to blit is tile-aligned, then we can
       * use the render pass version that won't pre-load the tile buffer
       * with the dst image contents before the blit. The exception is when
       * we don't have a full color mask, since in that case we need to
       * preserve the original value of some of the color components.
       *
       * Since all layers have the same area, we only need to compute this
       * for the first.
       */
      if (i == 0) {
         struct v3dv_render_pass *pipeline_pass =
            v3dv_render_pass_from_handle(pipeline->pass);
         can_skip_tlb_load =
            cmask == full_cmask &&
            v3dv_subpass_area_is_tile_aligned(cmd_buffer->device, &render_area,
                                              framebuffer, pipeline_pass, 0);
      }

      /* Record blit */
      VkRenderPassBeginInfo rp_info = {
         .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
         .renderPass = can_skip_tlb_load ? pipeline->pass_no_load :
                                           pipeline->pass,
         .framebuffer = fb,
         .renderArea = render_area,
         .clearValueCount = 0,
      };

      v3dv_CmdBeginRenderPass(_cmd_buffer, &rp_info, VK_SUBPASS_CONTENTS_INLINE);
      struct v3dv_job *job = cmd_buffer->state.job;
      if (!job)
         goto fail;

      /* For 3D blits we need to compute the source slice to blit from (the
       * Z coordinate of the source sample operation). We want to choose
       * this based on the ratio of the depth of the source and the
       * destination images, picking the coordinate in the middle of each
       * step.
       */
      if (src->type == VK_IMAGE_TYPE_3D) {
         tex_coords[4] =
            !mirror_z ?
            (min_src_layer + (i + 0.5f) * src_z_step) / (float)src_level_d :
            (max_src_layer - (i + 0.5f) * src_z_step) / (float)src_level_d;
      }

      v3dv_CmdPushConstants(_cmd_buffer,
                            device->meta.blit.p_layout,
                            VK_SHADER_STAGE_VERTEX_BIT, 0, 20,
                            &tex_coords);

      v3dv_CmdDraw(_cmd_buffer, 4, 1, 0, 0);

      v3dv_CmdEndRenderPass(_cmd_buffer);
      dirty_dynamic_state = V3DV_CMD_DIRTY_VIEWPORT | V3DV_CMD_DIRTY_SCISSOR;
   }

fail:
   v3dv_cmd_buffer_meta_state_pop(cmd_buffer, dirty_dynamic_state, true);

   return handled;
}

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdBlitImage2KHR(VkCommandBuffer commandBuffer,
                      const VkBlitImageInfo2KHR *pBlitImageInfo)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, pBlitImageInfo->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, pBlitImageInfo->dstImage);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   /* From the Vulkan 1.0 spec, vkCmdBlitImage valid usage */
   assert(dst->samples == VK_SAMPLE_COUNT_1_BIT &&
          src->samples == VK_SAMPLE_COUNT_1_BIT);

   /* We don't export VK_FORMAT_FEATURE_BLIT_DST_BIT on compressed formats */
   assert(!vk_format_is_compressed(dst->vk_format));

   for (uint32_t i = 0; i < pBlitImageInfo->regionCount; i++) {
      if (blit_tfu(cmd_buffer, dst, src, &pBlitImageInfo->pRegions[i]))
         continue;
      if (blit_shader(cmd_buffer,
                      dst, dst->vk_format,
                      src, src->vk_format,
                      0, NULL,
                      &pBlitImageInfo->pRegions[i],
                      pBlitImageInfo->filter, true)) {
         continue;
      }
      unreachable("Unsupported blit operation");
   }
}
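
/* Each region above is first attempted with the TFU path (blit_tfu) and only
 * falls back to the shader-based blit when the TFU cannot handle it.
 */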

static bool
resolve_image_tlb(struct v3dv_cmd_buffer *cmd_buffer,
                  struct v3dv_image *dst,
                  struct v3dv_image *src,
                  const VkImageResolve2KHR *region)
{
   if (!can_use_tlb(src, &region->srcOffset, NULL) ||
       !can_use_tlb(dst, &region->dstOffset, NULL)) {
      return false;
   }

   if (!v3dv_X(cmd_buffer->device, format_supports_tlb_resolve)(src->format))
      return false;

   const VkFormat fb_format = src->vk_format;

   uint32_t num_layers;
   if (dst->type != VK_IMAGE_TYPE_3D)
      num_layers = region->dstSubresource.layerCount;
   else
      num_layers = region->extent.depth;
   assert(num_layers > 0);

   struct v3dv_job *job =
      v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
   if (!job)
      return true;

   const uint32_t block_w = vk_format_get_blockwidth(dst->vk_format);
   const uint32_t block_h = vk_format_get_blockheight(dst->vk_format);
   const uint32_t width = DIV_ROUND_UP(region->extent.width, block_w);
   const uint32_t height = DIV_ROUND_UP(region->extent.height, block_h);

   uint32_t internal_type, internal_bpp;
   v3dv_X(cmd_buffer->device, get_internal_type_bpp_for_image_aspects)
      (fb_format, region->srcSubresource.aspectMask,
       &internal_type, &internal_bpp);

   v3dv_job_start_frame(job, width, height, num_layers, 1, internal_bpp, true);

   struct framebuffer_data framebuffer;
   v3dv_X(job->device, setup_framebuffer_data)(&framebuffer, fb_format,
                                               internal_type,
                                               &job->frame_tiling);

   v3dv_X(job->device, job_emit_binning_flush)(job);
   v3dv_X(job->device, job_emit_resolve_image_rcl)(job, dst, src,
                                                   &framebuffer, region);

   v3dv_cmd_buffer_finish_job(cmd_buffer);
   return true;
}

static bool
resolve_image_blit(struct v3dv_cmd_buffer *cmd_buffer,
                   struct v3dv_image *dst,
                   struct v3dv_image *src,
                   const VkImageResolve2KHR *region)
{
   const VkImageBlit2KHR blit_region = {
      .sType = VK_STRUCTURE_TYPE_IMAGE_BLIT_2_KHR,
      .srcSubresource = region->srcSubresource,
      .srcOffsets = {
         region->srcOffset,
         {
            region->srcOffset.x + region->extent.width,
            region->srcOffset.y + region->extent.height,
         }
      },
      .dstSubresource = region->dstSubresource,
      .dstOffsets = {
         region->dstOffset,
         {
            region->dstOffset.x + region->extent.width,
            region->dstOffset.y + region->extent.height,
         }
      },
   };
   return blit_shader(cmd_buffer,
                      dst, dst->vk_format,
                      src, src->vk_format,
                      0, NULL,
                      &blit_region, VK_FILTER_NEAREST, true);
}
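
/* When the TLB path cannot be used, the resolve is expressed as a 1:1 blit
 * over the region with VK_FILTER_NEAREST; the averaging of the source
 * samples then happens in the fragment shader via
 * build_nir_tex_op_ms_resolve().
 */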

VKAPI_ATTR void VKAPI_CALL
v3dv_CmdResolveImage2KHR(VkCommandBuffer commandBuffer,
                         const VkResolveImageInfo2KHR *info)
{
   V3DV_FROM_HANDLE(v3dv_cmd_buffer, cmd_buffer, commandBuffer);
   V3DV_FROM_HANDLE(v3dv_image, src, info->srcImage);
   V3DV_FROM_HANDLE(v3dv_image, dst, info->dstImage);

   /* This command can only happen outside a render pass */
   assert(cmd_buffer->state.pass == NULL);
   assert(cmd_buffer->state.job == NULL);

   assert(src->samples == VK_SAMPLE_COUNT_4_BIT);
   assert(dst->samples == VK_SAMPLE_COUNT_1_BIT);

   for (uint32_t i = 0; i < info->regionCount; i++) {
      if (resolve_image_tlb(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      if (resolve_image_blit(cmd_buffer, dst, src, &info->pRegions[i]))
         continue;
      unreachable("Unsupported multisample resolve operation");
   }
}