GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/vulkan/radv_image.c
/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based in part on anv driver which is:
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "ac_drm_fourcc.h"
#include "util/debug.h"
#include "util/u_atomic.h"
#include "vulkan/util/vk_format.h"
#include "radv_debug.h"
#include "radv_private.h"
#include "radv_radeon_winsys.h"
#include "sid.h"
#include "vk_format.h"
#include "vk_util.h"

#include "gfx10_format_table.h"

static const VkImageUsageFlagBits RADV_IMAGE_USAGE_WRITE_BITS =
   VK_IMAGE_USAGE_TRANSFER_DST_BIT | VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_STORAGE_BIT;

static unsigned
radv_choose_tiling(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                   VkFormat format)
{
   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR) {
      assert(pCreateInfo->samples <= 1);
      return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   /* MSAA resources must be 2D tiled. */
   if (pCreateInfo->samples > 1)
      return RADEON_SURF_MODE_2D;

   if (!vk_format_is_compressed(format) && !vk_format_is_depth_or_stencil(format) &&
       device->physical_device->rad_info.chip_class <= GFX8) {
      /* This causes hangs in some VK CTS tests on GFX9. */
      /* Textures with a very small height are recommended to be linear. */
      if (pCreateInfo->imageType == VK_IMAGE_TYPE_1D ||
          /* Only very thin and long 2D textures should benefit from
           * linear_aligned. */
          (pCreateInfo->extent.width > 8 && pCreateInfo->extent.height <= 2))
         return RADEON_SURF_MODE_LINEAR_ALIGNED;
   }

   return RADEON_SURF_MODE_2D;
}
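/* Illustrative note (not part of the upstream file): with the logic above, a
 * plain 1024x1024 2D color image with optimal tiling and samples == 1 ends up
 * as RADEON_SURF_MODE_2D, while on GFX8 and older a 1D texture or a thin
 * 800x2 color strip falls back to RADEON_SURF_MODE_LINEAR_ALIGNED.
 */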

static bool
radv_use_tc_compat_htile_for_image(struct radv_device *device, const VkImageCreateInfo *pCreateInfo,
                                   VkFormat format)
{
   /* TC-compat HTILE is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT))
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   /* Do not enable TC-compatible HTILE if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(pCreateInfo->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                               VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   if (device->physical_device->rad_info.chip_class < GFX9) {
      /* TC-compat HTILE for MSAA depth/stencil images is broken
       * on GFX8 because the tiling doesn't match.
       */
      if (pCreateInfo->samples >= 2 && format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         return false;

      /* GFX9+ supports compression for both 32-bit and 16-bit depth
       * surfaces, while GFX8 only supports 32-bit natively. However,
       * the driver allows TC-compat HTILE for 16-bit depth surfaces
       * with no Z-plane compression.
       */
      if (format != VK_FORMAT_D32_SFLOAT_S8_UINT && format != VK_FORMAT_D32_SFLOAT &&
          format != VK_FORMAT_D16_UNORM)
         return false;
   }

   return true;
}
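/* Illustrative summary (not from the upstream file): TC-compat HTILE is only
 * accepted for optimally tiled, shader-readable, non-storage depth images;
 * e.g. a sampled VK_FORMAT_D32_SFLOAT depth attachment qualifies on GFX8+,
 * while a linear or storage depth image never does.
 */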

static bool
radv_surface_has_scanout(struct radv_device *device, const struct radv_image_create_info *info)
{
   if (info->bo_metadata) {
      if (device->physical_device->rad_info.chip_class >= GFX9)
         return info->bo_metadata->u.gfx9.scanout;
      else
         return info->bo_metadata->u.legacy.scanout;
   }

   return info->scanout;
}

static bool
radv_image_use_fast_clear_for_image(const struct radv_device *device,
                                    const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS)
      return true;

   if (image->info.samples <= 1 && image->info.width * image->info.height <= 512 * 512) {
      /* Do not enable CMASK or DCC for small surfaces where the cost
       * of the eliminate pass can be higher than the benefit of fast
       * clear. RadeonSI does this, but the image threshold is
       * different.
       */
      return false;
   }

   return image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT &&
          (image->exclusive ||
           /* Enable DCC for concurrent images if stores are
            * supported because that means we can keep DCC compressed on
            * all layouts/queues.
            */
           radv_image_use_dcc_image_stores(device, image));
}

bool
radv_are_formats_dcc_compatible(const struct radv_physical_device *pdev, const void *pNext,
                                VkFormat format, VkImageCreateFlags flags)
{
   bool blendable;

   if (!radv_is_colorbuffer_format_supported(pdev, format, &blendable))
      return false;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         /* compatibility is transitive, so we only need to check
          * one format with everything else. */
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (format_list->pViewFormats[i] == VK_FORMAT_UNDEFINED)
               continue;

            if (!radv_dcc_formats_compatible(format, format_list->pViewFormats[i]))
               return false;
         }
      } else {
         return false;
      }
   }

   return true;
}
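/* Illustrative example (not from the upstream file): a mutable-format
 * VK_FORMAT_R8G8B8A8_UNORM image created with a VkImageFormatListCreateInfo
 * listing only R8G8B8A8_UNORM and R8G8B8A8_SRGB can keep DCC, because those
 * view formats are DCC-compatible with each other; a mutable image created
 * with no format list at all is rejected outright.
 */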

static bool
radv_formats_is_atomic_allowed(const void *pNext, VkFormat format, VkImageCreateFlags flags)
{
   if (radv_is_atomic_format_supported(format))
      return true;

   if (flags & VK_IMAGE_CREATE_MUTABLE_FORMAT_BIT) {
      const struct VkImageFormatListCreateInfo *format_list =
         (const struct VkImageFormatListCreateInfo *)vk_find_struct_const(
            pNext, IMAGE_FORMAT_LIST_CREATE_INFO);

      /* We have to ignore the existence of the list if viewFormatCount = 0 */
      if (format_list && format_list->viewFormatCount) {
         for (unsigned i = 0; i < format_list->viewFormatCount; ++i) {
            if (radv_is_atomic_format_supported(format_list->pViewFormats[i]))
               return true;
         }
      }
   }

   return false;
}

static bool
radv_use_dcc_for_image(struct radv_device *device, const struct radv_image *image,
                       const VkImageCreateInfo *pCreateInfo, VkFormat format)
{
   /* DCC (Delta Color Compression) is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_DCC)
      return false;

   if (image->shareable && image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /*
    * TODO: Enable DCC for storage images on GFX9 and earlier.
    *
    * Also disable DCC with atomics because even when DCC stores are
    * supported atomics will always decompress. So if we are
    * decompressing a lot anyway we might as well not have DCC.
    */
   if ((pCreateInfo->usage & VK_IMAGE_USAGE_STORAGE_BIT) &&
       (!radv_image_use_dcc_image_stores(device, image) ||
        radv_formats_is_atomic_allowed(pCreateInfo->pNext, format, pCreateInfo->flags)))
      return false;

   /* Do not enable DCC for fragment shading rate attachments. */
   if (pCreateInfo->usage & VK_IMAGE_USAGE_FRAGMENT_SHADING_RATE_ATTACHMENT_BIT_KHR)
      return false;

   if (pCreateInfo->tiling == VK_IMAGE_TILING_LINEAR)
      return false;

   if (vk_format_is_subsampled(format) || vk_format_get_plane_count(format) > 1)
      return false;

   if (!radv_image_use_fast_clear_for_image(device, image) &&
       image->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return false;

   /* Do not enable DCC for mipmapped arrays because performance is worse. */
   if (pCreateInfo->arrayLayers > 1 && pCreateInfo->mipLevels > 1)
      return false;

   if (device->physical_device->rad_info.chip_class < GFX10) {
      /* TODO: Add support for DCC MSAA on GFX8-9. */
      if (pCreateInfo->samples > 1 && !device->physical_device->dcc_msaa_allowed)
         return false;

      /* TODO: Add support for DCC layers/mipmaps on GFX9. */
      if ((pCreateInfo->arrayLayers > 1 || pCreateInfo->mipLevels > 1) &&
          device->physical_device->rad_info.chip_class == GFX9)
         return false;
   }

   return radv_are_formats_dcc_compatible(device->physical_device, pCreateInfo->pNext, format,
                                          pCreateInfo->flags);
}

/*
 * Whether to enable image stores with DCC compression for this image. If
 * this function returns false the image subresource should be decompressed
 * before using it with image stores.
 *
 * Note that this can have mixed performance implications, see
 * https://gitlab.freedesktop.org/mesa/mesa/-/merge_requests/6796#note_643299
 *
 * This function assumes the image uses DCC compression.
 */
bool
radv_image_use_dcc_image_stores(const struct radv_device *device, const struct radv_image *image)
{
   return device->physical_device->rad_info.chip_class >= GFX10;
}
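/* Illustrative note (not from the upstream file): GFX10+ parts can write
 * through DCC, so compressed shader stores are enabled there; on GFX8-9 the
 * caller must decompress first, which is why radv_use_dcc_for_image() above
 * rejects DCC for storage images on those chips.
 */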

/*
 * Whether to use a predicate to determine whether DCC is in a compressed
 * state. This can be used to avoid decompressing an image multiple times.
 */
bool
radv_image_use_dcc_predication(const struct radv_device *device, const struct radv_image *image)
{
   return radv_image_has_dcc(image) && !radv_image_use_dcc_image_stores(device, image);
}

static inline bool
radv_use_fmask_for_image(const struct radv_device *device, const struct radv_image *image)
{
   return image->info.samples > 1 && ((image->usage & VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT) ||
                                      (device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS));
}

static inline bool
radv_use_htile_for_image(const struct radv_device *device, const struct radv_image *image)
{
   /* TODO:
    * - Investigate mips+layers.
    * - Enable on other gens.
    */
   bool use_htile_for_mips =
      image->info.array_size == 1 && device->physical_device->rad_info.chip_class >= GFX10;

   /* Do not enable HTILE for very small images because it seems less
    * performant, but make sure it is allowed with VRS attachments because we
    * need HTILE there.
    */
   if (image->info.width * image->info.height < 8 * 8 &&
       !(device->instance->debug_flags & RADV_DEBUG_FORCE_COMPRESS) &&
       !device->attachment_vrs_enabled)
      return false;

   return (image->info.levels == 1 || use_htile_for_mips) && !image->shareable;
}

static bool
radv_use_tc_compat_cmask_for_image(struct radv_device *device, struct radv_image *image)
{
   /* TC-compat CMASK is only available for GFX8+. */
   if (device->physical_device->rad_info.chip_class < GFX8)
      return false;

   if (device->instance->debug_flags & RADV_DEBUG_NO_TC_COMPAT_CMASK)
      return false;

   if (image->usage & VK_IMAGE_USAGE_STORAGE_BIT)
      return false;

   /* Do not enable TC-compatible CMASK if the image isn't readable by a
    * shader because no texture fetches will happen.
    */
   if (!(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT |
                         VK_IMAGE_USAGE_TRANSFER_SRC_BIT)))
      return false;

   /* If the image doesn't have FMASK, it can't be fetchable. */
   if (!radv_image_has_fmask(image))
      return false;

   return true;
}

static uint32_t
si_get_bo_metadata_word1(const struct radv_device *device)
{
   return (ATI_VENDOR_ID << 16) | device->physical_device->rad_info.pci_id;
}

static bool
radv_is_valid_opaque_metadata(const struct radv_device *device, const struct radeon_bo_metadata *md)
{
   if (md->metadata[0] != 1 || md->metadata[1] != si_get_bo_metadata_word1(device))
      return false;

   if (md->size_metadata < 40)
      return false;

   return true;
}

static void
radv_patch_surface_from_metadata(struct radv_device *device, struct radeon_surf *surface,
                                 const struct radeon_bo_metadata *md)
{
   surface->flags = RADEON_SURF_CLR(surface->flags, MODE);

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      if (md->u.gfx9.swizzle_mode > 0)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);

      surface->u.gfx9.swizzle_mode = md->u.gfx9.swizzle_mode;
   } else {
      surface->u.legacy.pipe_config = md->u.legacy.pipe_config;
      surface->u.legacy.bankw = md->u.legacy.bankw;
      surface->u.legacy.bankh = md->u.legacy.bankh;
      surface->u.legacy.tile_split = md->u.legacy.tile_split;
      surface->u.legacy.mtilea = md->u.legacy.mtilea;
      surface->u.legacy.num_banks = md->u.legacy.num_banks;

      if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_2D, MODE);
      else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_1D, MODE);
      else
         surface->flags |= RADEON_SURF_SET(RADEON_SURF_MODE_LINEAR_ALIGNED, MODE);
   }
}

static VkResult
radv_patch_image_dimensions(struct radv_device *device, struct radv_image *image,
                            const struct radv_image_create_info *create_info,
                            struct ac_surf_info *image_info)
{
   unsigned width = image->info.width;
   unsigned height = image->info.height;

   /*
    * minigbm sometimes allocates bigger images, which results in weird
    * strides and other properties. Let's be lenient where possible and
    * fail it on GFX10 (as we cannot cope there).
    *
    * Example hack: https://chromium-review.googlesource.com/c/chromiumos/platform/minigbm/+/1457777/
    */
   if (create_info->bo_metadata &&
       radv_is_valid_opaque_metadata(device, create_info->bo_metadata)) {
      const struct radeon_bo_metadata *md = create_info->bo_metadata;

      if (device->physical_device->rad_info.chip_class >= GFX10) {
         width = G_00A004_WIDTH_LO(md->metadata[3]) + (G_00A008_WIDTH_HI(md->metadata[4]) << 2) + 1;
         height = G_00A008_HEIGHT(md->metadata[4]) + 1;
      } else {
         width = G_008F18_WIDTH(md->metadata[4]) + 1;
         height = G_008F18_HEIGHT(md->metadata[4]) + 1;
      }
   }

   if (image->info.width == width && image->info.height == height)
      return VK_SUCCESS;

   if (width < image->info.width || height < image->info.height) {
      fprintf(stderr,
              "The imported image has smaller dimensions than the internal\n"
              "dimensions. Using it is going to fail badly, so we reject\n"
              "this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else if (device->physical_device->rad_info.chip_class >= GFX10) {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency in width and will fail this import.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
      return VK_ERROR_INVALID_EXTERNAL_HANDLE;
   } else {
      fprintf(stderr,
              "Tried to import an image with inconsistent width on pre-GFX10.\n"
              "As GFX10 has no separate stride fields we cannot cope with\n"
              "an inconsistency and would fail on GFX10.\n"
              "(internal dimensions: %d x %d, external dimensions: %d x %d)\n",
              image->info.width, image->info.height, width, height);
   }
   image_info->width = width;
   image_info->height = height;

   return VK_SUCCESS;
}

static VkResult
radv_patch_image_from_extra_info(struct radv_device *device, struct radv_image *image,
                                 const struct radv_image_create_info *create_info,
                                 struct ac_surf_info *image_info)
{
   VkResult result = radv_patch_image_dimensions(device, image, create_info, image_info);
   if (result != VK_SUCCESS)
      return result;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      if (create_info->bo_metadata) {
         radv_patch_surface_from_metadata(device, &image->planes[plane].surface,
                                          create_info->bo_metadata);
      }

      if (radv_surface_has_scanout(device, create_info)) {
         image->planes[plane].surface.flags |= RADEON_SURF_SCANOUT;
         if (device->instance->debug_flags & RADV_DEBUG_NO_DISPLAY_DCC)
            image->planes[plane].surface.flags |= RADEON_SURF_DISABLE_DCC;

         image->info.surf_index = NULL;
      }
   }
   return VK_SUCCESS;
}

static uint64_t
radv_get_surface_flags(struct radv_device *device, const struct radv_image *image,
                       unsigned plane_id, const VkImageCreateInfo *pCreateInfo,
                       VkFormat image_format)
{
   uint64_t flags;
   unsigned array_mode = radv_choose_tiling(device, pCreateInfo, image_format);
   VkFormat format = vk_format_get_plane_format(image_format, plane_id);
   const struct util_format_description *desc = vk_format_description(format);
   bool is_depth, is_stencil;

   is_depth = util_format_has_depth(desc);
   is_stencil = util_format_has_stencil(desc);

   flags = RADEON_SURF_SET(array_mode, MODE);

   switch (pCreateInfo->imageType) {
   case VK_IMAGE_TYPE_1D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_1D, TYPE);
      break;
   case VK_IMAGE_TYPE_2D:
      if (pCreateInfo->arrayLayers > 1)
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D_ARRAY, TYPE);
      else
         flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_2D, TYPE);
      break;
   case VK_IMAGE_TYPE_3D:
      flags |= RADEON_SURF_SET(RADEON_SURF_TYPE_3D, TYPE);
      break;
   default:
      unreachable("unhandled image type");
   }

   /* Required for clearing/initializing a specific layer on GFX8. */
   flags |= RADEON_SURF_CONTIGUOUS_DCC_LAYERS;

   if (is_depth) {
      flags |= RADEON_SURF_ZBUFFER;

      if (radv_use_htile_for_image(device, image) &&
          !(device->instance->debug_flags & RADV_DEBUG_NO_HIZ)) {
         if (radv_use_tc_compat_htile_for_image(device, pCreateInfo, image_format))
            flags |= RADEON_SURF_TC_COMPATIBLE_HTILE;
      } else {
         flags |= RADEON_SURF_NO_HTILE;
      }
   }

   if (is_stencil)
      flags |= RADEON_SURF_SBUFFER;

   if (device->physical_device->rad_info.chip_class >= GFX9 &&
       pCreateInfo->imageType == VK_IMAGE_TYPE_3D &&
       vk_format_get_blocksizebits(image_format) == 128 && vk_format_is_compressed(image_format))
      flags |= RADEON_SURF_NO_RENDER_TARGET;

   if (!radv_use_dcc_for_image(device, image, pCreateInfo, image_format))
      flags |= RADEON_SURF_DISABLE_DCC;

   if (!radv_use_fmask_for_image(device, image))
      flags |= RADEON_SURF_NO_FMASK;

   if (pCreateInfo->flags & VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT) {
      flags |=
         RADEON_SURF_PRT | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE | RADEON_SURF_DISABLE_DCC;
   }

   return flags;
}

static inline unsigned
si_tile_mode_index(const struct radv_image_plane *plane, unsigned level, bool stencil)
{
   if (stencil)
      return plane->surface.u.legacy.zs.stencil_tiling_index[level];
   else
      return plane->surface.u.legacy.tiling_index[level];
}

static unsigned
radv_map_swizzle(unsigned swizzle)
{
   switch (swizzle) {
   case PIPE_SWIZZLE_Y:
      return V_008F0C_SQ_SEL_Y;
   case PIPE_SWIZZLE_Z:
      return V_008F0C_SQ_SEL_Z;
   case PIPE_SWIZZLE_W:
      return V_008F0C_SQ_SEL_W;
   case PIPE_SWIZZLE_0:
      return V_008F0C_SQ_SEL_0;
   case PIPE_SWIZZLE_1:
      return V_008F0C_SQ_SEL_1;
   default: /* PIPE_SWIZZLE_X */
      return V_008F0C_SQ_SEL_X;
   }
}

static void
radv_compose_swizzle(const struct util_format_description *desc, const VkComponentMapping *mapping,
                     enum pipe_swizzle swizzle[4])
{
   if (desc->format == PIPE_FORMAT_R64_UINT || desc->format == PIPE_FORMAT_R64_SINT) {
      /* 64-bit formats only support storage images and storage images
       * require identity component mappings. We use 32-bit
       * instructions to access 64-bit images, so we need a special
       * case here.
       *
       * The zw components are 1,0 so that they can easily be used
       * by loads to create the w component, which has to be 0 for
       * NULL descriptors.
       */
      swizzle[0] = PIPE_SWIZZLE_X;
      swizzle[1] = PIPE_SWIZZLE_Y;
      swizzle[2] = PIPE_SWIZZLE_1;
      swizzle[3] = PIPE_SWIZZLE_0;
   } else if (!mapping) {
      for (unsigned i = 0; i < 4; i++)
         swizzle[i] = desc->swizzle[i];
   } else if (desc->colorspace == UTIL_FORMAT_COLORSPACE_ZS) {
      const unsigned char swizzle_xxxx[4] = {PIPE_SWIZZLE_X, PIPE_SWIZZLE_0, PIPE_SWIZZLE_0,
                                             PIPE_SWIZZLE_1};
      vk_format_compose_swizzles(mapping, swizzle_xxxx, swizzle);
   } else {
      vk_format_compose_swizzles(mapping, desc->swizzle, swizzle);
   }
}

static void
radv_make_buffer_descriptor(struct radv_device *device, struct radv_buffer *buffer,
                            VkFormat vk_format, unsigned offset, unsigned range, uint32_t *state)
{
   const struct util_format_description *desc;
   unsigned stride;
   uint64_t gpu_address = radv_buffer_get_va(buffer->bo);
   uint64_t va = gpu_address + buffer->offset;
   unsigned num_format, data_format;
   int first_non_void;
   enum pipe_swizzle swizzle[4];
   desc = vk_format_description(vk_format);
   first_non_void = vk_format_get_first_non_void_channel(vk_format);
   stride = desc->block.bits / 8;

   radv_compose_swizzle(desc, NULL, swizzle);

   va += offset;
   state[0] = va;
   state[1] = S_008F04_BASE_ADDRESS_HI(va >> 32) | S_008F04_STRIDE(stride);

   if (device->physical_device->rad_info.chip_class != GFX8 && stride) {
      range /= stride;
   }

   state[2] = range;
   state[3] = S_008F0C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_008F0C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_008F0C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_008F0C_DST_SEL_W(radv_map_swizzle(swizzle[3]));

   if (device->physical_device->rad_info.chip_class >= GFX10) {
      const struct gfx10_format *fmt = &gfx10_format_table[vk_format_to_pipe_format(vk_format)];

      /* OOB_SELECT chooses the out-of-bounds check:
       * - 0: (index >= NUM_RECORDS) || (offset >= STRIDE)
       * - 1: index >= NUM_RECORDS
       * - 2: NUM_RECORDS == 0
       * - 3: if SWIZZLE_ENABLE == 0: offset >= NUM_RECORDS
       *      else: swizzle_address >= NUM_RECORDS
       */
      state[3] |= S_008F0C_FORMAT(fmt->img_format) |
                  S_008F0C_OOB_SELECT(V_008F0C_OOB_SELECT_STRUCTURED_WITH_OFFSET) |
                  S_008F0C_RESOURCE_LEVEL(1);
   } else {
      num_format = radv_translate_buffer_numformat(desc, first_non_void);
      data_format = radv_translate_buffer_dataformat(desc, first_non_void);

      assert(data_format != V_008F0C_BUF_DATA_FORMAT_INVALID);
      assert(num_format != ~0);

      state[3] |= S_008F0C_NUM_FORMAT(num_format) | S_008F0C_DATA_FORMAT(data_format);
   }
}
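/* Illustrative sketch (not part of the upstream file) of what the four
 * descriptor dwords built above contain for a hypothetical buffer on GFX9
 * at va = 0x100000000, stride 16, range 256:
 *
 *    state[0] = 0x00000000;                          // low 32 bits of VA
 *    state[1] = S_008F04_BASE_ADDRESS_HI(1) | S_008F04_STRIDE(16);
 *    state[2] = 16;                                  // range / stride records
 *    state[3] = DST_SEL_* swizzle and format fields
 */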

static void
si_set_mutable_tex_desc_fields(struct radv_device *device, struct radv_image *image,
                               const struct legacy_surf_level *base_level_info, unsigned plane_id,
                               unsigned base_level, unsigned first_level, unsigned block_width,
                               bool is_stencil, bool is_storage_image, bool disable_compression,
                               bool enable_write_compression, uint32_t *state)
{
   struct radv_image_plane *plane = &image->planes[plane_id];
   uint64_t gpu_address = image->bo ? radv_buffer_get_va(image->bo) + image->offset : 0;
   uint64_t va = gpu_address;
   enum chip_class chip_class = device->physical_device->rad_info.chip_class;
   uint64_t meta_va = 0;
   if (chip_class >= GFX9) {
      if (is_stencil)
         va += plane->surface.u.gfx9.zs.stencil_offset;
      else
         va += plane->surface.u.gfx9.surf_offset;
   } else
      va += (uint64_t)base_level_info->offset_256B * 256;

   state[0] = va >> 8;
   if (chip_class >= GFX9 || base_level_info->mode == RADEON_SURF_MODE_2D)
      state[0] |= plane->surface.tile_swizzle;
   state[1] &= C_008F14_BASE_ADDRESS_HI;
   state[1] |= S_008F14_BASE_ADDRESS_HI(va >> 40);

   if (chip_class >= GFX8) {
      state[6] &= C_008F28_COMPRESSION_EN;
      state[7] = 0;
      if (!disable_compression && radv_dcc_enabled(image, first_level)) {
         meta_va = gpu_address + plane->surface.meta_offset;
         if (chip_class <= GFX8)
            meta_va += plane->surface.u.legacy.color.dcc_level[base_level].dcc_offset;

         unsigned dcc_tile_swizzle = plane->surface.tile_swizzle << 8;
         dcc_tile_swizzle &= (1 << plane->surface.meta_alignment_log2) - 1;
         meta_va |= dcc_tile_swizzle;
      } else if (!disable_compression && radv_image_is_tc_compat_htile(image)) {
         meta_va = gpu_address + plane->surface.meta_offset;
      }

      if (meta_va) {
         state[6] |= S_008F28_COMPRESSION_EN(1);
         if (chip_class <= GFX9)
            state[7] = meta_va >> 8;
      }
   }

   if (chip_class >= GFX10) {
      state[3] &= C_00A00C_SW_MODE;

      if (is_stencil) {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
      } else {
         state[3] |= S_00A00C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
      }

      state[6] &= C_00A018_META_DATA_ADDRESS_LO & C_00A018_META_PIPE_ALIGNED;

      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         if (radv_dcc_enabled(image, first_level) && is_storage_image && enable_write_compression)
            state[6] |= S_00A018_WRITE_COMPRESS_ENABLE(1);

         state[6] |= S_00A018_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_00A018_META_DATA_ADDRESS_LO(meta_va >> 8);
      }

      state[7] = meta_va >> 16;
   } else if (chip_class == GFX9) {
      state[3] &= C_008F1C_SW_MODE;
      state[4] &= C_008F20_PITCH;

      if (is_stencil) {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.zs.stencil_swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.zs.stencil_epitch);
      } else {
         state[3] |= S_008F1C_SW_MODE(plane->surface.u.gfx9.swizzle_mode);
         state[4] |= S_008F20_PITCH(plane->surface.u.gfx9.epitch);
      }

      state[5] &=
         C_008F24_META_DATA_ADDRESS & C_008F24_META_PIPE_ALIGNED & C_008F24_META_RB_ALIGNED;
      if (meta_va) {
         struct gfx9_surf_meta_flags meta = {
            .rb_aligned = 1,
            .pipe_aligned = 1,
         };

         if (!(plane->surface.flags & RADEON_SURF_Z_OR_SBUFFER))
            meta = plane->surface.u.gfx9.color.dcc;

         state[5] |= S_008F24_META_DATA_ADDRESS(meta_va >> 40) |
                     S_008F24_META_PIPE_ALIGNED(meta.pipe_aligned) |
                     S_008F24_META_RB_ALIGNED(meta.rb_aligned);
      }
   } else {
      /* GFX6-GFX8 */
      unsigned pitch = base_level_info->nblk_x * block_width;
      unsigned index = si_tile_mode_index(plane, base_level, is_stencil);

      state[3] &= C_008F1C_TILING_INDEX;
      state[3] |= S_008F1C_TILING_INDEX(index);
      state[4] &= C_008F20_PITCH;
      state[4] |= S_008F20_PITCH(pitch - 1);
   }
}
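/* Note (illustrative, not from the upstream file): these "mutable" dwords
 * (base address, swizzle/tiling mode, pitch, metadata address) are the parts
 * of an image descriptor that depend on where the BO is bound and on the
 * selected base level, so they are re-patched here instead of being baked in
 * by radv_make_texture_descriptor().
 */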

static unsigned
radv_tex_dim(VkImageType image_type, VkImageViewType view_type, unsigned nr_layers,
             unsigned nr_samples, bool is_storage_image, bool gfx9)
{
   if (view_type == VK_IMAGE_VIEW_TYPE_CUBE || view_type == VK_IMAGE_VIEW_TYPE_CUBE_ARRAY)
      return is_storage_image ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_CUBE;

   /* GFX9 allocates 1D textures as 2D. */
   if (gfx9 && image_type == VK_IMAGE_TYPE_1D)
      image_type = VK_IMAGE_TYPE_2D;
   switch (image_type) {
   case VK_IMAGE_TYPE_1D:
      return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_1D_ARRAY : V_008F1C_SQ_RSRC_IMG_1D;
   case VK_IMAGE_TYPE_2D:
      if (nr_samples > 1)
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY : V_008F1C_SQ_RSRC_IMG_2D_MSAA;
      else
         return nr_layers > 1 ? V_008F1C_SQ_RSRC_IMG_2D_ARRAY : V_008F1C_SQ_RSRC_IMG_2D;
   case VK_IMAGE_TYPE_3D:
      if (view_type == VK_IMAGE_VIEW_TYPE_3D)
         return V_008F1C_SQ_RSRC_IMG_3D;
      else
         return V_008F1C_SQ_RSRC_IMG_2D_ARRAY;
   default:
      unreachable("illegal image type");
   }
}
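/* Illustrative mapping (not from the upstream file):
 *   cube view, sampled          -> SQ_RSRC_IMG_CUBE
 *   cube view, storage          -> SQ_RSRC_IMG_2D_ARRAY (storage views use 2D-array addressing)
 *   2D, 4 samples, 6 layers     -> SQ_RSRC_IMG_2D_MSAA_ARRAY
 *   3D image viewed as 2D array -> SQ_RSRC_IMG_2D_ARRAY
 */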

static unsigned
gfx9_border_color_swizzle(const struct util_format_description *desc)
{
   unsigned bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;

   if (desc->swizzle[3] == PIPE_SWIZZLE_X) {
      /* For the pre-defined border color values (white, opaque
       * black, transparent black), the only thing that matters is
       * that the alpha channel winds up in the correct place
       * (because the RGB channels are all the same) so either of
       * these enumerations will work.
       */
      if (desc->swizzle[2] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_WZYX;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_WXYZ;
   } else if (desc->swizzle[0] == PIPE_SWIZZLE_X) {
      if (desc->swizzle[1] == PIPE_SWIZZLE_Y)
         bc_swizzle = V_008F20_BC_SWIZZLE_XYZW;
      else
         bc_swizzle = V_008F20_BC_SWIZZLE_XWYZ;
   } else if (desc->swizzle[1] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_YXWZ;
   } else if (desc->swizzle[2] == PIPE_SWIZZLE_X) {
      bc_swizzle = V_008F20_BC_SWIZZLE_ZYXW;
   }

   return bc_swizzle;
}

bool
vi_alpha_is_on_msb(struct radv_device *device, VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   if (device->physical_device->rad_info.chip_class >= GFX10 && desc->nr_channels == 1)
      return desc->swizzle[3] == PIPE_SWIZZLE_X;

   return radv_translate_colorswap(format, false) <= 1;
}
/**
 * Build the sampler view descriptor for a texture (GFX10).
 */
static void
gfx10_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                              bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                              const VkComponentMapping *mapping, unsigned first_level,
                              unsigned last_level, unsigned first_layer, unsigned last_layer,
                              unsigned width, unsigned height, unsigned depth, uint32_t *state,
                              uint32_t *fmask_state)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   unsigned img_format;
   unsigned type;

   desc = vk_format_description(vk_format);
   img_format = gfx10_format_table[vk_format_to_pipe_format(vk_format)].img_format;

   radv_compose_swizzle(desc, mapping, swizzle);

   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
   state[1] = S_00A004_FORMAT(img_format) | S_00A004_WIDTH_LO(width - 1);
   state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
              S_00A008_RESOURCE_LEVEL(1);
   state[3] = S_00A00C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
              S_00A00C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
              S_00A00C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
              S_00A00C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
              S_00A00C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
              S_00A00C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                          : last_level) |
              S_00A00C_BC_SWIZZLE(gfx9_border_color_swizzle(desc)) | S_00A00C_TYPE(type);
   /* Depth is the last accessible layer on gfx9+. The hw doesn't need
    * to know the total number of layers.
    */
   state[4] = S_00A010_DEPTH(type == V_008F1C_SQ_RSRC_IMG_3D ? depth - 1 : last_layer) |
              S_00A010_BASE_ARRAY(first_layer);
   state[5] = S_00A014_ARRAY_PITCH(0) |
              S_00A014_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                       : image->info.levels - 1) |
              S_00A014_PERF_MOD(4);
   state[6] = 0;
   state[7] = 0;

   if (radv_dcc_enabled(image, first_level)) {
      state[6] |= S_00A018_MAX_UNCOMPRESSED_BLOCK_SIZE(V_028C78_MAX_BLOCK_SIZE_256B) |
                  S_00A018_MAX_COMPRESSED_BLOCK_SIZE(
                     image->planes[0].surface.u.gfx9.color.dcc.max_compressed_block_size) |
                  S_00A018_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   }

   if (radv_image_get_iterate256(device, image)) {
      state[6] |= S_00A018_ITERATE_256(1);
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint64_t gpu_address = radv_buffer_get_va(image->bo);
         uint32_t format;
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

         switch (image->info.samples) {
         case 2:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S2_F2;
            break;
         case 4:
            format = V_008F0C_GFX10_FORMAT_FMASK8_S4_F4;
            break;
         case 8:
            format = V_008F0C_GFX10_FORMAT_FMASK32_S8_F8;
            break;
         default:
            unreachable("invalid nr_samples");
         }

         fmask_state[0] = (va >> 8) | image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_00A004_BASE_ADDRESS_HI(va >> 40) | S_00A004_FORMAT(format) |
                          S_00A004_WIDTH_LO(width - 1);
         fmask_state[2] = S_00A008_WIDTH_HI((width - 1) >> 2) | S_00A008_HEIGHT(height - 1) |
                          S_00A008_RESOURCE_LEVEL(1);
         fmask_state[3] =
            S_00A00C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_00A00C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_00A00C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_00A00C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode) |
            S_00A00C_TYPE(
               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = S_00A010_DEPTH(last_layer) | S_00A010_BASE_ARRAY(first_layer);
         fmask_state[5] = 0;
         fmask_state[6] = S_00A018_META_PIPE_ALIGNED(1);
         fmask_state[7] = 0;

         if (radv_image_is_tc_compat_cmask(image)) {
            va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

            fmask_state[6] |= S_00A018_COMPRESSION_EN(1);
            fmask_state[6] |= S_00A018_META_DATA_ADDRESS_LO(va >> 8);
            fmask_state[7] |= va >> 16;
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

/**
 * Build the sampler view descriptor for a texture (SI-GFX9)
 */
static void
si_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                           bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                           const VkComponentMapping *mapping, unsigned first_level,
                           unsigned last_level, unsigned first_layer, unsigned last_layer,
                           unsigned width, unsigned height, unsigned depth, uint32_t *state,
                           uint32_t *fmask_state)
{
   const struct util_format_description *desc;
   enum pipe_swizzle swizzle[4];
   int first_non_void;
   unsigned num_format, data_format, type;

   desc = vk_format_description(vk_format);

   radv_compose_swizzle(desc, mapping, swizzle);

   first_non_void = vk_format_get_first_non_void_channel(vk_format);

   num_format = radv_translate_tex_numformat(vk_format, desc, first_non_void);
   if (num_format == ~0) {
      num_format = 0;
   }

   data_format = radv_translate_tex_dataformat(vk_format, desc, first_non_void);
   if (data_format == ~0) {
      data_format = 0;
   }

   /* S8 with either Z16 or Z32 HTILE needs a special format. */
   if (device->physical_device->rad_info.chip_class == GFX9 && vk_format == VK_FORMAT_S8_UINT &&
       radv_image_is_tc_compat_htile(image)) {
      if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_32;
      else if (image->vk_format == VK_FORMAT_D16_UNORM_S8_UINT)
         data_format = V_008F14_IMG_DATA_FORMAT_S8_16;
   }
   type = radv_tex_dim(image->type, view_type, image->info.array_size, image->info.samples,
                       is_storage_image, device->physical_device->rad_info.chip_class == GFX9);
   if (type == V_008F1C_SQ_RSRC_IMG_1D_ARRAY) {
      height = 1;
      depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_2D_ARRAY || type == V_008F1C_SQ_RSRC_IMG_2D_MSAA_ARRAY) {
      if (view_type != VK_IMAGE_VIEW_TYPE_3D)
         depth = image->info.array_size;
   } else if (type == V_008F1C_SQ_RSRC_IMG_CUBE)
      depth = image->info.array_size / 6;

   state[0] = 0;
   state[1] = (S_008F14_DATA_FORMAT(data_format) | S_008F14_NUM_FORMAT(num_format));
   state[2] = (S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1) | S_008F18_PERF_MOD(4));
   state[3] = (S_008F1C_DST_SEL_X(radv_map_swizzle(swizzle[0])) |
               S_008F1C_DST_SEL_Y(radv_map_swizzle(swizzle[1])) |
               S_008F1C_DST_SEL_Z(radv_map_swizzle(swizzle[2])) |
               S_008F1C_DST_SEL_W(radv_map_swizzle(swizzle[3])) |
               S_008F1C_BASE_LEVEL(image->info.samples > 1 ? 0 : first_level) |
               S_008F1C_LAST_LEVEL(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : last_level) |
               S_008F1C_TYPE(type));
   state[4] = 0;
   state[5] = S_008F24_BASE_ARRAY(first_layer);
   state[6] = 0;
   state[7] = 0;

   if (device->physical_device->rad_info.chip_class == GFX9) {
      unsigned bc_swizzle = gfx9_border_color_swizzle(desc);

      /* Depth is the last accessible layer on Gfx9.
       * The hw doesn't need to know the total number of layers.
       */
      if (type == V_008F1C_SQ_RSRC_IMG_3D)
         state[4] |= S_008F20_DEPTH(depth - 1);
      else
         state[4] |= S_008F20_DEPTH(last_layer);

      state[4] |= S_008F20_BC_SWIZZLE(bc_swizzle);
      state[5] |= S_008F24_MAX_MIP(image->info.samples > 1 ? util_logbase2(image->info.samples)
                                                           : image->info.levels - 1);
   } else {
      state[3] |= S_008F1C_POW2_PAD(image->info.levels > 1);
      state[4] |= S_008F20_DEPTH(depth - 1);
      state[5] |= S_008F24_LAST_ARRAY(last_layer);
   }
   if (!(image->planes[0].surface.flags & RADEON_SURF_Z_OR_SBUFFER) &&
       image->planes[0].surface.meta_offset) {
      state[6] = S_008F28_ALPHA_IS_ON_MSB(vi_alpha_is_on_msb(device, vk_format));
   } else {
      /* The last dword is unused by hw. The shader uses it to clear
       * bits in the first dword of sampler state.
       */
      if (device->physical_device->rad_info.chip_class <= GFX7 && image->info.samples <= 1) {
         if (first_level == last_level)
            state[7] = C_008F30_MAX_ANISO_RATIO;
         else
            state[7] = 0xffffffff;
      }
   }

   /* Initialize the sampler view for FMASK. */
   if (fmask_state) {
      if (radv_image_has_fmask(image)) {
         uint32_t fmask_format;
         uint64_t gpu_address = radv_buffer_get_va(image->bo);
         uint64_t va;

         assert(image->plane_count == 1);

         va = gpu_address + image->offset + image->planes[0].surface.fmask_offset;

         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK;
            switch (image->info.samples) {
            case 2:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_2_2;
               break;
            case 4:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_8_4_4;
               break;
            case 8:
               num_format = V_008F14_IMG_NUM_FORMAT_FMASK_32_8_8;
               break;
            default:
               unreachable("invalid nr_samples");
            }
         } else {
            switch (image->info.samples) {
            case 2:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S2_F2;
               break;
            case 4:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK8_S4_F4;
               break;
            case 8:
               fmask_format = V_008F14_IMG_DATA_FORMAT_FMASK32_S8_F8;
               break;
            default:
               assert(0);
               fmask_format = V_008F14_IMG_DATA_FORMAT_INVALID;
            }
            num_format = V_008F14_IMG_NUM_FORMAT_UINT;
         }

         fmask_state[0] = va >> 8;
         fmask_state[0] |= image->planes[0].surface.fmask_tile_swizzle;
         fmask_state[1] = S_008F14_BASE_ADDRESS_HI(va >> 40) | S_008F14_DATA_FORMAT(fmask_format) |
                          S_008F14_NUM_FORMAT(num_format);
         fmask_state[2] = S_008F18_WIDTH(width - 1) | S_008F18_HEIGHT(height - 1);
         fmask_state[3] =
            S_008F1C_DST_SEL_X(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_Y(V_008F1C_SQ_SEL_X) |
            S_008F1C_DST_SEL_Z(V_008F1C_SQ_SEL_X) | S_008F1C_DST_SEL_W(V_008F1C_SQ_SEL_X) |
            S_008F1C_TYPE(
               radv_tex_dim(image->type, view_type, image->info.array_size, 0, false, false));
         fmask_state[4] = 0;
         fmask_state[5] = S_008F24_BASE_ARRAY(first_layer);
         fmask_state[6] = 0;
         fmask_state[7] = 0;

         if (device->physical_device->rad_info.chip_class == GFX9) {
            fmask_state[3] |= S_008F1C_SW_MODE(image->planes[0].surface.u.gfx9.color.fmask_swizzle_mode);
            fmask_state[4] |= S_008F20_DEPTH(last_layer) |
                              S_008F20_PITCH(image->planes[0].surface.u.gfx9.color.fmask_epitch);
            fmask_state[5] |= S_008F24_META_PIPE_ALIGNED(1) | S_008F24_META_RB_ALIGNED(1);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[5] |= S_008F24_META_DATA_ADDRESS(va >> 40);
               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         } else {
            fmask_state[3] |=
               S_008F1C_TILING_INDEX(image->planes[0].surface.u.legacy.color.fmask.tiling_index);
            fmask_state[4] |=
               S_008F20_DEPTH(depth - 1) |
               S_008F20_PITCH(image->planes[0].surface.u.legacy.color.fmask.pitch_in_pixels - 1);
            fmask_state[5] |= S_008F24_LAST_ARRAY(last_layer);

            if (radv_image_is_tc_compat_cmask(image)) {
               va = gpu_address + image->offset + image->planes[0].surface.cmask_offset;

               fmask_state[6] |= S_008F28_COMPRESSION_EN(1);
               fmask_state[7] |= va >> 8;
            }
         }
      } else
         memset(fmask_state, 0, 8 * 4);
   }
}

static void
radv_make_texture_descriptor(struct radv_device *device, struct radv_image *image,
                             bool is_storage_image, VkImageViewType view_type, VkFormat vk_format,
                             const VkComponentMapping *mapping, unsigned first_level,
                             unsigned last_level, unsigned first_layer, unsigned last_layer,
                             unsigned width, unsigned height, unsigned depth, uint32_t *state,
                             uint32_t *fmask_state)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      gfx10_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                    first_level, last_level, first_layer, last_layer, width, height,
                                    depth, state, fmask_state);
   } else {
      si_make_texture_descriptor(device, image, is_storage_image, view_type, vk_format, mapping,
                                 first_level, last_level, first_layer, last_layer, width, height,
                                 depth, state, fmask_state);
   }
}

static void
radv_query_opaque_metadata(struct radv_device *device, struct radv_image *image,
                           struct radeon_bo_metadata *md)
{
   static const VkComponentMapping fixedmapping;
   uint32_t desc[8];

   assert(image->plane_count == 1);

   radv_make_texture_descriptor(device, image, false, (VkImageViewType)image->type,
                                image->vk_format, &fixedmapping, 0, image->info.levels - 1, 0,
                                image->info.array_size - 1, image->info.width, image->info.height,
                                image->info.depth, desc, NULL);

   si_set_mutable_tex_desc_fields(device, image, &image->planes[0].surface.u.legacy.level[0], 0, 0,
                                  0, image->planes[0].surface.blk_w, false, false, false, false,
                                  desc);

   ac_surface_get_umd_metadata(&device->physical_device->rad_info, &image->planes[0].surface,
                               image->info.levels, desc, &md->size_metadata, md->metadata);
}

void
radv_init_metadata(struct radv_device *device, struct radv_image *image,
                   struct radeon_bo_metadata *metadata)
{
   struct radeon_surf *surface = &image->planes[0].surface;

   memset(metadata, 0, sizeof(*metadata));

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      uint64_t dcc_offset =
         image->offset +
         (surface->display_dcc_offset ? surface->display_dcc_offset : surface->meta_offset);
      metadata->u.gfx9.swizzle_mode = surface->u.gfx9.swizzle_mode;
      metadata->u.gfx9.dcc_offset_256b = dcc_offset >> 8;
      metadata->u.gfx9.dcc_pitch_max = surface->u.gfx9.color.display_dcc_pitch_max;
      metadata->u.gfx9.dcc_independent_64b_blocks = surface->u.gfx9.color.dcc.independent_64B_blocks;
      metadata->u.gfx9.dcc_independent_128b_blocks = surface->u.gfx9.color.dcc.independent_128B_blocks;
      metadata->u.gfx9.dcc_max_compressed_block_size =
         surface->u.gfx9.color.dcc.max_compressed_block_size;
      metadata->u.gfx9.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   } else {
      metadata->u.legacy.microtile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_1D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.macrotile = surface->u.legacy.level[0].mode >= RADEON_SURF_MODE_2D
                                        ? RADEON_LAYOUT_TILED
                                        : RADEON_LAYOUT_LINEAR;
      metadata->u.legacy.pipe_config = surface->u.legacy.pipe_config;
      metadata->u.legacy.bankw = surface->u.legacy.bankw;
      metadata->u.legacy.bankh = surface->u.legacy.bankh;
      metadata->u.legacy.tile_split = surface->u.legacy.tile_split;
      metadata->u.legacy.mtilea = surface->u.legacy.mtilea;
      metadata->u.legacy.num_banks = surface->u.legacy.num_banks;
      metadata->u.legacy.stride = surface->u.legacy.level[0].nblk_x * surface->bpe;
      metadata->u.legacy.scanout = (surface->flags & RADEON_SURF_SCANOUT) != 0;
   }
   radv_query_opaque_metadata(device, image, metadata);
}

void
radv_image_override_offset_stride(struct radv_device *device, struct radv_image *image,
                                  uint64_t offset, uint32_t stride)
{
   ac_surface_override_offset_stride(&device->physical_device->rad_info, &image->planes[0].surface,
                                     image->info.levels, offset, stride);
}

static void
radv_image_alloc_single_sample_cmask(const struct radv_device *device,
                                     const struct radv_image *image, struct radeon_surf *surf)
{
   if (!surf->cmask_size || surf->cmask_offset || surf->bpe > 8 || image->info.levels > 1 ||
       image->info.depth > 1 || radv_image_has_dcc(image) ||
       !radv_image_use_fast_clear_for_image(device, image) ||
       (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT))
      return;

   assert(image->info.storage_samples == 1);

   surf->cmask_offset = align64(surf->total_size, 1 << surf->cmask_alignment_log2);
   surf->total_size = surf->cmask_offset + surf->cmask_size;
   surf->alignment_log2 = MAX2(surf->alignment_log2, surf->cmask_alignment_log2);
}

static void
radv_image_alloc_values(const struct radv_device *device, struct radv_image *image)
{
   /* images with modifiers can be potentially imported */
   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT)
      return;

   if (radv_image_has_cmask(image) || radv_image_has_dcc(image)) {
      image->fce_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_use_dcc_predication(device, image)) {
      image->dcc_pred_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_has_dcc(image) || radv_image_has_cmask(image) || radv_image_has_htile(image)) {
      image->clear_value_offset = image->size;
      image->size += 8 * image->info.levels;
   }

   if (radv_image_is_tc_compat_htile(image) &&
       device->physical_device->rad_info.has_tc_compat_zrange_bug) {
      /* Metadata for the TC-compatible HTILE hardware bug which
       * has to be fixed by updating ZRANGE_PRECISION when doing
       * fast depth clears to 0.0f.
       */
      image->tc_compat_zrange_offset = image->size;
      image->size += image->info.levels * 4;
   }
}

/* Determine if the image is affected by the pipe misaligned metadata issue
 * which requires invalidating L2.
 */
static bool
radv_image_is_pipe_misaligned(const struct radv_device *device, const struct radv_image *image)
{
   struct radeon_info *rad_info = &device->physical_device->rad_info;
   int log2_samples = util_logbase2(image->info.samples);

   assert(rad_info->chip_class >= GFX10);

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat fmt = vk_format_get_plane_format(image->vk_format, i);
      int log2_bpp = util_logbase2(vk_format_get_blocksize(fmt));
      int log2_bpp_and_samples;

      if (rad_info->chip_class >= GFX10_3) {
         log2_bpp_and_samples = log2_bpp + log2_samples;
      } else {
         if (vk_format_has_depth(image->vk_format) && image->info.array_size >= 8) {
            log2_bpp = 2;
         }

         log2_bpp_and_samples = MIN2(6, log2_bpp + log2_samples);
      }

      int num_pipes = G_0098F8_NUM_PIPES(rad_info->gb_addr_config);
      int overlap = MAX2(0, log2_bpp_and_samples + num_pipes - 8);

      if (vk_format_has_depth(image->vk_format)) {
         if (radv_image_is_tc_compat_htile(image) && overlap) {
            return true;
         }
      } else {
         int max_compressed_frags = G_0098F8_MAX_COMPRESSED_FRAGS(rad_info->gb_addr_config);
         int log2_samples_frag_diff = MAX2(0, log2_samples - max_compressed_frags);
         int samples_overlap = MIN2(log2_samples, overlap);

         /* TODO: It shouldn't be necessary if the image has DCC but
          * not readable by shader.
          */
         if ((radv_image_has_dcc(image) || radv_image_is_tc_compat_cmask(image)) &&
             (samples_overlap > log2_samples_frag_diff)) {
            return true;
         }
      }
   }

   return false;
}
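/* Worked example (illustrative, not from the upstream file): on a GFX10 part
 * whose GB_ADDR_CONFIG encodes NUM_PIPES = 5 (i.e. 32 pipes), a 4-byte color
 * format at 1 sample gives log2_bpp_and_samples = 2, so
 * overlap = MAX2(0, 2 + 5 - 8) = 0 and the image is not pipe-misaligned.
 * The same format at 8 samples gives log2_bpp_and_samples = MIN2(6, 5) = 5
 * and overlap = 2, which may require an L2 invalidation when compression
 * metadata (DCC or TC-compat CMASK) is in use.
 */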

static bool
radv_image_is_l2_coherent(const struct radv_device *device, const struct radv_image *image)
{
   if (device->physical_device->rad_info.chip_class >= GFX10) {
      return !device->physical_device->rad_info.tcc_rb_non_coherent &&
             !radv_image_is_pipe_misaligned(device, image);
   } else if (device->physical_device->rad_info.chip_class == GFX9) {
      if (image->info.samples == 1 &&
          (image->usage &
           (VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT)) &&
          !vk_format_has_stencil(image->vk_format)) {
         /* Single-sample color and single-sample depth
          * (not stencil) are coherent with shaders on
          * GFX9.
          */
         return true;
      }
   }

   return false;
}

static void
radv_image_reset_layout(struct radv_image *image)
{
   image->size = 0;
   image->alignment = 1;

   image->tc_compatible_cmask = 0;
   image->fce_pred_offset = image->dcc_pred_offset = 0;
   image->clear_value_offset = image->tc_compat_zrange_offset = 0;

   for (unsigned i = 0; i < image->plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(image->vk_format, i);

      uint64_t flags = image->planes[i].surface.flags;
      uint64_t modifier = image->planes[i].surface.modifier;
      memset(image->planes + i, 0, sizeof(image->planes[i]));

      image->planes[i].surface.flags = flags;
      image->planes[i].surface.modifier = modifier;
      image->planes[i].surface.blk_w = vk_format_get_blockwidth(format);
      image->planes[i].surface.blk_h = vk_format_get_blockheight(format);
      image->planes[i].surface.bpe = vk_format_get_blocksize(vk_format_depth_only(format));

      /* align bytes per element on dword */
      if (image->planes[i].surface.bpe == 3) {
         image->planes[i].surface.bpe = 4;
      }
   }
}

VkResult
radv_image_create_layout(struct radv_device *device, struct radv_image_create_info create_info,
                         const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *mod_info,
                         struct radv_image *image)
{
   /* Clear the pCreateInfo pointer so we catch issues in the delayed case when we test in the
    * common internal case. */
   create_info.vk_info = NULL;

   struct ac_surf_info image_info = image->info;
   VkResult result = radv_patch_image_from_extra_info(device, image, &create_info, &image_info);
   if (result != VK_SUCCESS)
      return result;

   assert(!mod_info || mod_info->drmFormatModifierPlaneCount >= image->plane_count);

   radv_image_reset_layout(image);

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      struct ac_surf_info info = image_info;
      uint64_t offset;
      unsigned stride;

      info.width = vk_format_get_plane_width(image->vk_format, plane, info.width);
      info.height = vk_format_get_plane_height(image->vk_format, plane, info.height);

      if (create_info.no_metadata_planes || image->plane_count > 1) {
         image->planes[plane].surface.flags |=
            RADEON_SURF_DISABLE_DCC | RADEON_SURF_NO_FMASK | RADEON_SURF_NO_HTILE;
      }

      device->ws->surface_init(device->ws, &info, &image->planes[plane].surface);

      if (create_info.bo_metadata && !mod_info &&
          !ac_surface_set_umd_metadata(&device->physical_device->rad_info,
                                       &image->planes[plane].surface, image_info.storage_samples,
                                       image_info.levels, create_info.bo_metadata->size_metadata,
                                       create_info.bo_metadata->metadata))
         return VK_ERROR_INVALID_EXTERNAL_HANDLE;

      if (!create_info.no_metadata_planes && !create_info.bo_metadata && image->plane_count == 1 &&
          !mod_info)
         radv_image_alloc_single_sample_cmask(device, image, &image->planes[plane].surface);

      if (mod_info) {
         if (mod_info->pPlaneLayouts[plane].rowPitch % image->planes[plane].surface.bpe ||
             !mod_info->pPlaneLayouts[plane].rowPitch)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         offset = mod_info->pPlaneLayouts[plane].offset;
         stride = mod_info->pPlaneLayouts[plane].rowPitch / image->planes[plane].surface.bpe;
      } else {
         offset = align64(image->size, 1 << image->planes[plane].surface.alignment_log2);
         stride = 0; /* 0 means no override */
      }

      if (!ac_surface_override_offset_stride(&device->physical_device->rad_info,
                                             &image->planes[plane].surface, image->info.levels,
                                             offset, stride))
         return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

      /* Validate DCC offsets in modifier layout. */
      if (image->plane_count == 1 && mod_info) {
         unsigned mem_planes = ac_surface_get_nplanes(&image->planes[plane].surface);
         if (mod_info->drmFormatModifierPlaneCount != mem_planes)
            return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;

         for (unsigned i = 1; i < mem_planes; ++i) {
            if (ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                            &image->planes[plane].surface, i,
                                            0) != mod_info->pPlaneLayouts[i].offset)
               return VK_ERROR_INVALID_DRM_FORMAT_MODIFIER_PLANE_LAYOUT_EXT;
         }
      }

      image->size = MAX2(image->size, offset + image->planes[plane].surface.total_size);
      image->alignment = MAX2(image->alignment, 1 << image->planes[plane].surface.alignment_log2);

      image->planes[plane].format = vk_format_get_plane_format(image->vk_format, plane);
   }

   image->tc_compatible_cmask =
      radv_image_has_cmask(image) && radv_use_tc_compat_cmask_for_image(device, image);

   radv_image_alloc_values(device, image);

   assert(image->planes[0].surface.surf_size);
   assert(image->planes[0].surface.modifier == DRM_FORMAT_MOD_INVALID ||
          ac_modifier_has_dcc(image->planes[0].surface.modifier) == radv_image_has_dcc(image));
   return VK_SUCCESS;
}
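/* Illustrative flow summary (not from the upstream file): for each plane this
 * computes the surface layout, then either honors explicit DRM-modifier plane
 * offsets/strides or packs the plane at the next aligned offset, and finally
 * appends the driver-internal metadata values (fast-clear words, predicates)
 * via radv_image_alloc_values().
 */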
1496
1497
static void
radv_destroy_image(struct radv_device *device, const VkAllocationCallbacks *pAllocator,
                   struct radv_image *image)
{
   if ((image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) && image->bo)
      device->ws->buffer_destroy(device->ws, image->bo);

   if (image->owned_memory != VK_NULL_HANDLE) {
      RADV_FROM_HANDLE(radv_device_memory, mem, image->owned_memory);
      radv_free_memory(device, pAllocator, mem);
   }

   vk_object_base_finish(&image->base);
   vk_free2(&device->vk.alloc, pAllocator, image);
}

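/* Dump the computed layout of an image to stderr; radv_image_create() calls
 * this when the RADV_DEBUG_IMG debug flag is set.
 */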
static void
radv_image_print_info(struct radv_device *device, struct radv_image *image)
{
   fprintf(stderr, "Image:\n");
   fprintf(stderr,
           "  Info: size=%" PRIu64 ", alignment=%" PRIu32 ", "
           "width=%" PRIu32 ", height=%" PRIu32 ", "
           "offset=%" PRIu64 ", array_size=%" PRIu32 "\n",
           image->size, image->alignment, image->info.width, image->info.height, image->offset,
           image->info.array_size);
   for (unsigned i = 0; i < image->plane_count; ++i) {
      const struct radv_image_plane *plane = &image->planes[i];
      const struct radeon_surf *surf = &plane->surface;
      const struct util_format_description *desc = vk_format_description(plane->format);
      uint64_t offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, 0);

      fprintf(stderr, "  Plane[%u]: vkformat=%s, offset=%" PRIu64 "\n", i, desc->name, offset);

      ac_surface_print_info(stderr, &device->physical_device->rad_info, surf);
   }
}

/**
 * Determine if the given image can be fast cleared.
 */
static bool
radv_image_can_fast_clear(const struct radv_device *device, const struct radv_image *image)
{
   if (device->instance->debug_flags & RADV_DEBUG_NO_FAST_CLEARS)
      return false;

   if (vk_format_is_color(image->vk_format)) {
      if (!radv_image_has_cmask(image) && !radv_image_has_dcc(image))
         return false;

      /* RB+ doesn't work with CMASK fast clear on Stoney. */
      if (!radv_image_has_dcc(image) && device->physical_device->rad_info.family == CHIP_STONEY)
         return false;
   } else {
      if (!radv_image_has_htile(image))
         return false;
   }

   /* Do not fast clear 3D images. */
   if (image->type == VK_IMAGE_TYPE_3D)
      return false;

   return true;
}

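/* Pick the DRM format modifier for an image created with
 * VkImageDrmFormatModifierListCreateInfoEXT: walk the modifiers the driver
 * supports in preference order and return the first one that also appears in
 * the application's list.
 */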
static uint64_t
radv_select_modifier(const struct radv_device *dev, VkFormat format,
                     const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list)
{
   const struct radv_physical_device *pdev = dev->physical_device;
   unsigned mod_count;

   assert(mod_list->drmFormatModifierCount);

   /* We can allow everything here as it does not affect order and the application
    * is only allowed to specify modifiers that we support. */
   const struct ac_modifier_options modifier_options = {
      .dcc = true,
      .dcc_retile = true,
   };

   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, NULL);

   uint64_t *mods = calloc(mod_count, sizeof(*mods));

   /* If allocations fail, fall back to a dumber solution. */
   if (!mods)
      return mod_list->pDrmFormatModifiers[0];

   ac_get_supported_modifiers(&pdev->rad_info, &modifier_options, vk_format_to_pipe_format(format),
                              &mod_count, mods);

   for (unsigned i = 0; i < mod_count; ++i) {
      for (uint32_t j = 0; j < mod_list->drmFormatModifierCount; ++j) {
         if (mods[i] == mod_list->pDrmFormatModifiers[j]) {
            free(mods);
            return mod_list->pDrmFormatModifiers[j];
         }
      }
   }
   unreachable("App specified an invalid modifier");
}

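/* Common image creation path, shared by radv_CreateImage and the Android/WSI
 * entry points. For Android hardware buffers the layout computation is
 * deferred until the external memory is bound; sparse images get a virtual
 * buffer spanning the whole image up front.
 */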
VkResult
radv_image_create(VkDevice _device, const struct radv_image_create_info *create_info,
                  const VkAllocationCallbacks *alloc, VkImage *pImage)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   const VkImageCreateInfo *pCreateInfo = create_info->vk_info;
   uint64_t modifier = DRM_FORMAT_MOD_INVALID;
   struct radv_image *image = NULL;
   VkFormat format = radv_select_android_external_format(pCreateInfo->pNext, pCreateInfo->format);
   const struct VkImageDrmFormatModifierListCreateInfoEXT *mod_list =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_LIST_CREATE_INFO_EXT);
   const struct VkImageDrmFormatModifierExplicitCreateInfoEXT *explicit_mod =
      vk_find_struct_const(pCreateInfo->pNext, IMAGE_DRM_FORMAT_MODIFIER_EXPLICIT_CREATE_INFO_EXT);
   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO);

   const unsigned plane_count = vk_format_get_plane_count(format);
   const size_t image_struct_size = sizeof(*image) + sizeof(struct radv_image_plane) * plane_count;

   radv_assert(pCreateInfo->mipLevels > 0);
   radv_assert(pCreateInfo->arrayLayers > 0);
   radv_assert(pCreateInfo->samples > 0);
   radv_assert(pCreateInfo->extent.width > 0);
   radv_assert(pCreateInfo->extent.height > 0);
   radv_assert(pCreateInfo->extent.depth > 0);

   image =
      vk_zalloc2(&device->vk.alloc, alloc, image_struct_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!image)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &image->base, VK_OBJECT_TYPE_IMAGE);

   image->type = pCreateInfo->imageType;
   image->info.width = pCreateInfo->extent.width;
   image->info.height = pCreateInfo->extent.height;
   image->info.depth = pCreateInfo->extent.depth;
   image->info.samples = pCreateInfo->samples;
   image->info.storage_samples = pCreateInfo->samples;
   image->info.array_size = pCreateInfo->arrayLayers;
   image->info.levels = pCreateInfo->mipLevels;
   image->info.num_channels = vk_format_get_nr_components(format);

   image->vk_format = format;
   image->tiling = pCreateInfo->tiling;
   image->usage = pCreateInfo->usage;
   image->flags = pCreateInfo->flags;
   image->plane_count = plane_count;

   image->exclusive = pCreateInfo->sharingMode == VK_SHARING_MODE_EXCLUSIVE;
   if (pCreateInfo->sharingMode == VK_SHARING_MODE_CONCURRENT) {
      for (uint32_t i = 0; i < pCreateInfo->queueFamilyIndexCount; ++i)
         if (pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_EXTERNAL ||
             pCreateInfo->pQueueFamilyIndices[i] == VK_QUEUE_FAMILY_FOREIGN_EXT)
            image->queue_family_mask |= (1u << RADV_MAX_QUEUE_FAMILIES) - 1u;
         else
            image->queue_family_mask |= 1u << pCreateInfo->pQueueFamilyIndices[i];
   }

   const VkExternalMemoryImageCreateInfo *external_info =
      vk_find_struct_const(pCreateInfo->pNext, EXTERNAL_MEMORY_IMAGE_CREATE_INFO);

   image->shareable = external_info;
   if (!vk_format_is_depth_or_stencil(format) && !image->shareable &&
       !(image->flags & VK_IMAGE_CREATE_SPARSE_ALIASED_BIT) &&
       pCreateInfo->tiling != VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      image->info.surf_index = &device->image_mrt_offset_counter;
   }

   if (mod_list)
      modifier = radv_select_modifier(device, format, mod_list);
   else if (explicit_mod)
      modifier = explicit_mod->drmFormatModifier;

   for (unsigned plane = 0; plane < image->plane_count; ++plane) {
      image->planes[plane].surface.flags =
         radv_get_surface_flags(device, image, plane, pCreateInfo, format);
      image->planes[plane].surface.modifier = modifier;
   }

   bool delay_layout =
      external_info && (external_info->handleTypes &
                        VK_EXTERNAL_MEMORY_HANDLE_TYPE_ANDROID_HARDWARE_BUFFER_BIT_ANDROID);

   if (delay_layout) {
      *pImage = radv_image_to_handle(image);
      assert(!(image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT));
      return VK_SUCCESS;
   }

   VkResult result = radv_image_create_layout(device, *create_info, explicit_mod, image);
   if (result != VK_SUCCESS) {
      radv_destroy_image(device, alloc, image);
      return result;
   }

   if (image->flags & VK_IMAGE_CREATE_SPARSE_BINDING_BIT) {
      image->alignment = MAX2(image->alignment, 4096);
      image->size = align64(image->size, image->alignment);
      image->offset = 0;

      result =
         device->ws->buffer_create(device->ws, image->size, image->alignment, 0,
                                   RADEON_FLAG_VIRTUAL, RADV_BO_PRIORITY_VIRTUAL, 0, &image->bo);
      if (result != VK_SUCCESS) {
         radv_destroy_image(device, alloc, image);
         return vk_error(device->instance, result);
      }
   }
   image->l2_coherent = radv_image_is_l2_coherent(device, image);

   if (device->instance->debug_flags & RADV_DEBUG_IMG) {
      radv_image_print_info(device, image);
   }

   *pImage = radv_image_to_handle(image);

   return VK_SUCCESS;
}

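/* Build the texture descriptor for one plane of an image view. Each view
 * carries two descriptors per plane (sampled and storage, selected by
 * is_storage_image). On GFX9+ the base mip level is programmed into the
 * descriptor itself, while older chips encode it through the per-level
 * address in si_set_mutable_tex_desc_fields().
 */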
static void
radv_image_view_make_descriptor(struct radv_image_view *iview, struct radv_device *device,
                                VkFormat vk_format, const VkComponentMapping *components,
                                bool is_storage_image, bool disable_compression,
                                bool enable_compression, unsigned plane_id,
                                unsigned descriptor_plane_id)
{
   struct radv_image *image = iview->image;
   struct radv_image_plane *plane = &image->planes[plane_id];
   bool is_stencil = iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT;
   uint32_t blk_w;
   union radv_descriptor *descriptor;
   uint32_t hw_level = 0;

   if (is_storage_image) {
      descriptor = &iview->storage_descriptor;
   } else {
      descriptor = &iview->descriptor;
   }

   assert(vk_format_get_plane_count(vk_format) == 1);
   assert(plane->surface.blk_w % vk_format_get_blockwidth(plane->format) == 0);
   blk_w = plane->surface.blk_w / vk_format_get_blockwidth(plane->format) *
           vk_format_get_blockwidth(vk_format);

   if (device->physical_device->rad_info.chip_class >= GFX9)
      hw_level = iview->base_mip;
   radv_make_texture_descriptor(
      device, image, is_storage_image, iview->type, vk_format, components, hw_level,
      hw_level + iview->level_count - 1, iview->base_layer,
      iview->base_layer + iview->layer_count - 1,
      vk_format_get_plane_width(image->vk_format, plane_id, iview->extent.width),
      vk_format_get_plane_height(image->vk_format, plane_id, iview->extent.height),
      iview->extent.depth, descriptor->plane_descriptors[descriptor_plane_id],
      descriptor_plane_id ? NULL : descriptor->fmask_descriptor);

   const struct legacy_surf_level *base_level_info = NULL;
   if (device->physical_device->rad_info.chip_class <= GFX9) {
      if (is_stencil)
         base_level_info = &plane->surface.u.legacy.zs.stencil_level[iview->base_mip];
      else
         base_level_info = &plane->surface.u.legacy.level[iview->base_mip];
   }

   bool enable_write_compression = radv_image_use_dcc_image_stores(device, image);
   if (is_storage_image && !(enable_write_compression || enable_compression))
      disable_compression = true;
   si_set_mutable_tex_desc_fields(device, image, base_level_info, plane_id, iview->base_mip,
                                  iview->base_mip, blk_w, is_stencil, is_storage_image,
                                  disable_compression, enable_write_compression,
                                  descriptor->plane_descriptors[descriptor_plane_id]);
}

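/* Map an aspect mask to a plane index. The PLANE aspects address the format
 * planes of multi-planar formats; the MEMORY_PLANE aspects come from
 * VK_EXT_image_drm_format_modifier and address memory planes, which can
 * include metadata planes such as DCC.
 */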
static unsigned
radv_plane_from_aspect(VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_1_BIT_EXT:
      return 1;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
   case VK_IMAGE_ASPECT_MEMORY_PLANE_2_BIT_EXT:
      return 2;
   case VK_IMAGE_ASPECT_MEMORY_PLANE_3_BIT_EXT:
      return 3;
   default:
      return 0;
   }
}

VkFormat
radv_get_aspect_format(struct radv_image *image, VkImageAspectFlags mask)
{
   switch (mask) {
   case VK_IMAGE_ASPECT_PLANE_0_BIT:
      return image->planes[0].format;
   case VK_IMAGE_ASPECT_PLANE_1_BIT:
      return image->planes[1].format;
   case VK_IMAGE_ASPECT_PLANE_2_BIT:
      return image->planes[2].format;
   case VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_stencil_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT:
      return vk_format_depth_only(image->vk_format);
   case VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT:
      return vk_format_depth_only(image->vk_format);
   default:
      return image->vk_format;
   }
}

/**
 * Determine if the given image view can be fast cleared.
 */
static bool
radv_image_view_can_fast_clear(const struct radv_device *device,
                               const struct radv_image_view *iview)
{
   struct radv_image *image;

   if (!iview)
      return false;
   image = iview->image;

   /* Only fast clear if the image itself can be fast cleared. */
   if (!radv_image_can_fast_clear(device, image))
      return false;

   /* Only fast clear if all layers are bound. */
   if (iview->base_layer > 0 || iview->layer_count != image->info.array_size)
      return false;

   /* Only fast clear if the view covers the whole image. */
   if (!radv_image_extent_compare(image, &iview->extent))
      return false;

   return true;
}

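/* Fill in a radv_image_view: validate the subresource range, resolve the
 * view format (Android external formats arrive as VK_FORMAT_UNDEFINED, and
 * single-aspect depth/stencil views narrow the format), compute the extent
 * the descriptor must advertise, and build the sampled and storage
 * descriptors for each plane the view covers.
 */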
void
radv_image_view_init(struct radv_image_view *iview, struct radv_device *device,
                     const VkImageViewCreateInfo *pCreateInfo,
                     const struct radv_image_view_extra_create_info *extra_create_info)
{
   RADV_FROM_HANDLE(radv_image, image, pCreateInfo->image);
   const VkImageSubresourceRange *range = &pCreateInfo->subresourceRange;
   uint32_t plane_count = 1;

   switch (image->type) {
   case VK_IMAGE_TYPE_1D:
   case VK_IMAGE_TYPE_2D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             image->info.array_size);
      break;
   case VK_IMAGE_TYPE_3D:
      assert(range->baseArrayLayer + radv_get_layerCount(image, range) - 1 <=
             radv_minify(image->info.depth, range->baseMipLevel));
      break;
   default:
      unreachable("bad VkImageType");
   }
   iview->image = image;
   iview->type = pCreateInfo->viewType;
   iview->plane_id = radv_plane_from_aspect(pCreateInfo->subresourceRange.aspectMask);
   iview->aspect_mask = pCreateInfo->subresourceRange.aspectMask;
   iview->base_layer = range->baseArrayLayer;
   iview->layer_count = radv_get_layerCount(image, range);
   iview->base_mip = range->baseMipLevel;
   iview->level_count = radv_get_levelCount(image, range);

   iview->vk_format = pCreateInfo->format;

   /* If the image has an Android external format, pCreateInfo->format will be
    * VK_FORMAT_UNDEFINED. */
   if (iview->vk_format == VK_FORMAT_UNDEFINED)
      iview->vk_format = image->vk_format;

   if (iview->aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT) {
      iview->vk_format = vk_format_stencil_only(iview->vk_format);
   } else if (iview->aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT) {
      iview->vk_format = vk_format_depth_only(iview->vk_format);
   }

   if (device->physical_device->rad_info.chip_class >= GFX9) {
      iview->extent = (VkExtent3D){
         .width = image->info.width,
         .height = image->info.height,
         .depth = image->info.depth,
      };
   } else {
      iview->extent = (VkExtent3D){
         .width = radv_minify(image->info.width, range->baseMipLevel),
         .height = radv_minify(image->info.height, range->baseMipLevel),
         .depth = radv_minify(image->info.depth, range->baseMipLevel),
      };
   }

   if (iview->vk_format != image->planes[iview->plane_id].format) {
      unsigned view_bw = vk_format_get_blockwidth(iview->vk_format);
      unsigned view_bh = vk_format_get_blockheight(iview->vk_format);
      unsigned img_bw = vk_format_get_blockwidth(image->vk_format);
      unsigned img_bh = vk_format_get_blockheight(image->vk_format);

      iview->extent.width = round_up_u32(iview->extent.width * view_bw, img_bw);
      iview->extent.height = round_up_u32(iview->extent.height * view_bh, img_bh);

      /* Comment ported from amdvlk -
       * If we have the following image:
       *            Uncompressed pixels   Compressed block sizes (4x4)
       *    mip0:        22 x 22                  6 x 6
       *    mip1:        11 x 11                  3 x 3
       *    mip2:         5 x  5                  2 x 2
       *    mip3:         2 x  2                  1 x 1
       *    mip4:         1 x  1                  1 x 1
       *
       * On GFX9 the descriptor is always programmed with the WIDTH and HEIGHT of the base level and
       * the HW is calculating the degradation of the block sizes down the mip-chain as follows
       * (straight-up divide-by-two integer math): mip0: 6x6 mip1: 3x3 mip2: 1x1 mip3: 1x1
       *
       * This means that mip2 will be missing texels.
       *
       * Fix this by calculating the base mip's width and height, then convert
       * that, and round it back up to get the level 0 size. Clamp the
       * converted size between the original values, and the physical extent
       * of the base mipmap.
       *
       * On GFX10 we have to take care to not go over the physical extent
       * of the base mipmap as otherwise the GPU computes a different layout.
       * Note that the GPU does use the same base-mip dimensions for both a
       * block compatible format and the compressed format, so even if we take
       * the plain converted dimensions the physical layout is correct.
       */
      if (device->physical_device->rad_info.chip_class >= GFX9 &&
          vk_format_is_compressed(image->vk_format) && !vk_format_is_compressed(iview->vk_format)) {
         /* If we have multiple levels in the view we should ideally take the last level,
          * but the mip calculation has a max(..., 1) so walking back to the base mip in a
          * useful way is hard. */
         if (iview->level_count > 1) {
            iview->extent.width = iview->image->planes[0].surface.u.gfx9.base_mip_width;
            iview->extent.height = iview->image->planes[0].surface.u.gfx9.base_mip_height;
         } else {
            unsigned lvl_width = radv_minify(image->info.width, range->baseMipLevel);
            unsigned lvl_height = radv_minify(image->info.height, range->baseMipLevel);

            lvl_width = round_up_u32(lvl_width * view_bw, img_bw);
            lvl_height = round_up_u32(lvl_height * view_bh, img_bh);

            lvl_width <<= range->baseMipLevel;
            lvl_height <<= range->baseMipLevel;

            iview->extent.width = CLAMP(lvl_width, iview->extent.width,
                                        iview->image->planes[0].surface.u.gfx9.base_mip_width);
            iview->extent.height = CLAMP(lvl_height, iview->extent.height,
                                         iview->image->planes[0].surface.u.gfx9.base_mip_height);
         }
      }
   }

   iview->support_fast_clear = radv_image_view_can_fast_clear(device, iview);

   if (vk_format_get_plane_count(image->vk_format) > 1 &&
       iview->aspect_mask == VK_IMAGE_ASPECT_COLOR_BIT) {
      plane_count = vk_format_get_plane_count(iview->vk_format);
   }

   bool disable_compression = extra_create_info ? extra_create_info->disable_compression : false;
   bool enable_compression = extra_create_info ? extra_create_info->enable_compression : false;
   for (unsigned i = 0; i < plane_count; ++i) {
      VkFormat format = vk_format_get_plane_format(iview->vk_format, i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, false,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
      radv_image_view_make_descriptor(iview, device, format, &pCreateInfo->components, true,
                                      disable_compression, enable_compression, iview->plane_id + i,
                                      i);
   }
}

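/* The radv_layout_* helpers below decide, for a given image layout and the
 * set of queue families that may touch the image, whether a piece of
 * metadata (HTILE, DCC, FMASK) can stay compressed or must be decompressed
 * at a layout transition.
 */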
bool
radv_layout_is_htile_compressed(const struct radv_device *device, const struct radv_image *image,
                                VkImageLayout layout, bool in_render_loop, unsigned queue_mask)
{
   switch (layout) {
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_ATTACHMENT_OPTIMAL:
   case VK_IMAGE_LAYOUT_DEPTH_ATTACHMENT_OPTIMAL_KHR:
   case VK_IMAGE_LAYOUT_STENCIL_ATTACHMENT_OPTIMAL_KHR:
      return radv_image_has_htile(image);
   case VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL:
      return radv_image_has_htile(image) && queue_mask == (1u << RADV_QUEUE_GENERAL);
   case VK_IMAGE_LAYOUT_SHARED_PRESENT_KHR:
   case VK_IMAGE_LAYOUT_GENERAL:
      /* It should be safe to enable TC-compat HTILE with
       * VK_IMAGE_LAYOUT_GENERAL if we are not in a render loop and
       * if the image doesn't have the storage bit set. This
       * improves performance for apps that use GENERAL for the main
       * depth pass because this allows compression and this reduces
       * the number of decompressions from/to GENERAL.
       */
      /* FIXME: Enabling TC-compat HTILE in GENERAL on the compute
       * queue is likely broken for eg. depth/stencil copies.
       */
      if (radv_image_is_tc_compat_htile(image) && queue_mask & (1u << RADV_QUEUE_GENERAL) &&
          !in_render_loop && !device->instance->disable_tc_compat_htile_in_general) {
         /* GFX10+ supports compressed writes to HTILE. */
         return device->physical_device->rad_info.chip_class >= GFX10 ||
                !(image->usage & VK_IMAGE_USAGE_STORAGE_BIT);
      } else {
         return false;
      }
   case VK_IMAGE_LAYOUT_DEPTH_STENCIL_READ_ONLY_OPTIMAL:
      if (radv_image_is_tc_compat_htile(image) ||
          (radv_image_has_htile(image) &&
           !(image->usage & (VK_IMAGE_USAGE_SAMPLED_BIT | VK_IMAGE_USAGE_INPUT_ATTACHMENT_BIT)))) {
         /* Keep HTILE compressed if the image is only going to
          * be used as a depth/stencil read-only attachment.
          */
         return true;
      } else {
         return false;
      }
      break;
   default:
      return radv_image_is_tc_compat_htile(image);
   }
}

bool
radv_layout_can_fast_clear(const struct radv_device *device, const struct radv_image *image,
                           unsigned level, VkImageLayout layout, bool in_render_loop,
                           unsigned queue_mask)
{
   if (radv_dcc_enabled(image, level) &&
       !radv_layout_dcc_compressed(device, image, level, layout, in_render_loop, queue_mask))
      return false;

   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return false;

   return layout == VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL &&
          queue_mask == (1u << RADV_QUEUE_GENERAL);
}

bool
radv_layout_dcc_compressed(const struct radv_device *device, const struct radv_image *image,
                           unsigned level, VkImageLayout layout, bool in_render_loop,
                           unsigned queue_mask)
{
   if (!radv_dcc_enabled(image, level))
      return false;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT &&
       queue_mask & (1u << RADV_QUEUE_FOREIGN))
      return true;

   /* If the image is read-only, we can always just keep it compressed */
   if (!(image->usage & RADV_IMAGE_USAGE_WRITE_BITS))
      return true;

   /* Don't compress compute transfer dst when image stores are not supported. */
   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)) && !radv_image_use_dcc_image_stores(device, image))
      return false;

   return device->physical_device->rad_info.chip_class >= GFX10 ||
          layout != VK_IMAGE_LAYOUT_GENERAL;
}

bool
radv_layout_fmask_compressed(const struct radv_device *device, const struct radv_image *image,
                             VkImageLayout layout, unsigned queue_mask)
{
   if (!radv_image_has_fmask(image))
      return false;

   /* Don't compress compute transfer dst because image stores ignore FMASK and it needs to be
    * expanded before.
    */
   if ((layout == VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL || layout == VK_IMAGE_LAYOUT_GENERAL) &&
       (queue_mask & (1u << RADV_QUEUE_COMPUTE)))
      return false;

   /* Only compress concurrent images if TC-compat CMASK is enabled (no FMASK decompression). */
   return layout != VK_IMAGE_LAYOUT_GENERAL &&
          (queue_mask == (1u << RADV_QUEUE_GENERAL) || radv_image_is_tc_compat_cmask(image));
}

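/* Compute the mask of queue families that may access an image: concurrent
 * images use the mask accumulated at creation time, while exclusive images
 * resolve to a single family (the current queue's family when
 * VK_QUEUE_FAMILY_IGNORED is passed).
 */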
unsigned
radv_image_queue_family_mask(const struct radv_image *image, uint32_t family, uint32_t queue_family)
{
   if (!image->exclusive)
      return image->queue_family_mask;
   if (family == VK_QUEUE_FAMILY_EXTERNAL || family == VK_QUEUE_FAMILY_FOREIGN_EXT)
      return ((1u << RADV_MAX_QUEUE_FAMILIES) - 1u) | (1u << RADV_QUEUE_FOREIGN);
   if (family == VK_QUEUE_FAMILY_IGNORED)
      return 1u << queue_family;
   return 1u << family;
}

VkResult
radv_CreateImage(VkDevice device, const VkImageCreateInfo *pCreateInfo,
                 const VkAllocationCallbacks *pAllocator, VkImage *pImage)
{
#ifdef ANDROID
   const VkNativeBufferANDROID *gralloc_info =
      vk_find_struct_const(pCreateInfo->pNext, NATIVE_BUFFER_ANDROID);

   if (gralloc_info)
      return radv_image_from_gralloc(device, pCreateInfo, gralloc_info, pAllocator, pImage);
#endif

   const struct wsi_image_create_info *wsi_info =
      vk_find_struct_const(pCreateInfo->pNext, WSI_IMAGE_CREATE_INFO_MESA);
   bool scanout = wsi_info && wsi_info->scanout;

   return radv_image_create(device,
                            &(struct radv_image_create_info){
                               .vk_info = pCreateInfo,
                               .scanout = scanout,
                            },
                            pAllocator, pImage);
}

void
radv_DestroyImage(VkDevice _device, VkImage _image, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image, image, _image);

   if (!image)
      return;

   radv_destroy_image(device, pAllocator, image);
}

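/* vkGetImageSubresourceLayout. For modifier-based images the query addresses
 * memory planes and only subresource 0 is valid; otherwise offsets and
 * pitches come from the radeon_surf description (the unified GFX9+ layout or
 * the legacy per-level layout).
 */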
void
radv_GetImageSubresourceLayout(VkDevice _device, VkImage _image,
                               const VkImageSubresource *pSubresource, VkSubresourceLayout *pLayout)
{
   RADV_FROM_HANDLE(radv_image, image, _image);
   RADV_FROM_HANDLE(radv_device, device, _device);
   int level = pSubresource->mipLevel;
   int layer = pSubresource->arrayLayer;

   unsigned plane_id = 0;
   if (vk_format_get_plane_count(image->vk_format) > 1)
      plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

   struct radv_image_plane *plane = &image->planes[plane_id];
   struct radeon_surf *surface = &plane->surface;

   if (image->tiling == VK_IMAGE_TILING_DRM_FORMAT_MODIFIER_EXT) {
      unsigned mem_plane_id = radv_plane_from_aspect(pSubresource->aspectMask);

      assert(level == 0);
      assert(layer == 0);

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    surface, mem_plane_id, 0);
      pLayout->rowPitch = ac_surface_get_plane_stride(device->physical_device->rad_info.chip_class,
                                                      surface, mem_plane_id);
      pLayout->arrayPitch = 0;
      pLayout->depthPitch = 0;
      pLayout->size = ac_surface_get_plane_size(surface, mem_plane_id);
   } else if (device->physical_device->rad_info.chip_class >= GFX9) {
      uint64_t level_offset = surface->is_linear ? surface->u.gfx9.offset[level] : 0;

      pLayout->offset = ac_surface_get_plane_offset(device->physical_device->rad_info.chip_class,
                                                    &plane->surface, 0, layer) +
                        level_offset;
      if (image->vk_format == VK_FORMAT_R32G32B32_UINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SINT ||
          image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
         /* Adjust the number of bytes between each row because
          * the pitch is actually the number of components per
          * row.
          */
         pLayout->rowPitch = surface->u.gfx9.surf_pitch * surface->bpe / 3;
      } else {
         uint32_t pitch =
            surface->is_linear ? surface->u.gfx9.pitch[level] : surface->u.gfx9.surf_pitch;

         assert(util_is_power_of_two_nonzero(surface->bpe));
         pLayout->rowPitch = pitch * surface->bpe;
      }

      pLayout->arrayPitch = surface->u.gfx9.surf_slice_size;
      pLayout->depthPitch = surface->u.gfx9.surf_slice_size;
      pLayout->size = surface->u.gfx9.surf_slice_size;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   } else {
      pLayout->offset = (uint64_t)surface->u.legacy.level[level].offset_256B * 256 +
                        (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4 * layer;
      pLayout->rowPitch = surface->u.legacy.level[level].nblk_x * surface->bpe;
      pLayout->arrayPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->depthPitch = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      pLayout->size = (uint64_t)surface->u.legacy.level[level].slice_size_dw * 4;
      if (image->type == VK_IMAGE_TYPE_3D)
         pLayout->size *= u_minify(image->info.depth, level);
   }
}

VkResult
radv_GetImageDrmFormatModifierPropertiesEXT(VkDevice _device, VkImage _image,
                                            VkImageDrmFormatModifierPropertiesEXT *pProperties)
{
   RADV_FROM_HANDLE(radv_image, image, _image);

   pProperties->drmFormatModifier = image->planes[0].surface.modifier;
   return VK_SUCCESS;
}

VkResult
radv_CreateImageView(VkDevice _device, const VkImageViewCreateInfo *pCreateInfo,
                     const VkAllocationCallbacks *pAllocator, VkImageView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_image_view *view;

   view =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (view == NULL)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_IMAGE_VIEW);

   radv_image_view_init(view, device, pCreateInfo, NULL);

   *pView = radv_image_view_to_handle(view);

   return VK_SUCCESS;
}

void
radv_DestroyImageView(VkDevice _device, VkImageView _iview, const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_image_view, iview, _iview);

   if (!iview)
      return;

   vk_object_base_finish(&iview->base);
   vk_free2(&device->vk.alloc, pAllocator, iview);
}

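/* Buffer views need only a single typed-buffer descriptor; a range of
 * VK_WHOLE_SIZE is resolved to the remainder of the buffer before the
 * descriptor words are built.
 */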
void
radv_buffer_view_init(struct radv_buffer_view *view, struct radv_device *device,
                      const VkBufferViewCreateInfo *pCreateInfo)
{
   RADV_FROM_HANDLE(radv_buffer, buffer, pCreateInfo->buffer);

   view->bo = buffer->bo;
   view->range =
      pCreateInfo->range == VK_WHOLE_SIZE ? buffer->size - pCreateInfo->offset : pCreateInfo->range;
   view->vk_format = pCreateInfo->format;

   radv_make_buffer_descriptor(device, buffer, view->vk_format, pCreateInfo->offset, view->range,
                               view->state);
}

VkResult
radv_CreateBufferView(VkDevice _device, const VkBufferViewCreateInfo *pCreateInfo,
                      const VkAllocationCallbacks *pAllocator, VkBufferView *pView)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   struct radv_buffer_view *view;

   view =
      vk_alloc2(&device->vk.alloc, pAllocator, sizeof(*view), 8, VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (!view)
      return vk_error(device->instance, VK_ERROR_OUT_OF_HOST_MEMORY);

   vk_object_base_init(&device->vk, &view->base, VK_OBJECT_TYPE_BUFFER_VIEW);

   radv_buffer_view_init(view, device, pCreateInfo);

   *pView = radv_buffer_view_to_handle(view);

   return VK_SUCCESS;
}

void
radv_DestroyBufferView(VkDevice _device, VkBufferView bufferView,
                       const VkAllocationCallbacks *pAllocator)
{
   RADV_FROM_HANDLE(radv_device, device, _device);
   RADV_FROM_HANDLE(radv_buffer_view, view, bufferView);

   if (!view)
      return;

   vk_object_base_finish(&view->base);
   vk_free2(&device->vk.alloc, pAllocator, view);
}