GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/broadcom/vulkan/v3dvx_meta_copy.c
/*
 * Copyright © 2021 Raspberry Pi
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "v3dv_private.h"
#include "v3dv_meta_copy.h"

#include "broadcom/common/v3d_macros.h"
#include "broadcom/cle/v3dx_pack.h"
#include "broadcom/compiler/v3d_compiler.h"

#include "vk_format_info.h"

struct rcl_clear_info {
   const union v3dv_clear_value *clear_value;
   struct v3dv_image *image;
   VkImageAspectFlags aspects;
   uint32_t layer;
   uint32_t level;
};

static struct v3dv_cl *
emit_rcl_prologue(struct v3dv_job *job,
                  struct framebuffer_data *fb,
                  const struct rcl_clear_info *clear_info)
{
   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;
   v3dv_cl_ensure_space_with_branch(rcl, 200 +
                                    tiling->layers * 256 *
                                    cl_packet_length(SUPERTILE_COORDINATES));
   if (job->cmd_buffer->state.oom)
      return NULL;

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COMMON, config) {
      config.early_z_disable = true;
      config.image_width_pixels = tiling->width;
      config.image_height_pixels = tiling->height;
      config.number_of_render_targets = 1;
      config.multisample_mode_4x = tiling->msaa;
      config.maximum_bpp_of_all_render_targets = tiling->internal_bpp;
      config.internal_depth_type = fb->internal_depth_type;
   }

   if (clear_info && (clear_info->aspects & VK_IMAGE_ASPECT_COLOR_BIT)) {
      uint32_t clear_pad = 0;
      if (clear_info->image) {
         const struct v3dv_image *image = clear_info->image;
         const struct v3d_resource_slice *slice =
            &image->slices[clear_info->level];
         if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
             slice->tiling == V3D_TILING_UIF_XOR) {
            int uif_block_height = v3d_utile_height(image->cpp) * 2;

            uint32_t implicit_padded_height =
               align(tiling->height, uif_block_height) / uif_block_height;

            if (slice->padded_height_of_output_image_in_uif_blocks -
                implicit_padded_height >= 15) {
               clear_pad = slice->padded_height_of_output_image_in_uif_blocks;
            }
         }
      }

      const uint32_t *color = &clear_info->clear_value->color[0];
      cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART1, clear) {
         clear.clear_color_low_32_bits = color[0];
         clear.clear_color_next_24_bits = color[1] & 0x00ffffff;
         clear.render_target_number = 0;
      };

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_64) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART2, clear) {
            clear.clear_color_mid_low_32_bits =
               ((color[1] >> 24) | (color[2] << 8));
            clear.clear_color_mid_high_24_bits =
               ((color[2] >> 24) | ((color[3] & 0xffff) << 8));
            clear.render_target_number = 0;
         };
      }

      if (tiling->internal_bpp >= V3D_INTERNAL_BPP_128 || clear_pad) {
         cl_emit(rcl, TILE_RENDERING_MODE_CFG_CLEAR_COLORS_PART3, clear) {
            clear.uif_padded_height_in_uif_blocks = clear_pad;
            clear.clear_color_high_16_bits = color[3] >> 16;
            clear.render_target_number = 0;
         };
      }
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_COLOR, rt) {
      rt.render_target_0_internal_bpp = tiling->internal_bpp;
      rt.render_target_0_internal_type = fb->internal_type;
      rt.render_target_0_clamp = V3D_RENDER_TARGET_CLAMP_NONE;
   }

   cl_emit(rcl, TILE_RENDERING_MODE_CFG_ZS_CLEAR_VALUES, clear) {
      clear.z_clear_value = clear_info ? clear_info->clear_value->z : 1.0f;
      clear.stencil_clear_value = clear_info ? clear_info->clear_value->s : 0;
   };

   cl_emit(rcl, TILE_LIST_INITIAL_BLOCK_SIZE, init) {
      init.use_auto_chained_tile_lists = true;
      init.size_of_first_block_in_chained_tile_lists =
         TILE_ALLOCATION_BLOCK_SIZE_64B;
   }

   return rcl;
}
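
/* Note on the prologue above: the 128-bit clear color is split across the
 * three CLEAR_COLORS packets. PART1 always carries the low 56 bits (32 + 24),
 * PART2 is only needed once the internal bpp reaches 64, and PART3 once it
 * reaches 128, or whenever a UIF clear pad has to be programmed.
 */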

static void
emit_frame_setup(struct v3dv_job *job,
                 uint32_t layer,
                 const union v3dv_clear_value *clear_value)
{
   v3dv_return_if_oom(NULL, job);

   const struct v3dv_frame_tiling *tiling = &job->frame_tiling;

   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t tile_alloc_offset =
      64 * layer * tiling->draw_tiles_x * tiling->draw_tiles_y;
   cl_emit(rcl, MULTICORE_RENDERING_TILE_LIST_SET_BASE, list) {
      list.address = v3dv_cl_address(job->tile_alloc, tile_alloc_offset);
   }

   cl_emit(rcl, MULTICORE_RENDERING_SUPERTILE_CFG, config) {
      config.number_of_bin_tile_lists = 1;
      config.total_frame_width_in_tiles = tiling->draw_tiles_x;
      config.total_frame_height_in_tiles = tiling->draw_tiles_y;

      config.supertile_width_in_tiles = tiling->supertile_width;
      config.supertile_height_in_tiles = tiling->supertile_height;

      config.total_frame_width_in_supertiles =
         tiling->frame_width_in_supertiles;
      config.total_frame_height_in_supertiles =
         tiling->frame_height_in_supertiles;
   }

   /* Implement GFXH-1742 workaround. Also, if we are clearing we have to do
    * it here.
    */
   for (int i = 0; i < 2; i++) {
      cl_emit(rcl, TILE_COORDINATES, coords);
      cl_emit(rcl, END_OF_LOADS, end);
      cl_emit(rcl, STORE_TILE_BUFFER_GENERAL, store) {
         store.buffer_to_store = NONE;
      }
      if (clear_value && i == 0) {
         cl_emit(rcl, CLEAR_TILE_BUFFERS, clear) {
            clear.clear_z_stencil_buffer = true;
            clear.clear_all_render_targets = true;
         }
      }
      cl_emit(rcl, END_OF_TILE_MARKER, end);
   }

   cl_emit(rcl, FLUSH_VCD_CACHE, flush);
}
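
/* Note: the loop above emits two dummy tile writes (stores of NONE) for the
 * GFXH-1742 workaround; any requested clear is folded into the first
 * iteration via CLEAR_TILE_BUFFERS, which clears Z/S and all render targets
 * in one go.
 */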

static void
emit_supertile_coordinates(struct v3dv_job *job,
                           struct framebuffer_data *framebuffer)
{
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl *rcl = &job->rcl;

   const uint32_t min_y = framebuffer->min_y_supertile;
   const uint32_t max_y = framebuffer->max_y_supertile;
   const uint32_t min_x = framebuffer->min_x_supertile;
   const uint32_t max_x = framebuffer->max_x_supertile;

   for (int y = min_y; y <= max_y; y++) {
      for (int x = min_x; x <= max_x; x++) {
         cl_emit(rcl, SUPERTILE_COORDINATES, coords) {
            coords.column_number_in_supertiles = x;
            coords.row_number_in_supertiles = y;
         }
      }
   }
}
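
/* Note: each SUPERTILE_COORDINATES packet kicks off rendering of one
 * supertile, so the loop above walks the framebuffer's supertile grid in
 * row-major order, running the generic tile list (emitted by the
 * per-tile-list helpers below) once per supertile.
 */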

static void
emit_linear_load(struct v3dv_cl *cl,
                 uint32_t buffer,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 uint32_t stride,
                 uint32_t format)
{
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = buffer;
      load.address = v3dv_cl_address(bo, offset);
      load.input_image_format = format;
      load.memory_format = V3D_TILING_RASTER;
      load.height_in_ub_or_stride = stride;
      load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_linear_store(struct v3dv_cl *cl,
                  uint32_t buffer,
                  struct v3dv_bo *bo,
                  uint32_t offset,
                  uint32_t stride,
                  bool msaa,
                  uint32_t format)
{
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = RENDER_TARGET_0;
      store.address = v3dv_cl_address(bo, offset);
      store.clear_buffer_being_stored = false;
      store.output_image_format = format;
      store.memory_format = V3D_TILING_RASTER;
      store.height_in_ub_or_stride = stride;
      store.decimate_mode = msaa ? V3D_DECIMATE_MODE_ALL_SAMPLES :
                                   V3D_DECIMATE_MODE_SAMPLE_0;
   }
}
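
/* Note: for multisampled sources the store uses ALL_SAMPLES, writing the raw
 * per-sample contents of the tile buffer, while SAMPLE_0 writes a single
 * sample per pixel. Also note that the buffer parameter of emit_linear_store
 * is currently unused: stores always go through RENDER_TARGET_0.
 */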

/* This chooses a tile buffer format that is appropriate for the copy
 * operation. Typically this is the image's render target type; however, if
 * we are copying depth/stencil to/from a buffer, the hardware can't do
 * raster loads/stores, so we need to load and store through a tile color
 * buffer using a compatible color format.
 */
static uint32_t
choose_tlb_format(struct framebuffer_data *framebuffer,
                  VkImageAspectFlags aspect,
                  bool for_store,
                  bool is_copy_to_buffer,
                  bool is_copy_from_buffer)
{
   if (is_copy_to_buffer || is_copy_from_buffer) {
      switch (framebuffer->vk_format) {
      case VK_FORMAT_D16_UNORM:
         return V3D_OUTPUT_IMAGE_FORMAT_R16UI;
      case VK_FORMAT_D32_SFLOAT:
         return V3D_OUTPUT_IMAGE_FORMAT_R32F;
      case VK_FORMAT_X8_D24_UNORM_PACK32:
         return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      case VK_FORMAT_D24_UNORM_S8_UINT:
         /* When storing the stencil aspect of a combined depth/stencil image
          * to a buffer, the Vulkan spec states that the output buffer must
          * have packed stencil values, so we choose an R8UI format for our
          * store outputs. For the load input we still want RGBA8UI, since the
          * source image contains 4 channels (including the 3 channels
          * containing the 24-bit depth value).
          *
          * When loading the stencil aspect of a combined depth/stencil image
          * from a buffer, we read packed 8-bit stencil values from the buffer
          * that we need to put into the LSB of the 32-bit format (the R
          * channel), so we use R8UI. For the store, if we used R8UI then we
          * would write 8-bit stencil values consecutively over depth channels,
          * so we need to use RGBA8UI. This will write each stencil value in
          * its correct position, but will overwrite depth values (channels G,
          * B, A) with undefined values. To fix this, we will have to restore
          * the depth aspect from the Z tile buffer, which we should pre-load
          * from the image before the store.
          */
         if (aspect & VK_IMAGE_ASPECT_DEPTH_BIT) {
            return V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
         } else {
            assert(aspect & VK_IMAGE_ASPECT_STENCIL_BIT);
            if (is_copy_to_buffer) {
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_R8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
            } else {
               assert(is_copy_from_buffer);
               return for_store ? V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI :
                                  V3D_OUTPUT_IMAGE_FORMAT_R8UI;
            }
         }
      default: /* Color formats */
         return framebuffer->format->rt_type;
      }
   } else {
      return framebuffer->format->rt_type;
   }
}
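
/* Example (illustrative): copying the stencil aspect of a
 * VK_FORMAT_D24_UNORM_S8_UINT image to a buffer loads the tile buffer as
 * RGBA8UI (all four channels of the packed D24S8 data) and stores as R8UI
 * (tightly packed 1-byte stencil values in the buffer); the reverse copy,
 * buffer to image, swaps the two formats.
 */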

static inline bool
format_needs_rb_swap(struct v3dv_device *device,
                     VkFormat format)
{
   const uint8_t *swizzle = v3dv_get_format_swizzle(device, format);
   return swizzle[0] == PIPE_SWIZZLE_Z;
}
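
/* Note: swizzle[0] == PIPE_SWIZZLE_Z means the first (R) output channel is
 * sourced from the third component, i.e. a BGRA-style ordering such as
 * VK_FORMAT_B8G8R8A8_UNORM, which is why an R/B swap is needed.
 */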

static void
emit_image_load(struct v3dv_device *device,
                struct v3dv_cl *cl,
                struct framebuffer_data *framebuffer,
                struct v3dv_image *image,
                VkImageAspectFlags aspect,
                uint32_t layer,
                uint32_t mip_level,
                bool is_copy_to_buffer,
                bool is_copy_from_buffer)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   /* For image to/from buffer copies we always load to and store from RT0,
    * even for depth/stencil aspects, because the hardware can't do raster
    * stores or loads from/to the depth/stencil tile buffers.
    */
   bool load_to_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                            aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, LOAD_TILE_BUFFER_GENERAL, load) {
      load.buffer_to_load = load_to_color_tlb ?
         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);

      load.address = v3dv_cl_address(image->mem->bo, layer_offset);

      load.input_image_format = choose_tlb_format(framebuffer, aspect, false,
                                                  is_copy_to_buffer,
                                                  is_copy_from_buffer);
      load.memory_format = slice->tiling;

      /* When copying depth/stencil images to a buffer, for D24 formats Vulkan
       * expects the depth value in the LSB bits of each 32-bit pixel.
       * Unfortunately, the hardware seems to put the S8/X8 bits there and the
       * depth bits in the MSBs. To work around that we can reverse the channel
       * order and then swap the R/B channels to get what we want.
       *
       * NOTE: reversing and swapping only gets us the behavior we want if the
       * operations happen in that exact order, which seems to be the case when
       * done on the tile buffer load operations. On the store, it seems the
       * order is not the same. The order on the store is probably reversed so
       * that reversing and swapping on both the load and the store preserves
       * the original order of the channels in memory.
       *
       * Notice that we only need to do this when copying to a buffer, where
       * depth and stencil aspects are copied as separate regions and
       * the spec expects them to be tightly packed.
       */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_to_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         /* This is not a raw data copy (i.e. we are clearing the image),
          * so we need to make sure we respect the format swizzle.
          */
         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
      }

      load.r_b_swap = needs_rb_swap;
      load.channel_reverse = needs_chan_reverse;

      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         load.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         load.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         load.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else
         load.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_image_store(struct v3dv_device *device,
                 struct v3dv_cl *cl,
                 struct framebuffer_data *framebuffer,
                 struct v3dv_image *image,
                 VkImageAspectFlags aspect,
                 uint32_t layer,
                 uint32_t mip_level,
                 bool is_copy_to_buffer,
                 bool is_copy_from_buffer,
                 bool is_multisample_resolve)
{
   uint32_t layer_offset = v3dv_layer_offset(image, mip_level, layer);

   bool store_from_color_tlb = is_copy_to_buffer || is_copy_from_buffer ||
                               aspect == VK_IMAGE_ASPECT_COLOR_BIT;

   const struct v3d_resource_slice *slice = &image->slices[mip_level];
   cl_emit(cl, STORE_TILE_BUFFER_GENERAL, store) {
      store.buffer_to_store = store_from_color_tlb ?
         RENDER_TARGET_0 : v3dX(zs_buffer_from_aspect_bits)(aspect);

      store.address = v3dv_cl_address(image->mem->bo, layer_offset);
      store.clear_buffer_being_stored = false;

      /* See rationale in emit_image_load() */
      bool needs_rb_swap = false;
      bool needs_chan_reverse = false;
      if (is_copy_from_buffer &&
          (framebuffer->vk_format == VK_FORMAT_X8_D24_UNORM_PACK32 ||
           (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
            (aspect & VK_IMAGE_ASPECT_DEPTH_BIT)))) {
         needs_rb_swap = true;
         needs_chan_reverse = true;
      } else if (!is_copy_from_buffer && !is_copy_to_buffer &&
                 (aspect & VK_IMAGE_ASPECT_COLOR_BIT)) {
         needs_rb_swap = format_needs_rb_swap(device, framebuffer->vk_format);
      }

      store.r_b_swap = needs_rb_swap;
      store.channel_reverse = needs_chan_reverse;

      store.output_image_format = choose_tlb_format(framebuffer, aspect, true,
                                                    is_copy_to_buffer,
                                                    is_copy_from_buffer);
      store.memory_format = slice->tiling;
      if (slice->tiling == V3D_TILING_UIF_NO_XOR ||
          slice->tiling == V3D_TILING_UIF_XOR) {
         store.height_in_ub_or_stride =
            slice->padded_height_of_output_image_in_uif_blocks;
      } else if (slice->tiling == V3D_TILING_RASTER) {
         store.height_in_ub_or_stride = slice->stride;
      }

      if (image->samples > VK_SAMPLE_COUNT_1_BIT)
         store.decimate_mode = V3D_DECIMATE_MODE_ALL_SAMPLES;
      else if (is_multisample_resolve)
         store.decimate_mode = V3D_DECIMATE_MODE_4X;
      else
         store.decimate_mode = V3D_DECIMATE_MODE_SAMPLE_0;
   }
}

static void
emit_copy_layer_to_buffer_per_tile_list(struct v3dv_job *job,
                                        struct framebuffer_data *framebuffer,
                                        struct v3dv_buffer *buffer,
                                        struct v3dv_image *image,
                                        uint32_t layer_offset,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   /* Load image to TLB */
   assert((image->type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->imageSubresource.layerCount) ||
          layer_offset < image->extent.depth);

   const uint32_t image_layer = image->type != VK_IMAGE_TYPE_3D ?
      region->imageSubresource.baseArrayLayer + layer_offset :
      region->imageOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, image,
                   region->imageSubresource.aspectMask,
                   image_layer,
                   region->imageSubresource.mipLevel,
                   true, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy from compressed format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));

   /* If we are storing stencil from a combined depth/stencil format the
    * Vulkan spec states that the output buffer must have packed stencil
    * values, where each stencil value is 1 byte.
    */
   uint32_t cpp =
      region->imageSubresource.aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
         1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   uint32_t buffer_offset = buffer->mem_offset + region->bufferOffset +
                            height * buffer_stride * layer_offset;

   uint32_t format = choose_tlb_format(framebuffer,
                                       region->imageSubresource.aspectMask,
                                       true, true, false);
   bool msaa = image->samples > VK_SAMPLE_COUNT_1_BIT;

   emit_linear_store(cl, RENDER_TARGET_0, buffer->mem->bo,
                     buffer_offset, buffer_stride, msaa, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_layer_to_buffer(struct v3dv_job *job,
                          struct v3dv_buffer *buffer,
                          struct v3dv_image *image,
                          struct framebuffer_data *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2KHR *region)
{
   emit_frame_setup(job, layer, NULL);
   emit_copy_layer_to_buffer_per_tile_list(job, framebuffer, buffer,
                                           image, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(job_emit_copy_image_to_buffer_rcl)(struct v3dv_job *job,
                                        struct v3dv_buffer *buffer,
                                        struct v3dv_image *image,
                                        struct framebuffer_data *framebuffer,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_layer_to_buffer(job, buffer, image, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_resolve_image_layer_per_tile_list(struct v3dv_job *job,
                                       struct framebuffer_data *framebuffer,
                                       struct v3dv_image *dst,
                                       struct v3dv_image *src,
                                       uint32_t layer_offset,
                                       const VkImageResolve2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->extent.depth);

   const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->extent.depth);

   const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, true);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_resolve_image_layer(struct v3dv_job *job,
                         struct v3dv_image *dst,
                         struct v3dv_image *src,
                         struct framebuffer_data *framebuffer,
                         uint32_t layer,
                         const VkImageResolve2KHR *region)
{
   emit_frame_setup(job, layer, NULL);
   emit_resolve_image_layer_per_tile_list(job, framebuffer,
                                          dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(job_emit_resolve_image_rcl)(struct v3dv_job *job,
                                 struct v3dv_image *dst,
                                 struct v3dv_image *src,
                                 struct framebuffer_data *framebuffer,
                                 const VkImageResolve2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_resolve_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_copy_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *dst,
                               struct v3dv_bo *src,
                               uint32_t dst_offset,
                               uint32_t src_offset,
                               uint32_t stride,
                               uint32_t format)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   emit_linear_load(cl, RENDER_TARGET_0, src, src_offset, stride, format);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0,
                     dst, dst_offset, stride, false, format);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

void
v3dX(job_emit_copy_buffer)(struct v3dv_job *job,
                           struct v3dv_bo *dst,
                           struct v3dv_bo *src,
                           uint32_t dst_offset,
                           uint32_t src_offset,
                           struct framebuffer_data *framebuffer,
                           uint32_t format,
                           uint32_t item_size)
{
   const uint32_t stride = job->frame_tiling.width * item_size;
   emit_copy_buffer_per_tile_list(job, dst, src,
                                  dst_offset, src_offset,
                                  stride, format);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(job_emit_copy_buffer_rcl)(struct v3dv_job *job,
                               struct v3dv_bo *dst,
                               struct v3dv_bo *src,
                               uint32_t dst_offset,
                               uint32_t src_offset,
                               struct framebuffer_data *framebuffer,
                               uint32_t format,
                               uint32_t item_size)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, NULL);

   v3dX(job_emit_copy_buffer)(job, dst, src, dst_offset, src_offset,
                              framebuffer, format, item_size);

   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_copy_image_layer_per_tile_list(struct v3dv_job *job,
                                    struct framebuffer_data *framebuffer,
                                    struct v3dv_image *dst,
                                    struct v3dv_image *src,
                                    uint32_t layer_offset,
                                    const VkImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   assert((src->type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->srcSubresource.layerCount) ||
          layer_offset < src->extent.depth);

   const uint32_t src_layer = src->type != VK_IMAGE_TYPE_3D ?
      region->srcSubresource.baseArrayLayer + layer_offset :
      region->srcOffset.z + layer_offset;

   emit_image_load(job->device, cl, framebuffer, src,
                   region->srcSubresource.aspectMask,
                   src_layer,
                   region->srcSubresource.mipLevel,
                   false, false);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   assert((dst->type != VK_IMAGE_TYPE_3D &&
           layer_offset < region->dstSubresource.layerCount) ||
          layer_offset < dst->extent.depth);

   const uint32_t dst_layer = dst->type != VK_IMAGE_TYPE_3D ?
      region->dstSubresource.baseArrayLayer + layer_offset :
      region->dstOffset.z + layer_offset;

   emit_image_store(job->device, cl, framebuffer, dst,
                    region->dstSubresource.aspectMask,
                    dst_layer,
                    region->dstSubresource.mipLevel,
                    false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_image_layer(struct v3dv_job *job,
                      struct v3dv_image *dst,
                      struct v3dv_image *src,
                      struct framebuffer_data *framebuffer,
                      uint32_t layer,
                      const VkImageCopy2KHR *region)
{
   emit_frame_setup(job, layer, NULL);
   emit_copy_image_layer_per_tile_list(job, framebuffer, dst, src, layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(job_emit_copy_image_rcl)(struct v3dv_job *job,
                              struct v3dv_image *dst,
                              struct v3dv_image *src,
                              struct framebuffer_data *framebuffer,
                              const VkImageCopy2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_image_layer(job, dst, src, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

void
v3dX(cmd_buffer_emit_tfu_job)(struct v3dv_cmd_buffer *cmd_buffer,
                              struct v3dv_image *dst,
                              uint32_t dst_mip_level,
                              uint32_t dst_layer,
                              struct v3dv_image *src,
                              uint32_t src_mip_level,
                              uint32_t src_layer,
                              uint32_t width,
                              uint32_t height,
                              const struct v3dv_format *format)
{
   const struct v3d_resource_slice *src_slice = &src->slices[src_mip_level];
   const struct v3d_resource_slice *dst_slice = &dst->slices[dst_mip_level];

   assert(dst->mem && dst->mem->bo);
   const struct v3dv_bo *dst_bo = dst->mem->bo;

   assert(src->mem && src->mem->bo);
   const struct v3dv_bo *src_bo = src->mem->bo;

   struct drm_v3d_submit_tfu tfu = {
      .ios = (height << 16) | width,
      .bo_handles = {
         dst_bo->handle,
         src_bo->handle != dst_bo->handle ? src_bo->handle : 0
      },
   };

   const uint32_t src_offset =
      src_bo->offset + v3dv_layer_offset(src, src_mip_level, src_layer);
   tfu.iia |= src_offset;

   uint32_t icfg;
   if (src_slice->tiling == V3D_TILING_RASTER) {
      icfg = V3D_TFU_ICFG_FORMAT_RASTER;
   } else {
      icfg = V3D_TFU_ICFG_FORMAT_LINEARTILE +
             (src_slice->tiling - V3D_TILING_LINEARTILE);
   }
   tfu.icfg |= icfg << V3D_TFU_ICFG_FORMAT_SHIFT;

   const uint32_t dst_offset =
      dst_bo->offset + v3dv_layer_offset(dst, dst_mip_level, dst_layer);
   tfu.ioa |= dst_offset;

   tfu.ioa |= (V3D_TFU_IOA_FORMAT_LINEARTILE +
               (dst_slice->tiling - V3D_TILING_LINEARTILE)) <<
                V3D_TFU_IOA_FORMAT_SHIFT;
   tfu.icfg |= format->tex_type << V3D_TFU_ICFG_TTYPE_SHIFT;

   switch (src_slice->tiling) {
   case V3D_TILING_UIF_NO_XOR:
   case V3D_TILING_UIF_XOR:
      tfu.iis |= src_slice->padded_height / (2 * v3d_utile_height(src->cpp));
      break;
   case V3D_TILING_RASTER:
      tfu.iis |= src_slice->stride / src->cpp;
      break;
   default:
      break;
   }

   /* If we're writing level 0 (!IOA_DIMTW), then we need to supply the
    * OPAD field for the destination (how many extra UIF blocks beyond
    * those necessary to cover the height).
    */
   if (dst_slice->tiling == V3D_TILING_UIF_NO_XOR ||
       dst_slice->tiling == V3D_TILING_UIF_XOR) {
      uint32_t uif_block_h = 2 * v3d_utile_height(dst->cpp);
      uint32_t implicit_padded_height = align(height, uif_block_h);
      uint32_t icfg =
         (dst_slice->padded_height - implicit_padded_height) / uif_block_h;
      tfu.icfg |= icfg << V3D_TFU_ICFG_OPAD_SHIFT;
   }

   v3dv_cmd_buffer_add_tfu_job(cmd_buffer, &tfu);
}
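
/* Note: unlike the TLB-based paths above, this copy is submitted as a
 * standalone TFU (Texture Formatting Unit) job: the source offset, tiling
 * and stride are packed into iia/icfg/iis, the destination into ioa, and no
 * control list is emitted; the kernel executes the drm_v3d_submit_tfu
 * request directly.
 */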

static void
emit_clear_image_per_tile_list(struct v3dv_job *job,
                               struct framebuffer_data *framebuffer,
                               struct v3dv_image *image,
                               VkImageAspectFlags aspects,
                               uint32_t layer,
                               uint32_t level)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_image_store(job->device, cl, framebuffer, image, aspects,
                    layer, level, false, false, false);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_clear_image(struct v3dv_job *job,
                 struct v3dv_image *image,
                 struct framebuffer_data *framebuffer,
                 VkImageAspectFlags aspects,
                 uint32_t layer,
                 uint32_t level)
{
   emit_clear_image_per_tile_list(job, framebuffer, image, aspects, layer, level);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(job_emit_clear_image_rcl)(struct v3dv_job *job,
                               struct v3dv_image *image,
                               struct framebuffer_data *framebuffer,
                               const union v3dv_clear_value *clear_value,
                               VkImageAspectFlags aspects,
                               uint32_t layer,
                               uint32_t level)
{
   const struct rcl_clear_info clear_info = {
      .clear_value = clear_value,
      .image = image,
      .aspects = aspects,
      .layer = layer,
      .level = level,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, clear_value);
   emit_clear_image(job, image, framebuffer, aspects, layer, level);
   cl_emit(rcl, END_OF_RENDERING, end);
}

static void
emit_fill_buffer_per_tile_list(struct v3dv_job *job,
                               struct v3dv_bo *bo,
                               uint32_t offset,
                               uint32_t stride)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   emit_linear_store(cl, RENDER_TARGET_0, bo, offset, stride, false,
                     V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI);

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_fill_buffer(struct v3dv_job *job,
                 struct v3dv_bo *bo,
                 uint32_t offset,
                 struct framebuffer_data *framebuffer)
{
   const uint32_t stride = job->frame_tiling.width * 4;
   emit_fill_buffer_per_tile_list(job, bo, offset, stride);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(job_emit_fill_buffer_rcl)(struct v3dv_job *job,
                               struct v3dv_bo *bo,
                               uint32_t offset,
                               struct framebuffer_data *framebuffer,
                               uint32_t data)
{
   const union v3dv_clear_value clear_value = {
      .color = { data, 0, 0, 0 },
   };

   const struct rcl_clear_info clear_info = {
      .clear_value = &clear_value,
      .image = NULL,
      .aspects = VK_IMAGE_ASPECT_COLOR_BIT,
      .layer = 0,
      .level = 0,
   };

   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, &clear_info);
   v3dv_return_if_oom(NULL, job);

   emit_frame_setup(job, 0, &clear_value);
   emit_fill_buffer(job, bo, offset, framebuffer);
   cl_emit(rcl, END_OF_RENDERING, end);
}


static void
emit_copy_buffer_to_layer_per_tile_list(struct v3dv_job *job,
                                        struct framebuffer_data *framebuffer,
                                        struct v3dv_image *image,
                                        struct v3dv_buffer *buffer,
                                        uint32_t layer,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *cl = &job->indirect;
   v3dv_cl_ensure_space(cl, 200, 1);
   v3dv_return_if_oom(NULL, job);

   struct v3dv_cl_reloc tile_list_start = v3dv_cl_get_address(cl);

   cl_emit(cl, TILE_COORDINATES_IMPLICIT, coords);

   const VkImageSubresourceLayers *imgrsc = &region->imageSubresource;
   assert((image->type != VK_IMAGE_TYPE_3D && layer < imgrsc->layerCount) ||
          layer < image->extent.depth);

   /* Load TLB from buffer */
   uint32_t width, height;
   if (region->bufferRowLength == 0)
      width = region->imageExtent.width;
   else
      width = region->bufferRowLength;

   if (region->bufferImageHeight == 0)
      height = region->imageExtent.height;
   else
      height = region->bufferImageHeight;

   /* Handle copy to compressed format using a compatible format */
   width = DIV_ROUND_UP(width, vk_format_get_blockwidth(image->vk_format));
   height = DIV_ROUND_UP(height, vk_format_get_blockheight(image->vk_format));

   uint32_t cpp = imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT ?
                  1 : image->cpp;
   uint32_t buffer_stride = width * cpp;
   uint32_t buffer_offset =
      buffer->mem_offset + region->bufferOffset + height * buffer_stride * layer;

   uint32_t format = choose_tlb_format(framebuffer, imgrsc->aspectMask,
                                       false, false, true);

   emit_linear_load(cl, RENDER_TARGET_0, buffer->mem->bo,
                    buffer_offset, buffer_stride, format);

   /* Because we can't do raster loads/stores of Z/S formats we need to
    * use a color tile buffer with a compatible RGBA color format instead.
    * However, when we are uploading a single aspect to a combined
    * depth/stencil image we have the problem that our tile buffer stores don't
    * allow us to mask out the other aspect, so we always write all four RGBA
    * channels to the image and we end up overwriting that other aspect with
    * undefined values. To work around that, we first load the aspect we are
    * not copying from the image memory into a proper Z/S tile buffer. Then we
    * do our store from the color buffer for the aspect we are copying, and
    * after that, we do another store from the Z/S tile buffer to restore the
    * other aspect to its original value.
    */
   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_load(job->device, cl, framebuffer, image,
                         VK_IMAGE_ASPECT_STENCIL_BIT,
                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                         false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_load(job->device, cl, framebuffer, image,
                         VK_IMAGE_ASPECT_DEPTH_BIT,
                         imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                         false, false);
      }
   }

   cl_emit(cl, END_OF_LOADS, end);

   cl_emit(cl, BRANCH_TO_IMPLICIT_TILE_LIST, branch);

   /* Store TLB to image */
   emit_image_store(job->device, cl, framebuffer, image, imgrsc->aspectMask,
                    imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                    false, true, false);

   if (framebuffer->vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
      if (imgrsc->aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         emit_image_store(job->device, cl, framebuffer, image,
                          VK_IMAGE_ASPECT_STENCIL_BIT,
                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                          false, false, false);
      } else {
         assert(imgrsc->aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT);
         emit_image_store(job->device, cl, framebuffer, image,
                          VK_IMAGE_ASPECT_DEPTH_BIT,
                          imgrsc->baseArrayLayer + layer, imgrsc->mipLevel,
                          false, false, false);
      }
   }

   cl_emit(cl, END_OF_TILE_MARKER, end);

   cl_emit(cl, RETURN_FROM_SUB_LIST, ret);

   cl_emit(&job->rcl, START_ADDRESS_OF_GENERIC_TILE_LIST, branch) {
      branch.start = tile_list_start;
      branch.end = v3dv_cl_get_address(cl);
   }
}

static void
emit_copy_buffer_to_layer(struct v3dv_job *job,
                          struct v3dv_image *image,
                          struct v3dv_buffer *buffer,
                          struct framebuffer_data *framebuffer,
                          uint32_t layer,
                          const VkBufferImageCopy2KHR *region)
{
   emit_frame_setup(job, layer, NULL);
   emit_copy_buffer_to_layer_per_tile_list(job, framebuffer, image, buffer,
                                           layer, region);
   emit_supertile_coordinates(job, framebuffer);
}

void
v3dX(job_emit_copy_buffer_to_image_rcl)(struct v3dv_job *job,
                                        struct v3dv_image *image,
                                        struct v3dv_buffer *buffer,
                                        struct framebuffer_data *framebuffer,
                                        const VkBufferImageCopy2KHR *region)
{
   struct v3dv_cl *rcl = emit_rcl_prologue(job, framebuffer, NULL);
   v3dv_return_if_oom(NULL, job);

   for (int layer = 0; layer < job->frame_tiling.layers; layer++)
      emit_copy_buffer_to_layer(job, image, buffer, framebuffer, layer, region);
   cl_emit(rcl, END_OF_RENDERING, end);
}

/* Figure out a TLB size configuration for a number of pixels to process.
 * Beware that we can't "render" more than 4096x4096 pixels in a single job;
 * if the pixel count is larger than that, the caller might need to split
 * the job and call this function multiple times.
 */
static void
framebuffer_size_for_pixel_count(uint32_t num_pixels,
                                 uint32_t *width,
                                 uint32_t *height)
{
   assert(num_pixels > 0);

   const uint32_t max_dim_pixels = 4096;
   const uint32_t max_pixels = max_dim_pixels * max_dim_pixels;

   uint32_t w, h;
   if (num_pixels > max_pixels) {
      w = max_dim_pixels;
      h = max_dim_pixels;
   } else {
      w = num_pixels;
      h = 1;
      while (w > max_dim_pixels || ((w % 2) == 0 && w > 2 * h)) {
         w >>= 1;
         h <<= 1;
      }
   }
   assert(w <= max_dim_pixels && h <= max_dim_pixels);
   assert(w * h <= num_pixels);
   assert(w > 0 && h > 0);

   *width = w;
   *height = h;
}
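
/* Worked example (illustrative): for num_pixels = 8192 the loop above halves
 * w and doubles h while w is even and w > 2 * h (or w > 4096), settling on a
 * 128x64 framebuffer that covers exactly 8192 pixels. For counts that are
 * not powers of two, w * h may end up smaller than num_pixels, which is why
 * the callers below loop until all items are consumed.
 */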

struct v3dv_job *
v3dX(cmd_buffer_copy_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
                             struct v3dv_bo *dst,
                             uint32_t dst_offset,
                             struct v3dv_bo *src,
                             uint32_t src_offset,
                             const VkBufferCopy2KHR *region)
{
   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;

   /* Select an appropriate pixel format for the copy operation based on the
    * size to copy and the alignment of the source and destination offsets.
    */
   src_offset += region->srcOffset;
   dst_offset += region->dstOffset;
   uint32_t item_size = 4;
   while (item_size > 1 &&
          (src_offset % item_size != 0 || dst_offset % item_size != 0)) {
      item_size /= 2;
   }

   while (item_size > 1 && region->size % item_size != 0)
      item_size /= 2;

   assert(region->size % item_size == 0);
   uint32_t num_items = region->size / item_size;
   assert(num_items > 0);

   uint32_t format;
   VkFormat vk_format;
   switch (item_size) {
   case 4:
      format = V3D_OUTPUT_IMAGE_FORMAT_RGBA8UI;
      vk_format = VK_FORMAT_R8G8B8A8_UINT;
      break;
   case 2:
      format = V3D_OUTPUT_IMAGE_FORMAT_RG8UI;
      vk_format = VK_FORMAT_R8G8_UINT;
      break;
   default:
      format = V3D_OUTPUT_IMAGE_FORMAT_R8UI;
      vk_format = VK_FORMAT_R8_UINT;
      break;
   }
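
   /* Example (illustrative): a 6-byte copy between offsets that are both
    * 2-byte (but not 4-byte) aligned settles on item_size = 2, so the copy
    * is emitted as 3 RG8UI "pixels"; fully 4-byte aligned copies use
    * RGBA8UI, and any odd offset or size falls back to R8UI, one byte per
    * pixel.
    */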

   struct v3dv_job *job = NULL;
   while (num_items > 0) {
      job = v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return NULL;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);

      struct framebuffer_data framebuffer;
      v3dX(setup_framebuffer_data)(&framebuffer, vk_format, internal_type,
                                   &job->frame_tiling);

      v3dX(job_emit_binning_flush)(job);

      v3dX(job_emit_copy_buffer_rcl)(job, dst, src, dst_offset, src_offset,
                                     &framebuffer, format, item_size);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * item_size;
      num_items -= items_copied;
      src_offset += bytes_copied;
      dst_offset += bytes_copied;
   }

   return job;
}

void
v3dX(cmd_buffer_fill_buffer)(struct v3dv_cmd_buffer *cmd_buffer,
                             struct v3dv_bo *bo,
                             uint32_t offset,
                             uint32_t size,
                             uint32_t data)
{
   assert(size > 0 && size % 4 == 0);
   assert(offset + size <= bo->size);

   const uint32_t internal_bpp = V3D_INTERNAL_BPP_32;
   const uint32_t internal_type = V3D_INTERNAL_TYPE_8UI;
   uint32_t num_items = size / 4;

   while (num_items > 0) {
      struct v3dv_job *job =
         v3dv_cmd_buffer_start_job(cmd_buffer, -1, V3DV_JOB_TYPE_GPU_CL);
      if (!job)
         return;

      uint32_t width, height;
      framebuffer_size_for_pixel_count(num_items, &width, &height);

      v3dv_job_start_frame(job, width, height, 1, 1, internal_bpp, false);

      struct framebuffer_data framebuffer;
      v3dX(setup_framebuffer_data)(&framebuffer, VK_FORMAT_R8G8B8A8_UINT,
                                   internal_type, &job->frame_tiling);

      v3dX(job_emit_binning_flush)(job);

      v3dX(job_emit_fill_buffer_rcl)(job, bo, offset, &framebuffer, data);

      v3dv_cmd_buffer_finish_job(cmd_buffer);

      const uint32_t items_copied = width * height;
      const uint32_t bytes_copied = items_copied * 4;
      num_items -= items_copied;
      offset += bytes_copied;
   }
}
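
/* Note: the fill treats the buffer as an array of 32-bit items (hence the
 * size % 4 == 0 assert): each RGBA8UI "pixel" writes one copy of the 32-bit
 * fill pattern, which job_emit_fill_buffer_rcl programs as the clear color
 * for render target 0.
 */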

void
v3dX(setup_framebuffer_data)(struct framebuffer_data *fb,
                             VkFormat vk_format,
                             uint32_t internal_type,
                             const struct v3dv_frame_tiling *tiling)
{
   fb->internal_type = internal_type;

   /* Supertile coverage always starts at 0,0 */
   uint32_t supertile_w_in_pixels =
      tiling->tile_width * tiling->supertile_width;
   uint32_t supertile_h_in_pixels =
      tiling->tile_height * tiling->supertile_height;

   fb->min_x_supertile = 0;
   fb->min_y_supertile = 0;
   fb->max_x_supertile = (tiling->width - 1) / supertile_w_in_pixels;
   fb->max_y_supertile = (tiling->height - 1) / supertile_h_in_pixels;

   fb->vk_format = vk_format;
   fb->format = v3dX(get_format)(vk_format);

   fb->internal_depth_type = V3D_INTERNAL_TYPE_DEPTH_32F;
   if (vk_format_is_depth_or_stencil(vk_format))
      fb->internal_depth_type = v3dX(get_internal_depth_type)(vk_format);
}