Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/freedreno/vulkan/tu_clear_blit.c
4565 views
1
/*
2
* Copyright 2019-2020 Valve Corporation
3
* SPDX-License-Identifier: MIT
4
*
5
* Authors:
6
* Jonathan Marek <[email protected]>
7
*/
8
9
#include "tu_private.h"
10
11
#include "tu_cs.h"
12
#include "vk_format.h"
13
14
#include "util/format_r11g11b10f.h"
15
#include "util/format_rgb9e5.h"
16
#include "util/format_srgb.h"
17
#include "util/half_float.h"
18
19
static uint32_t
20
tu_pack_float32_for_unorm(float val, int bits)
21
{
22
return _mesa_lroundevenf(CLAMP(val, 0.0f, 1.0f) * (float) ((1 << bits) - 1));
23
}
24
25
/* r2d_ = BLIT_OP_SCALE operations */
26
27
static enum a6xx_2d_ifmt
28
format_to_ifmt(VkFormat format)
29
{
30
if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
31
format == VK_FORMAT_X8_D24_UNORM_PACK32)
32
return R2D_UNORM8;
33
34
/* get_component_bits doesn't work with depth/stencil formats: */
35
if (format == VK_FORMAT_D16_UNORM || format == VK_FORMAT_D32_SFLOAT)
36
return R2D_FLOAT32;
37
if (format == VK_FORMAT_S8_UINT)
38
return R2D_INT8;
39
40
/* use the size of the red channel to find the corresponding "ifmt" */
41
bool is_int = vk_format_is_int(format);
42
switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
43
case 4: case 5: case 8:
44
return is_int ? R2D_INT8 : R2D_UNORM8;
45
case 10: case 11:
46
return is_int ? R2D_INT16 : R2D_FLOAT16;
47
case 16:
48
if (vk_format_is_float(format))
49
return R2D_FLOAT16;
50
return is_int ? R2D_INT16 : R2D_FLOAT32;
51
case 32:
52
return is_int ? R2D_INT32 : R2D_FLOAT32;
53
default:
54
unreachable("bad format");
55
return 0;
56
}
57
}
58
59
static void
60
r2d_coords(struct tu_cs *cs,
61
const VkOffset2D *dst,
62
const VkOffset2D *src,
63
const VkExtent2D *extent)
64
{
65
tu_cs_emit_regs(cs,
66
A6XX_GRAS_2D_DST_TL(.x = dst->x, .y = dst->y),
67
A6XX_GRAS_2D_DST_BR(.x = dst->x + extent->width - 1, .y = dst->y + extent->height - 1));
68
69
if (!src)
70
return;
71
72
tu_cs_emit_regs(cs,
73
A6XX_GRAS_2D_SRC_TL_X(src->x),
74
A6XX_GRAS_2D_SRC_BR_X(src->x + extent->width - 1),
75
A6XX_GRAS_2D_SRC_TL_Y(src->y),
76
A6XX_GRAS_2D_SRC_BR_Y(src->y + extent->height - 1));
77
}
78
79
static void
80
r2d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
81
{
82
uint32_t clear_value[4] = {};
83
84
switch (format) {
85
case VK_FORMAT_X8_D24_UNORM_PACK32:
86
case VK_FORMAT_D24_UNORM_S8_UINT:
87
/* cleared as r8g8b8a8_unorm using special format */
88
clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
89
clear_value[1] = clear_value[0] >> 8;
90
clear_value[2] = clear_value[0] >> 16;
91
clear_value[3] = val->depthStencil.stencil;
92
break;
93
case VK_FORMAT_D16_UNORM:
94
case VK_FORMAT_D32_SFLOAT:
95
/* R2D_FLOAT32 */
96
clear_value[0] = fui(val->depthStencil.depth);
97
break;
98
case VK_FORMAT_S8_UINT:
99
clear_value[0] = val->depthStencil.stencil;
100
break;
101
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
102
/* cleared as UINT32 */
103
clear_value[0] = float3_to_rgb9e5(val->color.float32);
104
break;
105
default:
106
assert(!vk_format_is_depth_or_stencil(format));
107
const struct util_format_description *desc = vk_format_description(format);
108
enum a6xx_2d_ifmt ifmt = format_to_ifmt(format);
109
110
assert(desc && (desc->layout == UTIL_FORMAT_LAYOUT_PLAIN ||
111
format == VK_FORMAT_B10G11R11_UFLOAT_PACK32));
112
113
for (unsigned i = 0; i < desc->nr_channels; i++) {
114
const struct util_format_channel_description *ch = &desc->channel[i];
115
if (ifmt == R2D_UNORM8) {
116
float linear = val->color.float32[i];
117
if (desc->colorspace == UTIL_FORMAT_COLORSPACE_SRGB && i < 3)
118
linear = util_format_linear_to_srgb_float(val->color.float32[i]);
119
120
if (ch->type == UTIL_FORMAT_TYPE_SIGNED)
121
clear_value[i] = _mesa_lroundevenf(CLAMP(linear, -1.0f, 1.0f) * 127.0f);
122
else
123
clear_value[i] = tu_pack_float32_for_unorm(linear, 8);
124
} else if (ifmt == R2D_FLOAT16) {
125
clear_value[i] = _mesa_float_to_half(val->color.float32[i]);
126
} else {
127
assert(ifmt == R2D_FLOAT32 || ifmt == R2D_INT32 ||
128
ifmt == R2D_INT16 || ifmt == R2D_INT8);
129
clear_value[i] = val->color.uint32[i];
130
}
131
}
132
break;
133
}
134
135
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_SRC_SOLID_C0, 4);
136
tu_cs_emit_array(cs, clear_value, 4);
137
}
138
139
static void
140
r2d_src(struct tu_cmd_buffer *cmd,
141
struct tu_cs *cs,
142
const struct tu_image_view *iview,
143
uint32_t layer,
144
VkFilter filter)
145
{
146
uint32_t src_info = iview->SP_PS_2D_SRC_INFO;
147
if (filter != VK_FILTER_NEAREST)
148
src_info |= A6XX_SP_PS_2D_SRC_INFO_FILTER;
149
150
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
151
tu_cs_emit(cs, src_info);
152
tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
153
tu_cs_image_ref_2d(cs, iview, layer, true);
154
155
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_FLAGS, 3);
156
tu_cs_image_flag_ref(cs, iview, layer);
157
}
158
159
static void
160
r2d_src_stencil(struct tu_cmd_buffer *cmd,
161
struct tu_cs *cs,
162
const struct tu_image_view *iview,
163
uint32_t layer,
164
VkFilter filter)
165
{
166
tu_cs_emit_pkt4(cs, REG_A6XX_SP_PS_2D_SRC_INFO, 5);
167
tu_cs_emit(cs, tu_image_view_stencil(iview, SP_PS_2D_SRC_INFO) & ~A6XX_SP_PS_2D_SRC_INFO_FLAGS);
168
tu_cs_emit(cs, iview->SP_PS_2D_SRC_SIZE);
169
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
170
/* SP_PS_2D_SRC_PITCH has shifted pitch field */
171
tu_cs_emit(cs, iview->stencil_PITCH << 9);
172
}
173
174
static void
175
r2d_src_buffer(struct tu_cmd_buffer *cmd,
176
struct tu_cs *cs,
177
VkFormat vk_format,
178
uint64_t va, uint32_t pitch,
179
uint32_t width, uint32_t height)
180
{
181
struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);
182
183
tu_cs_emit_regs(cs,
184
A6XX_SP_PS_2D_SRC_INFO(
185
.color_format = format.fmt,
186
.color_swap = format.swap,
187
.srgb = vk_format_is_srgb(vk_format),
188
.unk20 = 1,
189
.unk22 = 1),
190
A6XX_SP_PS_2D_SRC_SIZE(.width = width, .height = height),
191
A6XX_SP_PS_2D_SRC(.qword = va),
192
A6XX_SP_PS_2D_SRC_PITCH(.pitch = pitch));
193
}
194
195
static void
196
r2d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
197
{
198
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
199
tu_cs_emit(cs, iview->RB_2D_DST_INFO);
200
tu_cs_image_ref_2d(cs, iview, layer, false);
201
202
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_FLAGS, 3);
203
tu_cs_image_flag_ref(cs, iview, layer);
204
}
205
206
static void
207
r2d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
208
{
209
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_DST_INFO, 4);
210
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_2D_DST_INFO) & ~A6XX_RB_2D_DST_INFO_FLAGS);
211
tu_cs_emit_qw(cs, iview->stencil_base_addr + iview->stencil_layer_size * layer);
212
tu_cs_emit(cs, iview->stencil_PITCH);
213
}
214
215
static void
216
r2d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
217
{
218
struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);
219
220
tu_cs_emit_regs(cs,
221
A6XX_RB_2D_DST_INFO(
222
.color_format = format.fmt,
223
.color_swap = format.swap,
224
.srgb = vk_format_is_srgb(vk_format)),
225
A6XX_RB_2D_DST(.qword = va),
226
A6XX_RB_2D_DST_PITCH(pitch));
227
}
228
229
static void
230
r2d_setup_common(struct tu_cmd_buffer *cmd,
231
struct tu_cs *cs,
232
VkFormat vk_format,
233
VkImageAspectFlags aspect_mask,
234
unsigned blit_param,
235
bool clear,
236
bool ubwc,
237
bool scissor)
238
{
239
enum a6xx_format format = tu6_base_format(vk_format);
240
enum a6xx_2d_ifmt ifmt = format_to_ifmt(vk_format);
241
uint32_t unknown_8c01 = 0;
242
243
if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
244
vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
245
format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
246
}
247
248
/* note: the only format with partial clearing is D24S8 */
249
if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
250
/* preserve stencil channel */
251
if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
252
unknown_8c01 = 0x08000041;
253
/* preserve depth channels */
254
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
255
unknown_8c01 = 0x00084001;
256
}
257
258
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_UNKNOWN_8C01, 1);
259
tu_cs_emit(cs, unknown_8c01);
260
261
uint32_t blit_cntl = A6XX_RB_2D_BLIT_CNTL(
262
.scissor = scissor,
263
.rotate = blit_param,
264
.solid_color = clear,
265
.d24s8 = format == FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8 && !clear,
266
.color_format = format,
267
.mask = 0xf,
268
.ifmt = vk_format_is_srgb(vk_format) ? R2D_UNORM8_SRGB : ifmt,
269
).value;
270
271
tu_cs_emit_pkt4(cs, REG_A6XX_RB_2D_BLIT_CNTL, 1);
272
tu_cs_emit(cs, blit_cntl);
273
274
tu_cs_emit_pkt4(cs, REG_A6XX_GRAS_2D_BLIT_CNTL, 1);
275
tu_cs_emit(cs, blit_cntl);
276
277
if (format == FMT6_10_10_10_2_UNORM_DEST)
278
format = FMT6_16_16_16_16_FLOAT;
279
280
tu_cs_emit_regs(cs, A6XX_SP_2D_DST_FORMAT(
281
.sint = vk_format_is_sint(vk_format),
282
.uint = vk_format_is_uint(vk_format),
283
.color_format = format,
284
.srgb = vk_format_is_srgb(vk_format),
285
.mask = 0xf));
286
}
287
288
static void
289
r2d_setup(struct tu_cmd_buffer *cmd,
290
struct tu_cs *cs,
291
VkFormat vk_format,
292
VkImageAspectFlags aspect_mask,
293
unsigned blit_param,
294
bool clear,
295
bool ubwc)
296
{
297
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
298
299
r2d_setup_common(cmd, cs, vk_format, aspect_mask, blit_param, clear, ubwc, false);
300
}
301
302
static void
303
r2d_teardown(struct tu_cmd_buffer *cmd,
304
struct tu_cs *cs)
305
{
306
/* nothing to do here */
307
}
308
309
static void
310
r2d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
311
{
312
tu_cs_emit_pkt7(cs, CP_BLIT, 1);
313
tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));
314
}
315
316
/* r3d_ = shader path operations */
317
318
void
319
tu_init_clear_blit_shaders(struct tu6_global *global)
320
{
321
#define MOV(args...) { .cat1 = { .opc_cat = 1, .src_type = TYPE_S32, .dst_type = TYPE_S32, args } }
322
#define CAT2(op, args...) { .cat2 = { .opc_cat = 2, .opc = (op) & 63, .full = 1, args } }
323
#define CAT3(op, args...) { .cat3 = { .opc_cat = 3, .opc = (op) & 63, args } }
324
325
static const instr_t vs_code[] = {
326
/* r0.xyz = r0.w ? c1.xyz : c0.xyz
327
* r1.xy = r0.w ? c1.zw : c0.zw
328
* r1.z = c2.x (for z_scale path)
329
* r0.w = 1.0f
330
*/
331
CAT3(OPC_SEL_B32, .repeat = 2, .dst = 0,
332
.c1 = {.src1_c = 1, .src1 = 4}, .src1_r = 1,
333
.src2 = 3,
334
.c2 = {.src3_c = 1, .dummy = 1, .src3 = 0}),
335
CAT3(OPC_SEL_B32, .repeat = 1, .dst = 4,
336
.c1 = {.src1_c = 1, .src1 = 6}, .src1_r = 1,
337
.src2 = 3,
338
.c2 = {.src3_c = 1, .dummy = 1, .src3 = 2}),
339
MOV(.dst = 6, .src_c = 1, .src = 8 ),
340
MOV(.dst = 3, .src_im = 1, .fim_val = 1.0f ),
341
{ .cat0 = { .opc = OPC_END } },
342
};
343
344
static const instr_t fs_blit[] = {
345
/* " bary.f (ei)r63.x, 0, r0.x" note the blob doesn't have this in its
346
* blit path (its not clear what allows it to not have it)
347
*/
348
CAT2(OPC_BARY_F, .ei = 1, .full = 1, .dst = 63 * 4, .src1_im = 1),
349
{ .cat0 = { .opc = OPC_END } },
350
};
351
352
static const instr_t fs_blit_zscale[] = {
353
/* (rpt2)bary.f (ei)r0.x, (r)0, r0.x
354
* (rpt5)nop
355
* sam.3d (s32)(xyzw)r0.x, r0.x, s#0, t#0
356
*/
357
CAT2(OPC_BARY_F, .ei = 1, .full = 1, .dst = 0, .src1_im = 1, .src1 = 0, .repeat = 2, .src1_r = 1),
358
{ .cat0 = { .repeat = 5 } },
359
{ .cat5 = { .opc_cat = 5, .opc = OPC_SAM & 31, .dst = 0, .wrmask = 0xf, .type = TYPE_S32,
360
.is_3d = 1, .norm = { .full = 1, .src1 = 0 } } },
361
{ .cat0 = { .opc = OPC_END } },
362
};
363
364
memcpy(&global->shaders[GLOBAL_SH_VS], vs_code, sizeof(vs_code));
365
memcpy(&global->shaders[GLOBAL_SH_FS_BLIT], fs_blit, sizeof(fs_blit));
366
memcpy(&global->shaders[GLOBAL_SH_FS_BLIT_ZSCALE], fs_blit_zscale, sizeof(fs_blit_zscale));
367
368
for (uint32_t num_rts = 0; num_rts <= MAX_RTS; num_rts++) {
369
instr_t *code = global->shaders[GLOBAL_SH_FS_CLEAR0 + num_rts];
370
for (uint32_t i = 0; i < num_rts; i++) {
371
/* (rpt3)mov.s32s32 r0.x, (r)c[i].x */
372
*code++ = (instr_t) MOV(.repeat = 3, .dst = i * 4, .src_c = 1, .src_r = 1, .src = i * 4);
373
}
374
*code++ = (instr_t) { .cat0 = { .opc = OPC_END } };
375
}
376
}
377
378
static void
379
r3d_common(struct tu_cmd_buffer *cmd, struct tu_cs *cs, bool blit, uint32_t num_rts,
380
bool layered_clear, bool z_scale)
381
{
382
struct ir3_const_state dummy_const_state = {};
383
struct ir3_shader dummy_shader = {
384
.compiler = cmd->device->compiler,
385
};
386
387
struct ir3_shader_variant vs = {
388
.type = MESA_SHADER_VERTEX,
389
.instrlen = 1,
390
.constlen = 4,
391
.info.max_reg = 1,
392
.inputs_count = 1,
393
.inputs[0] = {
394
.slot = SYSTEM_VALUE_VERTEX_ID,
395
.regid = regid(0, 3),
396
.sysval = true,
397
},
398
.outputs_count = blit ? 2 : 1,
399
.outputs[0] = {
400
.slot = VARYING_SLOT_POS,
401
.regid = regid(0, 0),
402
},
403
.outputs[1] = {
404
.slot = VARYING_SLOT_VAR0,
405
.regid = regid(1, 0),
406
},
407
.shader = &dummy_shader,
408
.const_state = &dummy_const_state,
409
};
410
if (layered_clear) {
411
vs.outputs[1].slot = VARYING_SLOT_LAYER;
412
vs.outputs[1].regid = regid(1, 1);
413
vs.outputs_count = 2;
414
}
415
416
struct ir3_shader_variant fs = {
417
.type = MESA_SHADER_FRAGMENT,
418
.instrlen = 1, /* max of 9 instructions with num_rts = 8 */
419
.constlen = align(num_rts, 4),
420
.info.max_reg = MAX2(num_rts, 1) - 1,
421
.total_in = blit ? 2 : 0,
422
.num_samp = blit ? 1 : 0,
423
.inputs_count = blit ? 2 : 0,
424
.inputs[0] = {
425
.slot = VARYING_SLOT_VAR0,
426
.inloc = 0,
427
.compmask = 3,
428
.bary = true,
429
},
430
.inputs[1] = {
431
.slot = SYSTEM_VALUE_BARYCENTRIC_PERSP_PIXEL,
432
.regid = regid(0, 0),
433
.sysval = 1,
434
},
435
.num_sampler_prefetch = blit ? 1 : 0,
436
.sampler_prefetch[0] = {
437
.src = 0,
438
.wrmask = 0xf,
439
.cmd = 4,
440
},
441
.shader = &dummy_shader,
442
.const_state = &dummy_const_state,
443
};
444
445
enum global_shader fs_id = GLOBAL_SH_FS_BLIT;
446
447
if (!blit)
448
fs_id = GLOBAL_SH_FS_CLEAR0 + num_rts;
449
450
/* z_scale blit path has an extra varying and doesn't use prefetch */
451
if (z_scale) {
452
assert(blit);
453
fs.total_in = 3;
454
fs.num_sampler_prefetch = 0;
455
fs.inputs[0].compmask = 7;
456
fs_id = GLOBAL_SH_FS_BLIT_ZSCALE;
457
}
458
459
tu_cs_emit_regs(cs, A6XX_HLSQ_INVALIDATE_CMD(
460
.vs_state = true,
461
.hs_state = true,
462
.ds_state = true,
463
.gs_state = true,
464
.fs_state = true,
465
.cs_state = true,
466
.gfx_ibo = true,
467
.cs_ibo = true,
468
.gfx_shared_const = true,
469
.gfx_bindless = 0x1f,
470
.cs_bindless = 0x1f));
471
472
tu6_emit_xs_config(cs, MESA_SHADER_VERTEX, &vs);
473
tu6_emit_xs_config(cs, MESA_SHADER_TESS_CTRL, NULL);
474
tu6_emit_xs_config(cs, MESA_SHADER_TESS_EVAL, NULL);
475
tu6_emit_xs_config(cs, MESA_SHADER_GEOMETRY, NULL);
476
tu6_emit_xs_config(cs, MESA_SHADER_FRAGMENT, &fs);
477
478
struct tu_pvtmem_config pvtmem = {};
479
tu6_emit_xs(cs, MESA_SHADER_VERTEX, &vs, &pvtmem, global_iova(cmd, shaders[GLOBAL_SH_VS]));
480
tu6_emit_xs(cs, MESA_SHADER_FRAGMENT, &fs, &pvtmem, global_iova(cmd, shaders[fs_id]));
481
482
tu_cs_emit_regs(cs, A6XX_PC_PRIMITIVE_CNTL_0());
483
tu_cs_emit_regs(cs, A6XX_VFD_CONTROL_0());
484
485
if (cmd->device->physical_device->info->a6xx.has_cp_reg_write) {
486
/* Copy what the blob does here. This will emit an extra 0x3f
487
* CP_EVENT_WRITE when multiview is disabled. I'm not exactly sure what
488
* this is working around yet.
489
*/
490
tu_cs_emit_pkt7(cs, CP_REG_WRITE, 3);
491
tu_cs_emit(cs, CP_REG_WRITE_0_TRACKER(UNK_EVENT_WRITE));
492
tu_cs_emit(cs, REG_A6XX_PC_MULTIVIEW_CNTL);
493
tu_cs_emit(cs, 0);
494
} else {
495
tu_cs_emit_regs(cs, A6XX_PC_MULTIVIEW_CNTL());
496
}
497
tu_cs_emit_regs(cs, A6XX_VFD_MULTIVIEW_CNTL());
498
499
tu6_emit_vpc(cs, &vs, NULL, NULL, NULL, &fs, 0);
500
501
/* REPL_MODE for varying with RECTLIST (2 vertices only) */
502
tu_cs_emit_regs(cs, A6XX_VPC_VARYING_INTERP_MODE(0, 0));
503
tu_cs_emit_regs(cs, A6XX_VPC_VARYING_PS_REPL_MODE(0, 2 << 2 | 1 << 0));
504
505
tu6_emit_fs_inputs(cs, &fs);
506
507
tu_cs_emit_regs(cs,
508
A6XX_GRAS_CL_CNTL(
509
.persp_division_disable = 1,
510
.vp_xform_disable = 1,
511
.vp_clip_code_ignore = 1,
512
.clip_disable = 1));
513
tu_cs_emit_regs(cs, A6XX_GRAS_SU_CNTL()); // XXX msaa enable?
514
515
tu_cs_emit_regs(cs, A6XX_PC_RASTER_CNTL());
516
tu_cs_emit_regs(cs, A6XX_VPC_UNKNOWN_9107());
517
518
tu_cs_emit_regs(cs,
519
A6XX_GRAS_SC_VIEWPORT_SCISSOR_TL(0, .x = 0, .y = 0),
520
A6XX_GRAS_SC_VIEWPORT_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff));
521
tu_cs_emit_regs(cs,
522
A6XX_GRAS_SC_SCREEN_SCISSOR_TL(0, .x = 0, .y = 0),
523
A6XX_GRAS_SC_SCREEN_SCISSOR_BR(0, .x = 0x7fff, .y = 0x7fff));
524
525
tu_cs_emit_regs(cs,
526
A6XX_VFD_INDEX_OFFSET(),
527
A6XX_VFD_INSTANCE_START_OFFSET());
528
}
529
530
static void
531
r3d_coords_raw(struct tu_cs *cs, const float *coords)
532
{
533
tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 8);
534
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
535
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
536
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
537
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
538
CP_LOAD_STATE6_0_NUM_UNIT(2));
539
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
540
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
541
tu_cs_emit_array(cs, (const uint32_t *) coords, 8);
542
}
543
544
/* z coordinate for "z scale" blit path which uses a 3d texture */
545
static void
546
r3d_coord_z(struct tu_cs *cs, float z)
547
{
548
tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_GEOM, 3 + 4);
549
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(2) |
550
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
551
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
552
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_VS_SHADER) |
553
CP_LOAD_STATE6_0_NUM_UNIT(1));
554
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
555
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
556
tu_cs_emit(cs, fui(z));
557
tu_cs_emit(cs, 0);
558
tu_cs_emit(cs, 0);
559
tu_cs_emit(cs, 0);
560
}
561
562
static void
563
r3d_coords(struct tu_cs *cs,
564
const VkOffset2D *dst,
565
const VkOffset2D *src,
566
const VkExtent2D *extent)
567
{
568
int32_t src_x1 = src ? src->x : 0;
569
int32_t src_y1 = src ? src->y : 0;
570
r3d_coords_raw(cs, (float[]) {
571
dst->x, dst->y,
572
src_x1, src_y1,
573
dst->x + extent->width, dst->y + extent->height,
574
src_x1 + extent->width, src_y1 + extent->height,
575
});
576
}
577
578
static void
579
r3d_clear_value(struct tu_cs *cs, VkFormat format, const VkClearValue *val)
580
{
581
tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4);
582
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
583
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
584
CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
585
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
586
CP_LOAD_STATE6_0_NUM_UNIT(1));
587
tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
588
tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
589
switch (format) {
590
case VK_FORMAT_X8_D24_UNORM_PACK32:
591
case VK_FORMAT_D24_UNORM_S8_UINT: {
592
/* cleared as r8g8b8a8_unorm using special format */
593
uint32_t tmp = tu_pack_float32_for_unorm(val->depthStencil.depth, 24);
594
tu_cs_emit(cs, fui((tmp & 0xff) / 255.0f));
595
tu_cs_emit(cs, fui((tmp >> 8 & 0xff) / 255.0f));
596
tu_cs_emit(cs, fui((tmp >> 16 & 0xff) / 255.0f));
597
tu_cs_emit(cs, fui((val->depthStencil.stencil & 0xff) / 255.0f));
598
} break;
599
case VK_FORMAT_D16_UNORM:
600
case VK_FORMAT_D32_SFLOAT:
601
tu_cs_emit(cs, fui(val->depthStencil.depth));
602
tu_cs_emit(cs, 0);
603
tu_cs_emit(cs, 0);
604
tu_cs_emit(cs, 0);
605
break;
606
case VK_FORMAT_S8_UINT:
607
tu_cs_emit(cs, val->depthStencil.stencil & 0xff);
608
tu_cs_emit(cs, 0);
609
tu_cs_emit(cs, 0);
610
tu_cs_emit(cs, 0);
611
break;
612
default:
613
/* as color formats use clear value as-is */
614
assert(!vk_format_is_depth_or_stencil(format));
615
tu_cs_emit_array(cs, val->color.uint32, 4);
616
break;
617
}
618
}
619
620
static void
621
r3d_src_common(struct tu_cmd_buffer *cmd,
622
struct tu_cs *cs,
623
const uint32_t *tex_const,
624
uint32_t offset_base,
625
uint32_t offset_ubwc,
626
VkFilter filter)
627
{
628
struct tu_cs_memory texture = { };
629
VkResult result = tu_cs_alloc(&cmd->sub_cs,
630
2, /* allocate space for a sampler too */
631
A6XX_TEX_CONST_DWORDS, &texture);
632
if (result != VK_SUCCESS) {
633
cmd->record_result = result;
634
return;
635
}
636
637
memcpy(texture.map, tex_const, A6XX_TEX_CONST_DWORDS * 4);
638
639
/* patch addresses for layer offset */
640
*(uint64_t*) (texture.map + 4) += offset_base;
641
uint64_t ubwc_addr = (texture.map[7] | (uint64_t) texture.map[8] << 32) + offset_ubwc;
642
texture.map[7] = ubwc_addr;
643
texture.map[8] = ubwc_addr >> 32;
644
645
texture.map[A6XX_TEX_CONST_DWORDS + 0] =
646
A6XX_TEX_SAMP_0_XY_MAG(tu6_tex_filter(filter, false)) |
647
A6XX_TEX_SAMP_0_XY_MIN(tu6_tex_filter(filter, false)) |
648
A6XX_TEX_SAMP_0_WRAP_S(A6XX_TEX_CLAMP_TO_EDGE) |
649
A6XX_TEX_SAMP_0_WRAP_T(A6XX_TEX_CLAMP_TO_EDGE) |
650
A6XX_TEX_SAMP_0_WRAP_R(A6XX_TEX_CLAMP_TO_EDGE) |
651
0x60000; /* XXX used by blob, doesn't seem necessary */
652
texture.map[A6XX_TEX_CONST_DWORDS + 1] =
653
0x1 | /* XXX used by blob, doesn't seem necessary */
654
A6XX_TEX_SAMP_1_UNNORM_COORDS |
655
A6XX_TEX_SAMP_1_MIPFILTER_LINEAR_FAR;
656
texture.map[A6XX_TEX_CONST_DWORDS + 2] = 0;
657
texture.map[A6XX_TEX_CONST_DWORDS + 3] = 0;
658
659
tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
660
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
661
CP_LOAD_STATE6_0_STATE_TYPE(ST6_SHADER) |
662
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
663
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
664
CP_LOAD_STATE6_0_NUM_UNIT(1));
665
tu_cs_emit_qw(cs, texture.iova + A6XX_TEX_CONST_DWORDS * 4);
666
667
tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_SAMP(.qword = texture.iova + A6XX_TEX_CONST_DWORDS * 4));
668
669
tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3);
670
tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
671
CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
672
CP_LOAD_STATE6_0_STATE_SRC(SS6_INDIRECT) |
673
CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_TEX) |
674
CP_LOAD_STATE6_0_NUM_UNIT(1));
675
tu_cs_emit_qw(cs, texture.iova);
676
677
tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_CONST(.qword = texture.iova));
678
tu_cs_emit_regs(cs, A6XX_SP_FS_TEX_COUNT(1));
679
}
680
681
static void
682
r3d_src(struct tu_cmd_buffer *cmd,
683
struct tu_cs *cs,
684
const struct tu_image_view *iview,
685
uint32_t layer,
686
VkFilter filter)
687
{
688
r3d_src_common(cmd, cs, iview->descriptor,
689
iview->layer_size * layer,
690
iview->ubwc_layer_size * layer,
691
filter);
692
}
693
694
static void
695
r3d_src_buffer(struct tu_cmd_buffer *cmd,
696
struct tu_cs *cs,
697
VkFormat vk_format,
698
uint64_t va, uint32_t pitch,
699
uint32_t width, uint32_t height)
700
{
701
uint32_t desc[A6XX_TEX_CONST_DWORDS];
702
703
struct tu_native_format format = tu6_format_texture(vk_format, TILE6_LINEAR);
704
705
desc[0] =
706
COND(vk_format_is_srgb(vk_format), A6XX_TEX_CONST_0_SRGB) |
707
A6XX_TEX_CONST_0_FMT(format.fmt) |
708
A6XX_TEX_CONST_0_SWAP(format.swap) |
709
A6XX_TEX_CONST_0_SWIZ_X(A6XX_TEX_X) |
710
// XXX to swizzle into .w for stencil buffer_to_image
711
A6XX_TEX_CONST_0_SWIZ_Y(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Y) |
712
A6XX_TEX_CONST_0_SWIZ_Z(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_Z) |
713
A6XX_TEX_CONST_0_SWIZ_W(vk_format == VK_FORMAT_R8_UNORM ? A6XX_TEX_X : A6XX_TEX_W);
714
desc[1] = A6XX_TEX_CONST_1_WIDTH(width) | A6XX_TEX_CONST_1_HEIGHT(height);
715
desc[2] =
716
A6XX_TEX_CONST_2_PITCH(pitch) |
717
A6XX_TEX_CONST_2_TYPE(A6XX_TEX_2D);
718
desc[3] = 0;
719
desc[4] = va;
720
desc[5] = va >> 32;
721
for (uint32_t i = 6; i < A6XX_TEX_CONST_DWORDS; i++)
722
desc[i] = 0;
723
724
r3d_src_common(cmd, cs, desc, 0, 0, VK_FILTER_NEAREST);
725
}
726
727
static void
728
r3d_dst(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
729
{
730
tu6_emit_msaa(cs, iview->image->layout[0].nr_samples); /* TODO: move to setup */
731
732
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
733
tu_cs_emit(cs, iview->RB_MRT_BUF_INFO);
734
tu_cs_image_ref(cs, iview, layer);
735
tu_cs_emit(cs, 0);
736
737
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_FLAG_BUFFER(0), 3);
738
tu_cs_image_flag_ref(cs, iview, layer);
739
740
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL(.flag_mrts = iview->ubwc_enabled));
741
}
742
743
static void
744
r3d_dst_stencil(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer)
745
{
746
tu6_emit_msaa(cs, iview->image->layout[0].nr_samples); /* TODO: move to setup */
747
748
tu_cs_emit_pkt4(cs, REG_A6XX_RB_MRT_BUF_INFO(0), 6);
749
tu_cs_emit(cs, tu_image_view_stencil(iview, RB_MRT_BUF_INFO));
750
tu_cs_image_stencil_ref(cs, iview, layer);
751
tu_cs_emit(cs, 0);
752
753
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
754
}
755
756
static void
757
r3d_dst_buffer(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch)
758
{
759
struct tu_native_format format = tu6_format_color(vk_format, TILE6_LINEAR);
760
761
tu6_emit_msaa(cs, 1); /* TODO: move to setup */
762
763
tu_cs_emit_regs(cs,
764
A6XX_RB_MRT_BUF_INFO(0, .color_format = format.fmt, .color_swap = format.swap),
765
A6XX_RB_MRT_PITCH(0, pitch),
766
A6XX_RB_MRT_ARRAY_PITCH(0, 0),
767
A6XX_RB_MRT_BASE(0, .qword = va),
768
A6XX_RB_MRT_BASE_GMEM(0, 0));
769
770
tu_cs_emit_regs(cs, A6XX_RB_RENDER_CNTL());
771
}
772
773
static uint8_t
774
aspect_write_mask(VkFormat vk_format, VkImageAspectFlags aspect_mask)
775
{
776
uint8_t mask = 0xf;
777
assert(aspect_mask);
778
/* note: the only format with partial writing is D24S8,
779
* clear/blit uses the _AS_R8G8B8A8 format to access it
780
*/
781
if (vk_format == VK_FORMAT_D24_UNORM_S8_UINT) {
782
if (aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT)
783
mask = 0x7;
784
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
785
mask = 0x8;
786
}
787
return mask;
788
}
789
790
static void
791
r3d_setup(struct tu_cmd_buffer *cmd,
792
struct tu_cs *cs,
793
VkFormat vk_format,
794
VkImageAspectFlags aspect_mask,
795
unsigned blit_param,
796
bool clear,
797
bool ubwc)
798
{
799
enum a6xx_format format = tu6_base_format(vk_format);
800
801
if ((vk_format == VK_FORMAT_D24_UNORM_S8_UINT ||
802
vk_format == VK_FORMAT_X8_D24_UNORM_PACK32) && ubwc) {
803
format = FMT6_Z24_UNORM_S8_UINT_AS_R8G8B8A8;
804
}
805
806
if (!cmd->state.pass) {
807
tu_emit_cache_flush_ccu(cmd, cs, TU_CMD_CCU_SYSMEM);
808
tu6_emit_window_scissor(cs, 0, 0, 0x3fff, 0x3fff);
809
}
810
811
tu_cs_emit_regs(cs, A6XX_GRAS_BIN_CONTROL(.dword = 0xc00000));
812
tu_cs_emit_regs(cs, A6XX_RB_BIN_CONTROL(.dword = 0xc00000));
813
814
r3d_common(cmd, cs, !clear, clear ? 1 : 0, false, blit_param);
815
816
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
817
tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
818
A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
819
0xfc000000);
820
tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(1));
821
822
tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), 1);
823
tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(0));
824
825
tu_cs_emit_regs(cs,
826
A6XX_RB_FS_OUTPUT_CNTL0(),
827
A6XX_RB_FS_OUTPUT_CNTL1(.mrt = 1));
828
829
tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
830
tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.sample_mask = 0xffff));
831
832
tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
833
tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL());
834
tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
835
tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL());
836
tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK());
837
tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK());
838
tu_cs_emit_regs(cs, A6XX_RB_STENCILREF());
839
840
tu_cs_emit_regs(cs, A6XX_RB_RENDER_COMPONENTS(.rt0 = 0xf));
841
tu_cs_emit_regs(cs, A6XX_SP_FS_RENDER_COMPONENTS(.rt0 = 0xf));
842
843
tu_cs_emit_regs(cs, A6XX_SP_FS_MRT_REG(0,
844
.color_format = format,
845
.color_sint = vk_format_is_sint(vk_format),
846
.color_uint = vk_format_is_uint(vk_format)));
847
848
tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(0,
849
.component_enable = aspect_write_mask(vk_format, aspect_mask)));
850
tu_cs_emit_regs(cs, A6XX_RB_SRGB_CNTL(vk_format_is_srgb(vk_format)));
851
tu_cs_emit_regs(cs, A6XX_SP_SRGB_CNTL(vk_format_is_srgb(vk_format)));
852
853
tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
854
tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));
855
856
if (cmd->state.predication_active) {
857
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
858
tu_cs_emit(cs, 0);
859
}
860
}
861
862
static void
863
r3d_run(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
864
{
865
tu_cs_emit_pkt7(cs, CP_DRAW_INDX_OFFSET, 3);
866
tu_cs_emit(cs, CP_DRAW_INDX_OFFSET_0_PRIM_TYPE(DI_PT_RECTLIST) |
867
CP_DRAW_INDX_OFFSET_0_SOURCE_SELECT(DI_SRC_SEL_AUTO_INDEX) |
868
CP_DRAW_INDX_OFFSET_0_VIS_CULL(IGNORE_VISIBILITY));
869
tu_cs_emit(cs, 1); /* instance count */
870
tu_cs_emit(cs, 2); /* vertex count */
871
}
872
873
static void
874
r3d_teardown(struct tu_cmd_buffer *cmd, struct tu_cs *cs)
875
{
876
if (cmd->state.predication_active) {
877
tu_cs_emit_pkt7(cs, CP_DRAW_PRED_ENABLE_LOCAL, 1);
878
tu_cs_emit(cs, 1);
879
}
880
}
881
882
/* blit ops - common interface for 2d/shader paths */
883
884
struct blit_ops {
885
void (*coords)(struct tu_cs *cs,
886
const VkOffset2D *dst,
887
const VkOffset2D *src,
888
const VkExtent2D *extent);
889
void (*clear_value)(struct tu_cs *cs, VkFormat format, const VkClearValue *val);
890
void (*src)(
891
struct tu_cmd_buffer *cmd,
892
struct tu_cs *cs,
893
const struct tu_image_view *iview,
894
uint32_t layer,
895
VkFilter filter);
896
void (*src_buffer)(struct tu_cmd_buffer *cmd, struct tu_cs *cs,
897
VkFormat vk_format,
898
uint64_t va, uint32_t pitch,
899
uint32_t width, uint32_t height);
900
void (*dst)(struct tu_cs *cs, const struct tu_image_view *iview, uint32_t layer);
901
void (*dst_buffer)(struct tu_cs *cs, VkFormat vk_format, uint64_t va, uint32_t pitch);
902
void (*setup)(struct tu_cmd_buffer *cmd,
903
struct tu_cs *cs,
904
VkFormat vk_format,
905
VkImageAspectFlags aspect_mask,
906
unsigned blit_param, /* CmdBlitImage: rotation in 2D path and z scaling in 3D path */
907
bool clear,
908
bool ubwc);
909
void (*run)(struct tu_cmd_buffer *cmd, struct tu_cs *cs);
910
void (*teardown)(struct tu_cmd_buffer *cmd,
911
struct tu_cs *cs);
912
};
913
914
static const struct blit_ops r2d_ops = {
915
.coords = r2d_coords,
916
.clear_value = r2d_clear_value,
917
.src = r2d_src,
918
.src_buffer = r2d_src_buffer,
919
.dst = r2d_dst,
920
.dst_buffer = r2d_dst_buffer,
921
.setup = r2d_setup,
922
.run = r2d_run,
923
.teardown = r2d_teardown,
924
};
925
926
static const struct blit_ops r3d_ops = {
927
.coords = r3d_coords,
928
.clear_value = r3d_clear_value,
929
.src = r3d_src,
930
.src_buffer = r3d_src_buffer,
931
.dst = r3d_dst,
932
.dst_buffer = r3d_dst_buffer,
933
.setup = r3d_setup,
934
.run = r3d_run,
935
.teardown = r3d_teardown,
936
};
937
938
/* passthrough set coords from 3D extents */
939
static void
940
coords(const struct blit_ops *ops,
941
struct tu_cs *cs,
942
const VkOffset3D *dst,
943
const VkOffset3D *src,
944
const VkExtent3D *extent)
945
{
946
ops->coords(cs, (const VkOffset2D*) dst, (const VkOffset2D*) src, (const VkExtent2D*) extent);
947
}
948
949
/* Decides the VK format to treat our data as for a memcpy-style blit. We have
950
* to be a bit careful because we have to pick a format with matching UBWC
951
* compression behavior, so no just returning R8_UINT/R16_UINT/R32_UINT for
952
* everything.
953
*/
954
static VkFormat
955
copy_format(VkFormat format, VkImageAspectFlags aspect_mask, bool copy_buffer)
956
{
957
if (vk_format_is_compressed(format)) {
958
switch (vk_format_get_blocksize(format)) {
959
case 1: return VK_FORMAT_R8_UINT;
960
case 2: return VK_FORMAT_R16_UINT;
961
case 4: return VK_FORMAT_R32_UINT;
962
case 8: return VK_FORMAT_R32G32_UINT;
963
case 16:return VK_FORMAT_R32G32B32A32_UINT;
964
default:
965
unreachable("unhandled format size");
966
}
967
}
968
969
switch (format) {
970
/* For SNORM formats, copy them as the equivalent UNORM format. If we treat
971
* them as snorm then the 0x80 (-1.0 snorm8) value will get clamped to 0x81
972
* (also -1.0), when we're supposed to be memcpying the bits. See
973
* https://gitlab.khronos.org/Tracker/vk-gl-cts/-/issues/2917 for discussion.
974
*/
975
case VK_FORMAT_R8_SNORM:
976
return VK_FORMAT_R8_UNORM;
977
case VK_FORMAT_R8G8_SNORM:
978
return VK_FORMAT_R8G8_UNORM;
979
case VK_FORMAT_R8G8B8_SNORM:
980
return VK_FORMAT_R8G8B8_UNORM;
981
case VK_FORMAT_B8G8R8_SNORM:
982
return VK_FORMAT_B8G8R8_UNORM;
983
case VK_FORMAT_R8G8B8A8_SNORM:
984
return VK_FORMAT_R8G8B8A8_UNORM;
985
case VK_FORMAT_B8G8R8A8_SNORM:
986
return VK_FORMAT_B8G8R8A8_UNORM;
987
case VK_FORMAT_A8B8G8R8_SNORM_PACK32:
988
return VK_FORMAT_A8B8G8R8_UNORM_PACK32;
989
case VK_FORMAT_A2R10G10B10_SNORM_PACK32:
990
return VK_FORMAT_A2R10G10B10_UNORM_PACK32;
991
case VK_FORMAT_A2B10G10R10_SNORM_PACK32:
992
return VK_FORMAT_A2B10G10R10_UNORM_PACK32;
993
case VK_FORMAT_R16_SNORM:
994
return VK_FORMAT_R16_UNORM;
995
case VK_FORMAT_R16G16_SNORM:
996
return VK_FORMAT_R16G16_UNORM;
997
case VK_FORMAT_R16G16B16_SNORM:
998
return VK_FORMAT_R16G16B16_UNORM;
999
case VK_FORMAT_R16G16B16A16_SNORM:
1000
return VK_FORMAT_R16G16B16A16_UNORM;
1001
1002
case VK_FORMAT_E5B9G9R9_UFLOAT_PACK32:
1003
return VK_FORMAT_R32_UINT;
1004
1005
case VK_FORMAT_G8_B8R8_2PLANE_420_UNORM:
1006
if (aspect_mask == VK_IMAGE_ASPECT_PLANE_1_BIT)
1007
return VK_FORMAT_R8G8_UNORM;
1008
else
1009
return VK_FORMAT_R8_UNORM;
1010
case VK_FORMAT_G8_B8_R8_3PLANE_420_UNORM:
1011
return VK_FORMAT_R8_UNORM;
1012
1013
case VK_FORMAT_D24_UNORM_S8_UINT:
1014
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT && copy_buffer)
1015
return VK_FORMAT_R8_UNORM;
1016
else
1017
return format;
1018
1019
case VK_FORMAT_D32_SFLOAT_S8_UINT:
1020
if (aspect_mask == VK_IMAGE_ASPECT_STENCIL_BIT)
1021
return VK_FORMAT_S8_UINT;
1022
assert(aspect_mask == VK_IMAGE_ASPECT_DEPTH_BIT);
1023
return VK_FORMAT_D32_SFLOAT;
1024
1025
default:
1026
return format;
1027
}
1028
}
1029
1030
void
1031
tu6_clear_lrz(struct tu_cmd_buffer *cmd,
1032
struct tu_cs *cs,
1033
struct tu_image *image,
1034
const VkClearValue *value)
1035
{
1036
const struct blit_ops *ops = &r2d_ops;
1037
1038
ops->setup(cmd, cs, VK_FORMAT_D16_UNORM, VK_IMAGE_ASPECT_DEPTH_BIT, 0, true, false);
1039
ops->clear_value(cs, VK_FORMAT_D16_UNORM, value);
1040
ops->dst_buffer(cs, VK_FORMAT_D16_UNORM,
1041
image->bo->iova + image->bo_offset + image->lrz_offset,
1042
image->lrz_pitch * 2);
1043
ops->coords(cs, &(VkOffset2D) {}, NULL, &(VkExtent2D) {image->lrz_pitch, image->lrz_height});
1044
ops->run(cmd, cs);
1045
ops->teardown(cmd, cs);
1046
}
1047
1048
static void
1049
tu_image_view_copy_blit(struct tu_image_view *iview,
1050
struct tu_image *image,
1051
VkFormat format,
1052
const VkImageSubresourceLayers *subres,
1053
uint32_t layer,
1054
bool stencil_read,
1055
bool z_scale)
1056
{
1057
VkImageAspectFlags aspect_mask = subres->aspectMask;
1058
1059
/* always use the AS_R8G8B8A8 format for these */
1060
if (format == VK_FORMAT_D24_UNORM_S8_UINT ||
1061
format == VK_FORMAT_X8_D24_UNORM_PACK32) {
1062
aspect_mask = VK_IMAGE_ASPECT_COLOR_BIT;
1063
}
1064
1065
tu_image_view_init(iview, &(VkImageViewCreateInfo) {
1066
.image = tu_image_to_handle(image),
1067
.viewType = z_scale ? VK_IMAGE_VIEW_TYPE_3D : VK_IMAGE_VIEW_TYPE_2D,
1068
.format = format,
1069
/* image_to_buffer from d24s8 with stencil aspect mask writes out to r8 */
1070
.components.r = stencil_read ? VK_COMPONENT_SWIZZLE_A : VK_COMPONENT_SWIZZLE_R,
1071
.subresourceRange = {
1072
.aspectMask = aspect_mask,
1073
.baseMipLevel = subres->mipLevel,
1074
.levelCount = 1,
1075
.baseArrayLayer = subres->baseArrayLayer + layer,
1076
.layerCount = 1,
1077
},
1078
}, false);
1079
}
1080
1081
static void
1082
tu_image_view_copy(struct tu_image_view *iview,
1083
struct tu_image *image,
1084
VkFormat format,
1085
const VkImageSubresourceLayers *subres,
1086
uint32_t layer,
1087
bool stencil_read)
1088
{
1089
format = copy_format(format, subres->aspectMask, false);
1090
tu_image_view_copy_blit(iview, image, format, subres, layer, stencil_read, false);
1091
}
1092
1093
static void
1094
tu_image_view_blit(struct tu_image_view *iview,
1095
struct tu_image *image,
1096
const VkImageSubresourceLayers *subres,
1097
uint32_t layer)
1098
{
1099
tu_image_view_copy_blit(iview, image, image->vk_format, subres, layer, false, false);
1100
}
1101
1102
static void
1103
tu6_blit_image(struct tu_cmd_buffer *cmd,
1104
struct tu_image *src_image,
1105
struct tu_image *dst_image,
1106
const VkImageBlit *info,
1107
VkFilter filter)
1108
{
1109
const struct blit_ops *ops = &r2d_ops;
1110
struct tu_cs *cs = &cmd->cs;
1111
bool z_scale = false;
1112
uint32_t layers = info->dstOffsets[1].z - info->dstOffsets[0].z;
1113
1114
/* 2D blit can't do rotation mirroring from just coordinates */
1115
static const enum a6xx_rotation rotate[2][2] = {
1116
{ROTATE_0, ROTATE_HFLIP},
1117
{ROTATE_VFLIP, ROTATE_180},
1118
};
1119
1120
bool mirror_x = (info->srcOffsets[1].x < info->srcOffsets[0].x) !=
1121
(info->dstOffsets[1].x < info->dstOffsets[0].x);
1122
bool mirror_y = (info->srcOffsets[1].y < info->srcOffsets[0].y) !=
1123
(info->dstOffsets[1].y < info->dstOffsets[0].y);
1124
1125
int32_t src0_z = info->srcOffsets[0].z;
1126
int32_t src1_z = info->srcOffsets[1].z;
1127
1128
if ((info->srcOffsets[1].z - info->srcOffsets[0].z !=
1129
info->dstOffsets[1].z - info->dstOffsets[0].z) ||
1130
info->srcOffsets[1].z < info->srcOffsets[0].z) {
1131
z_scale = true;
1132
}
1133
1134
if (info->dstOffsets[1].z < info->dstOffsets[0].z) {
1135
layers = info->dstOffsets[0].z - info->dstOffsets[1].z;
1136
src0_z = info->srcOffsets[1].z;
1137
src1_z = info->srcOffsets[0].z;
1138
}
1139
1140
if (info->dstSubresource.layerCount > 1) {
1141
assert(layers <= 1);
1142
layers = info->dstSubresource.layerCount;
1143
}
1144
1145
/* BC1_RGB_* formats need to have their last components overriden with 1
1146
* when sampling, which is normally handled with the texture descriptor
1147
* swizzle. The 2d path can't handle that, so use the 3d path.
1148
*
1149
* TODO: we could use RB_2D_BLIT_CNTL::MASK to make these formats work with
1150
* the 2d path.
1151
*/
1152
1153
unsigned blit_param = rotate[mirror_y][mirror_x];
1154
if (dst_image->layout[0].nr_samples > 1 ||
1155
src_image->vk_format == VK_FORMAT_BC1_RGB_UNORM_BLOCK ||
1156
src_image->vk_format == VK_FORMAT_BC1_RGB_SRGB_BLOCK ||
1157
filter == VK_FILTER_CUBIC_EXT ||
1158
z_scale) {
1159
ops = &r3d_ops;
1160
blit_param = z_scale;
1161
}
1162
1163
/* use the right format in setup() for D32_S8
1164
* TODO: this probably should use a helper
1165
*/
1166
VkFormat format = dst_image->vk_format;
1167
if (format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
1168
if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_DEPTH_BIT)
1169
format = VK_FORMAT_D32_SFLOAT;
1170
else if (info->dstSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT)
1171
format = VK_FORMAT_S8_UINT;
1172
else
1173
unreachable("unexpected D32_S8 aspect mask in blit_image");
1174
}
1175
1176
ops->setup(cmd, cs, format, info->dstSubresource.aspectMask,
1177
blit_param, false, dst_image->layout[0].ubwc);
1178
1179
if (ops == &r3d_ops) {
1180
r3d_coords_raw(cs, (float[]) {
1181
info->dstOffsets[0].x, info->dstOffsets[0].y,
1182
info->srcOffsets[0].x, info->srcOffsets[0].y,
1183
info->dstOffsets[1].x, info->dstOffsets[1].y,
1184
info->srcOffsets[1].x, info->srcOffsets[1].y
1185
});
1186
} else {
1187
tu_cs_emit_regs(cs,
1188
A6XX_GRAS_2D_DST_TL(.x = MIN2(info->dstOffsets[0].x, info->dstOffsets[1].x),
1189
.y = MIN2(info->dstOffsets[0].y, info->dstOffsets[1].y)),
1190
A6XX_GRAS_2D_DST_BR(.x = MAX2(info->dstOffsets[0].x, info->dstOffsets[1].x) - 1,
1191
.y = MAX2(info->dstOffsets[0].y, info->dstOffsets[1].y) - 1));
1192
tu_cs_emit_regs(cs,
1193
A6XX_GRAS_2D_SRC_TL_X(MIN2(info->srcOffsets[0].x, info->srcOffsets[1].x)),
1194
A6XX_GRAS_2D_SRC_BR_X(MAX2(info->srcOffsets[0].x, info->srcOffsets[1].x) - 1),
1195
A6XX_GRAS_2D_SRC_TL_Y(MIN2(info->srcOffsets[0].y, info->srcOffsets[1].y)),
1196
A6XX_GRAS_2D_SRC_BR_Y(MAX2(info->srcOffsets[0].y, info->srcOffsets[1].y) - 1));
1197
}
1198
1199
struct tu_image_view dst, src;
1200
tu_image_view_blit(&dst, dst_image, &info->dstSubresource,
1201
MIN2(info->dstOffsets[0].z, info->dstOffsets[1].z));
1202
1203
if (z_scale) {
1204
tu_image_view_copy_blit(&src, src_image, src_image->vk_format,
1205
&info->srcSubresource, 0, false, true);
1206
ops->src(cmd, cs, &src, 0, filter);
1207
} else {
1208
tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffsets[0].z);
1209
}
1210
1211
for (uint32_t i = 0; i < layers; i++) {
1212
if (z_scale) {
1213
float t = ((float) i + 0.5f) / (float) layers;
1214
r3d_coord_z(cs, t * (src1_z - src0_z) + src0_z);
1215
} else {
1216
ops->src(cmd, cs, &src, i, filter);
1217
}
1218
ops->dst(cs, &dst, i);
1219
ops->run(cmd, cs);
1220
}
1221
1222
ops->teardown(cmd, cs);
1223
}
1224
1225
VKAPI_ATTR void VKAPI_CALL
1226
tu_CmdBlitImage(VkCommandBuffer commandBuffer,
1227
VkImage srcImage,
1228
VkImageLayout srcImageLayout,
1229
VkImage dstImage,
1230
VkImageLayout dstImageLayout,
1231
uint32_t regionCount,
1232
const VkImageBlit *pRegions,
1233
VkFilter filter)
1234
1235
{
1236
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1237
TU_FROM_HANDLE(tu_image, src_image, srcImage);
1238
TU_FROM_HANDLE(tu_image, dst_image, dstImage);
1239
1240
for (uint32_t i = 0; i < regionCount; ++i) {
1241
/* can't blit both depth and stencil at once with D32_S8
1242
* TODO: more advanced 3D blit path to support it instead?
1243
*/
1244
if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT ||
1245
dst_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
1246
VkImageBlit region = pRegions[i];
1247
u_foreach_bit(b, pRegions[i].dstSubresource.aspectMask) {
1248
region.srcSubresource.aspectMask = BIT(b);
1249
region.dstSubresource.aspectMask = BIT(b);
1250
tu6_blit_image(cmd, src_image, dst_image, &region, filter);
1251
}
1252
continue;
1253
}
1254
tu6_blit_image(cmd, src_image, dst_image, pRegions + i, filter);
1255
}
1256
}
1257
1258
static void
1259
copy_compressed(VkFormat format,
1260
VkOffset3D *offset,
1261
VkExtent3D *extent,
1262
uint32_t *width,
1263
uint32_t *height)
1264
{
1265
if (!vk_format_is_compressed(format))
1266
return;
1267
1268
uint32_t block_width = vk_format_get_blockwidth(format);
1269
uint32_t block_height = vk_format_get_blockheight(format);
1270
1271
offset->x /= block_width;
1272
offset->y /= block_height;
1273
1274
if (extent) {
1275
extent->width = DIV_ROUND_UP(extent->width, block_width);
1276
extent->height = DIV_ROUND_UP(extent->height, block_height);
1277
}
1278
if (width)
1279
*width = DIV_ROUND_UP(*width, block_width);
1280
if (height)
1281
*height = DIV_ROUND_UP(*height, block_height);
1282
}
1283
1284
static void
1285
tu_copy_buffer_to_image(struct tu_cmd_buffer *cmd,
1286
struct tu_buffer *src_buffer,
1287
struct tu_image *dst_image,
1288
const VkBufferImageCopy *info)
1289
{
1290
struct tu_cs *cs = &cmd->cs;
1291
uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
1292
VkFormat src_format =
1293
copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, true);
1294
const struct blit_ops *ops = &r2d_ops;
1295
1296
/* special case for buffer to stencil */
1297
if (dst_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
1298
info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1299
ops = &r3d_ops;
1300
}
1301
1302
/* TODO: G8_B8R8_2PLANE_420_UNORM Y plane has different hardware format,
1303
* which matters for UBWC. buffer_to_image/etc can fail because of this
1304
*/
1305
1306
VkOffset3D offset = info->imageOffset;
1307
VkExtent3D extent = info->imageExtent;
1308
uint32_t src_width = info->bufferRowLength ?: extent.width;
1309
uint32_t src_height = info->bufferImageHeight ?: extent.height;
1310
1311
copy_compressed(dst_image->vk_format, &offset, &extent, &src_width, &src_height);
1312
1313
uint32_t pitch = src_width * vk_format_get_blocksize(src_format);
1314
uint32_t layer_size = src_height * pitch;
1315
1316
ops->setup(cmd, cs,
1317
copy_format(dst_image->vk_format, info->imageSubresource.aspectMask, false),
1318
info->imageSubresource.aspectMask, 0, false, dst_image->layout[0].ubwc);
1319
1320
struct tu_image_view dst;
1321
tu_image_view_copy(&dst, dst_image, dst_image->vk_format, &info->imageSubresource, offset.z, false);
1322
1323
for (uint32_t i = 0; i < layers; i++) {
1324
ops->dst(cs, &dst, i);
1325
1326
uint64_t src_va = tu_buffer_iova(src_buffer) + info->bufferOffset + layer_size * i;
1327
if ((src_va & 63) || (pitch & 63)) {
1328
for (uint32_t y = 0; y < extent.height; y++) {
1329
uint32_t x = (src_va & 63) / vk_format_get_blocksize(src_format);
1330
ops->src_buffer(cmd, cs, src_format, src_va & ~63, pitch,
1331
x + extent.width, 1);
1332
ops->coords(cs, &(VkOffset2D){offset.x, offset.y + y}, &(VkOffset2D){x},
1333
&(VkExtent2D) {extent.width, 1});
1334
ops->run(cmd, cs);
1335
src_va += pitch;
1336
}
1337
} else {
1338
ops->src_buffer(cmd, cs, src_format, src_va, pitch, extent.width, extent.height);
1339
coords(ops, cs, &offset, &(VkOffset3D){}, &extent);
1340
ops->run(cmd, cs);
1341
}
1342
}
1343
1344
ops->teardown(cmd, cs);
1345
}
1346
1347
VKAPI_ATTR void VKAPI_CALL
1348
tu_CmdCopyBufferToImage(VkCommandBuffer commandBuffer,
1349
VkBuffer srcBuffer,
1350
VkImage dstImage,
1351
VkImageLayout dstImageLayout,
1352
uint32_t regionCount,
1353
const VkBufferImageCopy *pRegions)
1354
{
1355
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1356
TU_FROM_HANDLE(tu_image, dst_image, dstImage);
1357
TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
1358
1359
for (unsigned i = 0; i < regionCount; ++i)
1360
tu_copy_buffer_to_image(cmd, src_buffer, dst_image, pRegions + i);
1361
}
1362
1363
static void
1364
tu_copy_image_to_buffer(struct tu_cmd_buffer *cmd,
1365
struct tu_image *src_image,
1366
struct tu_buffer *dst_buffer,
1367
const VkBufferImageCopy *info)
1368
{
1369
struct tu_cs *cs = &cmd->cs;
1370
uint32_t layers = MAX2(info->imageExtent.depth, info->imageSubresource.layerCount);
1371
VkFormat dst_format =
1372
copy_format(src_image->vk_format, info->imageSubresource.aspectMask, true);
1373
bool stencil_read = false;
1374
1375
if (src_image->vk_format == VK_FORMAT_D24_UNORM_S8_UINT &&
1376
info->imageSubresource.aspectMask == VK_IMAGE_ASPECT_STENCIL_BIT) {
1377
stencil_read = true;
1378
}
1379
1380
const struct blit_ops *ops = stencil_read ? &r3d_ops : &r2d_ops;
1381
VkOffset3D offset = info->imageOffset;
1382
VkExtent3D extent = info->imageExtent;
1383
uint32_t dst_width = info->bufferRowLength ?: extent.width;
1384
uint32_t dst_height = info->bufferImageHeight ?: extent.height;
1385
1386
copy_compressed(src_image->vk_format, &offset, &extent, &dst_width, &dst_height);
1387
1388
uint32_t pitch = dst_width * vk_format_get_blocksize(dst_format);
1389
uint32_t layer_size = pitch * dst_height;
1390
1391
ops->setup(cmd, cs, dst_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false);
1392
1393
struct tu_image_view src;
1394
tu_image_view_copy(&src, src_image, src_image->vk_format, &info->imageSubresource, offset.z, stencil_read);
1395
1396
for (uint32_t i = 0; i < layers; i++) {
1397
ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
1398
1399
uint64_t dst_va = tu_buffer_iova(dst_buffer) + info->bufferOffset + layer_size * i;
1400
if ((dst_va & 63) || (pitch & 63)) {
1401
for (uint32_t y = 0; y < extent.height; y++) {
1402
uint32_t x = (dst_va & 63) / vk_format_get_blocksize(dst_format);
1403
ops->dst_buffer(cs, dst_format, dst_va & ~63, 0);
1404
ops->coords(cs, &(VkOffset2D) {x}, &(VkOffset2D){offset.x, offset.y + y},
1405
&(VkExtent2D) {extent.width, 1});
1406
ops->run(cmd, cs);
1407
dst_va += pitch;
1408
}
1409
} else {
1410
ops->dst_buffer(cs, dst_format, dst_va, pitch);
1411
coords(ops, cs, &(VkOffset3D) {0, 0}, &offset, &extent);
1412
ops->run(cmd, cs);
1413
}
1414
}
1415
1416
ops->teardown(cmd, cs);
1417
}
1418
1419
VKAPI_ATTR void VKAPI_CALL
1420
tu_CmdCopyImageToBuffer(VkCommandBuffer commandBuffer,
1421
VkImage srcImage,
1422
VkImageLayout srcImageLayout,
1423
VkBuffer dstBuffer,
1424
uint32_t regionCount,
1425
const VkBufferImageCopy *pRegions)
1426
{
1427
TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
1428
TU_FROM_HANDLE(tu_image, src_image, srcImage);
1429
TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);
1430
1431
for (unsigned i = 0; i < regionCount; ++i)
1432
tu_copy_image_to_buffer(cmd, src_image, dst_buffer, pRegions + i);
1433
}
1434
1435
/* Tiled formats don't support swapping, which means that we can't support
1436
* formats that require a non-WZYX swap like B8G8R8A8 natively. Also, some
1437
* formats like B5G5R5A1 have a separate linear-only format when sampling.
1438
* Currently we fake support for tiled swapped formats and use the unswapped
1439
* format instead, but this means that reinterpreting copies to and from
1440
* swapped formats can't be performed correctly unless we can swizzle the
1441
* components by reinterpreting the other image as the "correct" swapped
1442
* format, i.e. only when the other image is linear.
1443
*/
1444
1445
static bool
1446
is_swapped_format(VkFormat format)
1447
{
1448
struct tu_native_format linear = tu6_format_texture(format, TILE6_LINEAR);
1449
struct tu_native_format tiled = tu6_format_texture(format, TILE6_3);
1450
return linear.fmt != tiled.fmt || linear.swap != tiled.swap;
1451
}
1452
1453
/* R8G8_* formats have a different tiling layout than other cpp=2 formats, and
1454
* therefore R8G8 images can't be reinterpreted as non-R8G8 images (and vice
1455
* versa). This should mirror the logic in fdl6_layout.
1456
*/
1457
static bool
1458
image_is_r8g8(struct tu_image *image)
1459
{
1460
return image->layout[0].cpp == 2 &&
1461
vk_format_get_nr_components(image->vk_format) == 2;
1462
}
1463
1464
static void
1465
tu_copy_image_to_image(struct tu_cmd_buffer *cmd,
1466
struct tu_image *src_image,
1467
struct tu_image *dst_image,
1468
const VkImageCopy *info)
1469
{
1470
const struct blit_ops *ops = &r2d_ops;
1471
struct tu_cs *cs = &cmd->cs;
1472
1473
if (dst_image->layout[0].nr_samples > 1)
1474
ops = &r3d_ops;
1475
1476
VkFormat format = VK_FORMAT_UNDEFINED;
1477
VkOffset3D src_offset = info->srcOffset;
1478
VkOffset3D dst_offset = info->dstOffset;
1479
VkExtent3D extent = info->extent;
1480
uint32_t layers_to_copy = MAX2(info->extent.depth, info->srcSubresource.layerCount);
1481
1482
/* From the Vulkan 1.2.140 spec, section 19.3 "Copying Data Between
1483
* Images":
1484
*
1485
* When copying between compressed and uncompressed formats the extent
1486
* members represent the texel dimensions of the source image and not
1487
* the destination. When copying from a compressed image to an
1488
* uncompressed image the image texel dimensions written to the
1489
* uncompressed image will be source extent divided by the compressed
1490
* texel block dimensions. When copying from an uncompressed image to a
1491
* compressed image the image texel dimensions written to the compressed
1492
* image will be the source extent multiplied by the compressed texel
1493
* block dimensions.
1494
*
1495
* This means we only have to adjust the extent if the source image is
1496
* compressed.
1497
*/
1498
copy_compressed(src_image->vk_format, &src_offset, &extent, NULL, NULL);
1499
copy_compressed(dst_image->vk_format, &dst_offset, NULL, NULL, NULL);
1500
1501
VkFormat dst_format = copy_format(dst_image->vk_format, info->dstSubresource.aspectMask, false);
1502
VkFormat src_format = copy_format(src_image->vk_format, info->srcSubresource.aspectMask, false);
1503
1504
bool use_staging_blit = false;
1505
1506
if (src_format == dst_format) {
1507
/* Images that share a format can always be copied directly because it's
1508
* the same as a blit.
1509
*/
1510
format = src_format;
1511
} else if (!src_image->layout[0].tile_mode) {
1512
/* If an image is linear, we can always safely reinterpret it with the
1513
* other image's format and then do a regular blit.
1514
*/
1515
format = dst_format;
1516
} else if (!dst_image->layout[0].tile_mode) {
1517
format = src_format;
1518
} else if (image_is_r8g8(src_image) != image_is_r8g8(dst_image)) {
1519
/* We can't currently copy r8g8 images to/from other cpp=2 images,
1520
* due to the different tile layout.
1521
*/
1522
use_staging_blit = true;
1523
} else if (is_swapped_format(src_format) ||
1524
is_swapped_format(dst_format)) {
1525
/* If either format has a non-identity swap, then we can't copy
1526
* to/from it.
1527
*/
1528
use_staging_blit = true;
1529
} else if (!src_image->layout[0].ubwc) {
1530
format = dst_format;
1531
} else if (!dst_image->layout[0].ubwc) {
1532
format = src_format;
1533
} else {
1534
/* Both formats use UBWC and so neither can be reinterpreted.
1535
* TODO: We could do an in-place decompression of the dst instead.
1536
*/
1537
use_staging_blit = true;
1538
}
1539
1540
struct tu_image_view dst, src;
1541
1542
if (use_staging_blit) {
1543
tu_image_view_copy(&dst, dst_image, dst_format, &info->dstSubresource, dst_offset.z, false);
1544
tu_image_view_copy(&src, src_image, src_format, &info->srcSubresource, src_offset.z, false);
1545
1546
struct tu_image staging_image = {
1547
.vk_format = src_format,
1548
.level_count = 1,
1549
         .layer_count = info->srcSubresource.layerCount,
         .bo_offset = 0,
      };

      VkImageSubresourceLayers staging_subresource = {
         .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
         .mipLevel = 0,
         .baseArrayLayer = 0,
         .layerCount = info->srcSubresource.layerCount,
      };

      VkOffset3D staging_offset = { 0 };

      staging_image.layout[0].tile_mode = TILE6_LINEAR;
      staging_image.layout[0].ubwc = false;

      fdl6_layout(&staging_image.layout[0],
                  vk_format_to_pipe_format(staging_image.vk_format),
                  src_image->layout[0].nr_samples,
                  extent.width,
                  extent.height,
                  extent.depth,
                  staging_image.level_count,
                  staging_image.layer_count,
                  extent.depth > 1,
                  NULL);

      VkResult result = tu_get_scratch_bo(cmd->device,
                                          staging_image.layout[0].size,
                                          &staging_image.bo);
      if (result != VK_SUCCESS) {
         cmd->record_result = result;
         return;
      }

      struct tu_image_view staging;
      tu_image_view_copy(&staging, &staging_image, src_format,
                         &staging_subresource, 0, false);

      ops->setup(cmd, cs, src_format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false);
      coords(ops, cs, &staging_offset, &src_offset, &extent);

      for (uint32_t i = 0; i < layers_to_copy; i++) {
         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
         ops->dst(cs, &staging, i);
         ops->run(cmd, cs);
      }

      /* When executed by the user there has to be a pipeline barrier here,
       * but since we're doing it manually we'll have to flush ourselves.
       */
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

      tu_image_view_copy(&staging, &staging_image, dst_format,
                         &staging_subresource, 0, false);

      ops->setup(cmd, cs, dst_format, info->dstSubresource.aspectMask,
                 0, false, dst_image->layout[0].ubwc);
      coords(ops, cs, &dst_offset, &staging_offset, &extent);

      for (uint32_t i = 0; i < layers_to_copy; i++) {
         ops->src(cmd, cs, &staging, i, VK_FILTER_NEAREST);
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   } else {
      tu_image_view_copy(&dst, dst_image, format, &info->dstSubresource, dst_offset.z, false);
      tu_image_view_copy(&src, src_image, format, &info->srcSubresource, src_offset.z, false);

      ops->setup(cmd, cs, format, info->dstSubresource.aspectMask,
                 0, false, dst_image->layout[0].ubwc);
      coords(ops, cs, &dst_offset, &src_offset, &extent);

      for (uint32_t i = 0; i < layers_to_copy; i++) {
         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }

   ops->teardown(cmd, cs);
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyImage(VkCommandBuffer commandBuffer,
                VkImage srcImage,
                VkImageLayout srcImageLayout,
                VkImage destImage,
                VkImageLayout destImageLayout,
                uint32_t regionCount,
                const VkImageCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, destImage);

   for (uint32_t i = 0; i < regionCount; ++i) {
      if (src_image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
         VkImageCopy info = pRegions[i];
         u_foreach_bit(b, pRegions[i].dstSubresource.aspectMask) {
            info.srcSubresource.aspectMask = BIT(b);
            info.dstSubresource.aspectMask = BIT(b);
            tu_copy_image_to_image(cmd, src_image, dst_image, &info);
         }
         continue;
      }

      tu_copy_image_to_image(cmd, src_image, dst_image, pRegions + i);
   }
}

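/* Copy between buffer VAs by treating the data as a 1D "image": the base
 * address is aligned down to 64 bytes (presumably the 2D engine's base
 * alignment for linear surfaces) and the misalignment becomes the starting
 * x offset, in texels of block_size bytes. Each iteration copies at most
 * 0x4000 texels, which appears to be the maximum blit width, so e.g. an
 * aligned 1 MiB copy with block_size = 4 takes 16 blits of 0x4000
 * R32_UINT texels each.
 */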
static void
copy_buffer(struct tu_cmd_buffer *cmd,
            uint64_t dst_va,
            uint64_t src_va,
            uint64_t size,
            uint32_t block_size)
{
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = block_size == 4 ? VK_FORMAT_R32_UINT : VK_FORMAT_R8_UNORM;
   uint64_t blocks = size / block_size;

   ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false, false);

   while (blocks) {
      uint32_t src_x = (src_va & 63) / block_size;
      uint32_t dst_x = (dst_va & 63) / block_size;
      uint32_t width = MIN2(MIN2(blocks, 0x4000 - src_x), 0x4000 - dst_x);

      ops->src_buffer(cmd, cs, format, src_va & ~63, 0, src_x + width, 1);
      ops->dst_buffer(cs, format, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, &(VkOffset2D) {src_x}, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      src_va += width * block_size;
      dst_va += width * block_size;
      blocks -= width;
   }

   ops->teardown(cmd, cs);
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdCopyBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer srcBuffer,
                 VkBuffer dstBuffer,
                 uint32_t regionCount,
                 const VkBufferCopy *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, src_buffer, srcBuffer);
   TU_FROM_HANDLE(tu_buffer, dst_buffer, dstBuffer);

   for (unsigned i = 0; i < regionCount; ++i) {
      copy_buffer(cmd,
                  tu_buffer_iova(dst_buffer) + pRegions[i].dstOffset,
                  tu_buffer_iova(src_buffer) + pRegions[i].srcOffset,
                  pRegions[i].size, 1);
   }
}

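/* vkCmdUpdateBuffer: stage pData into memory suballocated from the command
 * stream (rounded up to whole 64-byte chunks), then reuse the 1D-blit copy
 * path above with 4-byte blocks, since dstOffset and dataSize are required
 * by the spec to be multiples of 4.
 */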
VKAPI_ATTR void VKAPI_CALL
tu_CmdUpdateBuffer(VkCommandBuffer commandBuffer,
                   VkBuffer dstBuffer,
                   VkDeviceSize dstOffset,
                   VkDeviceSize dataSize,
                   const void *pData)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);

   struct tu_cs_memory tmp;
   VkResult result = tu_cs_alloc(&cmd->sub_cs, DIV_ROUND_UP(dataSize, 64), 64 / 4, &tmp);
   if (result != VK_SUCCESS) {
      cmd->record_result = result;
      return;
   }

   memcpy(tmp.map, pData, dataSize);
   copy_buffer(cmd, tu_buffer_iova(buffer) + dstOffset, tmp.iova, dataSize, 4);
}

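/* vkCmdFillBuffer: same 1D-blit trick as copy_buffer(), but as a clear: the
 * 32-bit pattern is set as the R32_UINT clear value and each iteration fills
 * up to 0x4000 dwords of the 64-byte-aligned destination window.
 */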
VKAPI_ATTR void VKAPI_CALL
tu_CmdFillBuffer(VkCommandBuffer commandBuffer,
                 VkBuffer dstBuffer,
                 VkDeviceSize dstOffset,
                 VkDeviceSize fillSize,
                 uint32_t data)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_buffer, buffer, dstBuffer);
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   if (fillSize == VK_WHOLE_SIZE)
      fillSize = buffer->size - dstOffset;

   uint64_t dst_va = tu_buffer_iova(buffer) + dstOffset;
   uint32_t blocks = fillSize / 4;

   ops->setup(cmd, cs, VK_FORMAT_R32_UINT, VK_IMAGE_ASPECT_COLOR_BIT, 0, true, false);
   ops->clear_value(cs, VK_FORMAT_R32_UINT, &(VkClearValue){.color = {.uint32[0] = data}});

   while (blocks) {
      uint32_t dst_x = (dst_va & 63) / 4;
      uint32_t width = MIN2(blocks, 0x4000 - dst_x);

      ops->dst_buffer(cs, VK_FORMAT_R32_UINT, dst_va & ~63, 0);
      ops->coords(cs, &(VkOffset2D) {dst_x}, NULL, &(VkExtent2D) {width, 1});
      ops->run(cmd, cs);

      dst_va += width * 4;
      blocks -= width;
   }

   ops->teardown(cmd, cs);
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdResolveImage(VkCommandBuffer commandBuffer,
                   VkImage srcImage,
                   VkImageLayout srcImageLayout,
                   VkImage dstImage,
                   VkImageLayout dstImageLayout,
                   uint32_t regionCount,
                   const VkImageResolve *pRegions)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, src_image, srcImage);
   TU_FROM_HANDLE(tu_image, dst_image, dstImage);
   const struct blit_ops *ops = &r2d_ops;
   struct tu_cs *cs = &cmd->cs;

   ops->setup(cmd, cs, dst_image->vk_format, VK_IMAGE_ASPECT_COLOR_BIT,
              0, false, dst_image->layout[0].ubwc);

   for (uint32_t i = 0; i < regionCount; ++i) {
      const VkImageResolve *info = &pRegions[i];
      uint32_t layers = MAX2(info->extent.depth, info->dstSubresource.layerCount);

      assert(info->srcSubresource.layerCount == info->dstSubresource.layerCount);
      /* TODO: aspect masks possible ? */

      coords(ops, cs, &info->dstOffset, &info->srcOffset, &info->extent);

      struct tu_image_view dst, src;
      tu_image_view_blit(&dst, dst_image, &info->dstSubresource, info->dstOffset.z);
      tu_image_view_blit(&src, src_image, &info->srcSubresource, info->srcOffset.z);

      for (uint32_t i = 0; i < layers; i++) {
         ops->src(cmd, cs, &src, i, VK_FILTER_NEAREST);
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }

   ops->teardown(cmd, cs);
}

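/* Iterate either over the bits set in layer_mask (used for multiview, where
 * the mask selects which views to touch) or, when the mask is zero, over
 * layers 0..layers-1.
 */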
#define for_each_layer(layer, layer_mask, layers) \
   for (uint32_t layer = 0; \
        layer < ((layer_mask) ? (util_logbase2(layer_mask) + 1) : layers); \
        layer++) \
      if (!layer_mask || (layer_mask & BIT(layer)))

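/* Resolve an MSAA attachment into a single-sampled one in sysmem with the 2D
 * engine; separate_stencil selects the stencil plane of a D32_S8 image, which
 * tu_resolve_sysmem() below handles as two passes (depth, then stencil).
 */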
static void
resolve_sysmem(struct tu_cmd_buffer *cmd,
               struct tu_cs *cs,
               VkFormat format,
               struct tu_image_view *src,
               struct tu_image_view *dst,
               uint32_t layer_mask,
               uint32_t layers,
               const VkRect2D *rect,
               bool separate_stencil)
{
   const struct blit_ops *ops = &r2d_ops;

   ops->setup(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT,
              0, false, dst->ubwc_enabled);
   ops->coords(cs, &rect->offset, &rect->offset, &rect->extent);

   for_each_layer(i, layer_mask, layers) {
      if (separate_stencil) {
         r2d_src_stencil(cmd, cs, src, i, VK_FILTER_NEAREST);
         r2d_dst_stencil(cs, dst, i);
      } else {
         ops->src(cmd, cs, src, i, VK_FILTER_NEAREST);
         ops->dst(cs, dst, i);
      }
      ops->run(cmd, cs);
   }

   ops->teardown(cmd, cs);
}

void
tu_resolve_sysmem(struct tu_cmd_buffer *cmd,
                  struct tu_cs *cs,
                  struct tu_image_view *src,
                  struct tu_image_view *dst,
                  uint32_t layer_mask,
                  uint32_t layers,
                  const VkRect2D *rect)
{
   assert(src->image->vk_format == dst->image->vk_format);

   if (dst->image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
      resolve_sysmem(cmd, cs, VK_FORMAT_D32_SFLOAT,
                     src, dst, layer_mask, layers, rect, false);
      resolve_sysmem(cmd, cs, VK_FORMAT_S8_UINT,
                     src, dst, layer_mask, layers, rect, true);
   } else {
      resolve_sysmem(cmd, cs, dst->image->vk_format,
                     src, dst, layer_mask, layers, rect, false);
   }
}

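/* Clear every requested mip level of an image with the 2D engine (or the 3D
 * path for MSAA). For 3D images the depth slices take the place of array
 * layers, so layer_count is re-derived per level from the minified depth.
 */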
static void
clear_image(struct tu_cmd_buffer *cmd,
            struct tu_image *image,
            const VkClearValue *clear_value,
            const VkImageSubresourceRange *range,
            VkImageAspectFlags aspect_mask)
{
   uint32_t level_count = tu_get_levelCount(image, range);
   uint32_t layer_count = tu_get_layerCount(image, range);
   struct tu_cs *cs = &cmd->cs;
   VkFormat format = image->vk_format;
   if (format == VK_FORMAT_D32_SFLOAT_S8_UINT || format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
      format = copy_format(format, aspect_mask, false);

   if (image->layout[0].depth0 > 1) {
      assert(layer_count == 1);
      assert(range->baseArrayLayer == 0);
   }

   const struct blit_ops *ops = image->layout[0].nr_samples > 1 ? &r3d_ops : &r2d_ops;

   ops->setup(cmd, cs, format, aspect_mask, 0, true, image->layout[0].ubwc);
   if (image->vk_format == VK_FORMAT_E5B9G9R9_UFLOAT_PACK32)
      ops->clear_value(cs, VK_FORMAT_E5B9G9R9_UFLOAT_PACK32, clear_value);
   else
      ops->clear_value(cs, format, clear_value);

   for (unsigned j = 0; j < level_count; j++) {
      if (image->layout[0].depth0 > 1)
         layer_count = u_minify(image->layout[0].depth0, range->baseMipLevel + j);

      ops->coords(cs, &(VkOffset2D){}, NULL, &(VkExtent2D) {
                     u_minify(image->layout[0].width0, range->baseMipLevel + j),
                     u_minify(image->layout[0].height0, range->baseMipLevel + j)
                  });

      struct tu_image_view dst;
      tu_image_view_copy_blit(&dst, image, format, &(VkImageSubresourceLayers) {
         .aspectMask = aspect_mask,
         .mipLevel = range->baseMipLevel + j,
         .baseArrayLayer = range->baseArrayLayer,
         .layerCount = 1,
      }, 0, false, false);

      for (uint32_t i = 0; i < layer_count; i++) {
         ops->dst(cs, &dst, i);
         ops->run(cmd, cs);
      }
   }

   ops->teardown(cmd, cs);
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdClearColorImage(VkCommandBuffer commandBuffer,
                      VkImage image_h,
                      VkImageLayout imageLayout,
                      const VkClearColorValue *pColor,
                      uint32_t rangeCount,
                      const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);

   for (unsigned i = 0; i < rangeCount; i++)
      clear_image(cmd, image, (const VkClearValue*) pColor, pRanges + i, VK_IMAGE_ASPECT_COLOR_BIT);
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdClearDepthStencilImage(VkCommandBuffer commandBuffer,
                             VkImage image_h,
                             VkImageLayout imageLayout,
                             const VkClearDepthStencilValue *pDepthStencil,
                             uint32_t rangeCount,
                             const VkImageSubresourceRange *pRanges)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   TU_FROM_HANDLE(tu_image, image, image_h);

   for (unsigned i = 0; i < rangeCount; i++) {
      const VkImageSubresourceRange *range = &pRanges[i];

      if (image->vk_format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
         /* can't clear both depth and stencil at once, split up the aspect mask */
         u_foreach_bit(b, range->aspectMask)
            clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, BIT(b));
         continue;
      }

      clear_image(cmd, image, (const VkClearValue*) pDepthStencil, range, range->aspectMask);
   }
}

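/* Clear sysmem attachments with an in-renderpass draw: the clear colors are
 * uploaded as fragment shader constants, blending and depth/stencil state are
 * overridden to write only the requested aspects, and one quad is drawn per
 * clear rect (per layer/view for layered clears).
 */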
static void
tu_clear_sysmem_attachments(struct tu_cmd_buffer *cmd,
                            uint32_t attachment_count,
                            const VkClearAttachment *attachments,
                            uint32_t rect_count,
                            const VkClearRect *rects)
{
   /* the shader path here is special, it avoids changing MRT/etc state */
   const struct tu_render_pass *pass = cmd->state.pass;
   const struct tu_subpass *subpass = cmd->state.subpass;
   const uint32_t mrt_count = subpass->color_count;
   struct tu_cs *cs = &cmd->draw_cs;
   uint32_t clear_value[MAX_RTS][4];
   float z_clear_val = 0.0f;
   uint8_t s_clear_val = 0;
   uint32_t clear_rts = 0, clear_components = 0, num_rts = 0;
   bool z_clear = false;
   bool s_clear = false;
   bool layered_clear = false;
   uint32_t max_samples = 1;

   for (uint32_t i = 0; i < attachment_count; i++) {
      uint32_t a;
      if (attachments[i].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT) {
         uint32_t c = attachments[i].colorAttachment;
         a = subpass->color_attachments[c].attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         clear_rts |= 1 << c;
         clear_components |= 0xf << (c * 4);
         memcpy(clear_value[c], &attachments[i].clearValue, 4 * sizeof(uint32_t));
      } else {
         a = subpass->depth_stencil_attachment.attachment;
         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) {
            z_clear = true;
            z_clear_val = attachments[i].clearValue.depthStencil.depth;
         }

         if (attachments[i].aspectMask & VK_IMAGE_ASPECT_STENCIL_BIT) {
            s_clear = true;
            s_clear_val = attachments[i].clearValue.depthStencil.stencil & 0xff;
         }
      }

      max_samples = MAX2(max_samples, pass->attachments[a].samples);
   }

   /* disable all draw states so they don't interfere
    * TODO: use and re-use draw states
    * we have to disable draw states individually to preserve
    * input attachment states, because a secondary command buffer
    * won't be able to restore them
    */
   tu_cs_emit_pkt7(cs, CP_SET_DRAW_STATE, 3 * (TU_DRAW_STATE_COUNT - 2));
   for (uint32_t i = 0; i < TU_DRAW_STATE_COUNT; i++) {
      if (i == TU_DRAW_STATE_INPUT_ATTACHMENTS_GMEM ||
          i == TU_DRAW_STATE_INPUT_ATTACHMENTS_SYSMEM)
         continue;
      tu_cs_emit(cs, CP_SET_DRAW_STATE__0_GROUP_ID(i) |
                     CP_SET_DRAW_STATE__0_DISABLE);
      tu_cs_emit_qw(cs, 0);
   }
   cmd->state.dirty |= TU_CMD_DIRTY_DRAW_STATE;

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_CNTL0, 2);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL0_DEPTH_REGID(0xfc) |
                  A6XX_SP_FS_OUTPUT_CNTL0_SAMPMASK_REGID(0xfc) |
                  0xfc000000);
   tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_CNTL1_MRT(mrt_count));

   tu_cs_emit_pkt4(cs, REG_A6XX_SP_FS_OUTPUT_REG(0), mrt_count);
   for (uint32_t i = 0; i < mrt_count; i++) {
      if (clear_rts & (1 << i))
         tu_cs_emit(cs, A6XX_SP_FS_OUTPUT_REG_REGID(num_rts++ * 4));
      else
         tu_cs_emit(cs, 0);
   }

   for (uint32_t i = 0; i < rect_count; i++) {
      if (rects[i].baseArrayLayer || rects[i].layerCount > 1)
         layered_clear = true;
   }

   /* a630 doesn't support multiview masks, which means that we can't use the
    * normal multiview path without potentially recompiling a shader on-demand
    * or using a more complicated variant that takes the mask as a const. Just
    * use the layered path instead, since it shouldn't be much worse.
    */
   if (subpass->multiview_mask) {
      layered_clear = true;
   }

   r3d_common(cmd, cs, false, num_rts, layered_clear, false);

   tu_cs_emit_regs(cs,
                   A6XX_SP_FS_RENDER_COMPONENTS(.dword = clear_components));
   tu_cs_emit_regs(cs,
                   A6XX_RB_RENDER_COMPONENTS(.dword = clear_components));

   tu_cs_emit_regs(cs,
                   A6XX_RB_FS_OUTPUT_CNTL0(),
                   A6XX_RB_FS_OUTPUT_CNTL1(.mrt = mrt_count));

   tu_cs_emit_regs(cs, A6XX_SP_BLEND_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_BLEND_CNTL(.independent_blend = 1, .sample_mask = 0xffff));
   for (uint32_t i = 0; i < mrt_count; i++) {
      tu_cs_emit_regs(cs, A6XX_RB_MRT_CONTROL(i,
            .component_enable = COND(clear_rts & (1 << i), 0xf)));
   }

   tu_cs_emit_regs(cs, A6XX_GRAS_LRZ_CNTL(0));
   tu_cs_emit_regs(cs, A6XX_RB_LRZ_CNTL(0));

   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_DEPTH_CNTL(
         .z_enable = z_clear,
         .z_write_enable = z_clear,
         .zfunc = FUNC_ALWAYS));
   tu_cs_emit_regs(cs, A6XX_GRAS_SU_DEPTH_PLANE_CNTL());
   tu_cs_emit_regs(cs, A6XX_RB_STENCIL_CONTROL(
         .stencil_enable = s_clear,
         .func = FUNC_ALWAYS,
         .zpass = STENCIL_REPLACE));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILMASK(.mask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILWRMASK(.wrmask = 0xff));
   tu_cs_emit_regs(cs, A6XX_RB_STENCILREF(.ref = s_clear_val));

   tu_cs_emit_pkt7(cs, CP_LOAD_STATE6_FRAG, 3 + 4 * num_rts);
   tu_cs_emit(cs, CP_LOAD_STATE6_0_DST_OFF(0) |
                  CP_LOAD_STATE6_0_STATE_TYPE(ST6_CONSTANTS) |
                  CP_LOAD_STATE6_0_STATE_SRC(SS6_DIRECT) |
                  CP_LOAD_STATE6_0_STATE_BLOCK(SB6_FS_SHADER) |
                  CP_LOAD_STATE6_0_NUM_UNIT(num_rts));
   tu_cs_emit(cs, CP_LOAD_STATE6_1_EXT_SRC_ADDR(0));
   tu_cs_emit(cs, CP_LOAD_STATE6_2_EXT_SRC_ADDR_HI(0));
   u_foreach_bit(b, clear_rts)
      tu_cs_emit_array(cs, clear_value[b], 4);

   for (uint32_t i = 0; i < rect_count; i++) {
      /* This should be true because of this valid usage for
       * vkCmdClearAttachments:
       *
       *    "If the render pass instance this is recorded in uses multiview,
       *    then baseArrayLayer must be zero and layerCount must be one"
       */
      assert(!subpass->multiview_mask || rects[i].baseArrayLayer == 0);

      for_each_layer(layer, subpass->multiview_mask, rects[i].layerCount) {
         r3d_coords_raw(cs, (float[]) {
            rects[i].rect.offset.x, rects[i].rect.offset.y,
            z_clear_val, uif(rects[i].baseArrayLayer + layer),
            rects[i].rect.offset.x + rects[i].rect.extent.width,
            rects[i].rect.offset.y + rects[i].rect.extent.height,
            z_clear_val, 1.0f,
         });
         r3d_run(cmd, cs);
      }
   }
}

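/* Pack a clear value into the layout the event blit expects in
 * RB_BLIT_CLEAR_COLOR_DW0..3: depth/stencil formats get dedicated packing,
 * linear clear colors are encoded to sRGB first for sRGB formats, and color
 * formats are packed according to the bit width of the red channel (e.g.
 * RGB565 is matched by its 5-bit red channel and packed with the r5g6b5
 * helper).
 */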
static void
pack_gmem_clear_value(const VkClearValue *val, VkFormat format, uint32_t clear_value[4])
{
   switch (format) {
   case VK_FORMAT_X8_D24_UNORM_PACK32:
   case VK_FORMAT_D24_UNORM_S8_UINT:
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 24) |
                       val->depthStencil.stencil << 24;
      return;
   case VK_FORMAT_D16_UNORM:
      clear_value[0] = tu_pack_float32_for_unorm(val->depthStencil.depth, 16);
      return;
   case VK_FORMAT_D32_SFLOAT:
      clear_value[0] = fui(val->depthStencil.depth);
      return;
   case VK_FORMAT_S8_UINT:
      clear_value[0] = val->depthStencil.stencil;
      return;
   default:
      break;
   }

   float tmp[4];
   memcpy(tmp, val->color.float32, 4 * sizeof(float));
   if (vk_format_is_srgb(format)) {
      for (int i = 0; i < 3; i++)
         tmp[i] = util_format_linear_to_srgb_float(tmp[i]);
   }

#define PACK_F(type) util_format_##type##_pack_rgba_float \
   ( (uint8_t*) &clear_value[0], 0, tmp, 0, 1, 1)
   switch (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_X)) {
   case 4:
      PACK_F(r4g4b4a4_unorm);
      break;
   case 5:
      if (vk_format_get_component_bits(format, UTIL_FORMAT_COLORSPACE_RGB, PIPE_SWIZZLE_Y) == 6)
         PACK_F(r5g6b5_unorm);
      else
         PACK_F(r5g5b5a1_unorm);
      break;
   case 8:
      if (vk_format_is_snorm(format))
         PACK_F(r8g8b8a8_snorm);
      else if (vk_format_is_unorm(format))
         PACK_F(r8g8b8a8_unorm);
      else
         pack_int8(clear_value, val->color.uint32);
      break;
   case 10:
      if (vk_format_is_int(format))
         pack_int10_2(clear_value, val->color.uint32);
      else
         PACK_F(r10g10b10a2_unorm);
      break;
   case 11:
      clear_value[0] = float3_to_r11g11b10f(val->color.float32);
      break;
   case 16:
      if (vk_format_is_snorm(format))
         PACK_F(r16g16b16a16_snorm);
      else if (vk_format_is_unorm(format))
         PACK_F(r16g16b16a16_unorm);
      else if (vk_format_is_float(format))
         PACK_F(r16g16b16a16_float);
      else
         pack_int16(clear_value, val->color.uint32);
      break;
   case 32:
      memcpy(clear_value, val->color.float32, 4 * sizeof(float));
      break;
   default:
      unreachable("unexpected channel size");
   }
#undef PACK_F
}

static void
clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                      struct tu_cs *cs,
                      VkFormat format,
                      uint8_t clear_mask,
                      uint32_t gmem_offset,
                      const VkClearValue *value)
{
   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 1);
   tu_cs_emit(cs, A6XX_RB_BLIT_DST_INFO_COLOR_FORMAT(tu6_base_format(format)));

   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(.gmem = 1, .clear_mask = clear_mask));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_BASE_GMEM, 1);
   tu_cs_emit(cs, gmem_offset);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_UNKNOWN_88D0, 1);
   tu_cs_emit(cs, 0);

   uint32_t clear_vals[4] = {};
   pack_gmem_clear_value(value, format, clear_vals);

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_CLEAR_COLOR_DW0, 4);
   tu_cs_emit_array(cs, clear_vals, 4);

   tu6_emit_event_write(cmd, cs, BLIT);
}

static void
tu_emit_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                              struct tu_cs *cs,
                              uint32_t attachment,
                              VkImageAspectFlags mask,
                              const VkClearValue *value)
{
   const struct tu_render_pass_attachment *att =
      &cmd->state.pass->attachments[attachment];

   if (att->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
      if (mask & VK_IMAGE_ASPECT_DEPTH_BIT)
         clear_gmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, 0xf, att->gmem_offset, value);
      if (mask & VK_IMAGE_ASPECT_STENCIL_BIT)
         clear_gmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, 0xf, att->gmem_offset_stencil, value);
      return;
   }

   clear_gmem_attachment(cmd, cs, att->format, aspect_write_mask(att->format, mask), att->gmem_offset, value);
}

static void
tu_clear_gmem_attachments(struct tu_cmd_buffer *cmd,
                          uint32_t attachment_count,
                          const VkClearAttachment *attachments,
                          uint32_t rect_count,
                          const VkClearRect *rects)
{
   const struct tu_subpass *subpass = cmd->state.subpass;
   struct tu_cs *cs = &cmd->draw_cs;

   /* TODO: swap the loops for smaller cmdstream */
   for (unsigned i = 0; i < rect_count; i++) {
      unsigned x1 = rects[i].rect.offset.x;
      unsigned y1 = rects[i].rect.offset.y;
      unsigned x2 = x1 + rects[i].rect.extent.width - 1;
      unsigned y2 = y1 + rects[i].rect.extent.height - 1;

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_SCISSOR_TL, 2);
      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_TL_X(x1) | A6XX_RB_BLIT_SCISSOR_TL_Y(y1));
      tu_cs_emit(cs, A6XX_RB_BLIT_SCISSOR_BR_X(x2) | A6XX_RB_BLIT_SCISSOR_BR_Y(y2));

      for (unsigned j = 0; j < attachment_count; j++) {
         uint32_t a;
         if (attachments[j].aspectMask & VK_IMAGE_ASPECT_COLOR_BIT)
            a = subpass->color_attachments[attachments[j].colorAttachment].attachment;
         else
            a = subpass->depth_stencil_attachment.attachment;

         if (a == VK_ATTACHMENT_UNUSED)
            continue;

         tu_emit_clear_gmem_attachment(cmd, cs, a, attachments[j].aspectMask,
                                       &attachments[j].clearValue);
      }
   }
}

VKAPI_ATTR void VKAPI_CALL
tu_CmdClearAttachments(VkCommandBuffer commandBuffer,
                       uint32_t attachmentCount,
                       const VkClearAttachment *pAttachments,
                       uint32_t rectCount,
                       const VkClearRect *pRects)
{
   TU_FROM_HANDLE(tu_cmd_buffer, cmd, commandBuffer);
   struct tu_cs *cs = &cmd->draw_cs;

   /* sysmem path behaves like a draw, note we don't have a way of using different
    * flushes for sysmem/gmem, so this needs to be outside of the cond_exec
    */
   tu_emit_cache_flush_renderpass(cmd, cs);

   for (uint32_t j = 0; j < attachmentCount; j++) {
      if ((pAttachments[j].aspectMask & VK_IMAGE_ASPECT_DEPTH_BIT) == 0)
         continue;
      cmd->state.lrz.valid = false;
      cmd->state.dirty |= TU_CMD_DIRTY_LRZ;
   }

   /* vkCmdClearAttachments is supposed to respect the predicate if active.
    * The easiest way to do this is to always use the 3d path, which always
    * works even with GMEM because it's just a simple draw using the existing
    * attachment state. However it seems that IGNORE_VISIBILITY draws must be
    * skipped in the binning pass, since otherwise they produce binning data
    * which isn't consumed and leads to the wrong binning data being read, so
    * condition on GMEM | SYSMEM.
    */
   if (cmd->state.predication_active) {
      tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM |
                             CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
      tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
      tu_cond_exec_end(cs);
      return;
   }

   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_GMEM);
   tu_clear_gmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
   tu_cond_exec_end(cs);

   tu_cond_exec_start(cs, CP_COND_EXEC_0_RENDER_MODE_SYSMEM);
   tu_clear_sysmem_attachments(cmd, attachmentCount, pAttachments, rectCount, pRects);
   tu_cond_exec_end(cs);
}

static void
clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        VkFormat format,
                        VkImageAspectFlags clear_mask,
                        const VkRenderPassBeginInfo *info,
                        uint32_t a,
                        bool separate_stencil)
{
   const struct tu_framebuffer *fb = cmd->state.framebuffer;
   const struct tu_image_view *iview = fb->attachments[a].attachment;
   const uint32_t clear_views = cmd->state.pass->attachments[a].clear_views;
   const struct blit_ops *ops = &r2d_ops;
   if (cmd->state.pass->attachments[a].samples > 1)
      ops = &r3d_ops;

   ops->setup(cmd, cs, format, clear_mask, 0, true, iview->ubwc_enabled);
   ops->coords(cs, &info->renderArea.offset, NULL, &info->renderArea.extent);
   ops->clear_value(cs, format, &info->pClearValues[a]);

   for_each_layer(i, clear_views, fb->layers) {
      if (separate_stencil) {
         if (ops == &r3d_ops)
            r3d_dst_stencil(cs, iview, i);
         else
            r2d_dst_stencil(cs, iview, i);
      } else {
         ops->dst(cs, iview, i);
      }
      ops->run(cmd, cs);
   }

   ops->teardown(cmd, cs);
}

void
tu_clear_sysmem_attachment(struct tu_cmd_buffer *cmd,
                           struct tu_cs *cs,
                           uint32_t a,
                           const VkRenderPassBeginInfo *info)
{
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (!attachment->clear_mask)
      return;

   /* Wait for any flushes at the beginning of the renderpass to complete */
   tu_cs_emit_wfi(cs);

   if (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT) {
      if (attachment->clear_mask & VK_IMAGE_ASPECT_DEPTH_BIT) {
         clear_sysmem_attachment(cmd, cs, VK_FORMAT_D32_SFLOAT, VK_IMAGE_ASPECT_COLOR_BIT,
                                 info, a, false);
      }
      if (attachment->clear_mask & VK_IMAGE_ASPECT_STENCIL_BIT) {
         clear_sysmem_attachment(cmd, cs, VK_FORMAT_S8_UINT, VK_IMAGE_ASPECT_COLOR_BIT,
                                 info, a, true);
      }
   } else {
      clear_sysmem_attachment(cmd, cs, attachment->format, attachment->clear_mask,
                              info, a, false);
   }

   /* The spec doesn't explicitly say, but presumably the initial renderpass
    * clear is considered part of the renderpass, and therefore barriers
    * aren't required inside the subpass/renderpass. Therefore we need to
    * flush CCU color into CCU depth here, just like with
    * vkCmdClearAttachments(). Note that because this only happens at the
    * beginning of a renderpass, and renderpass writes are considered
    * "incoherent", we shouldn't have to worry about syncing depth into color
    * beforehand as depth should already be flushed.
    */
   if (vk_format_is_depth_or_stencil(attachment->format)) {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_DEPTH);
   } else {
      tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
      tu6_emit_event_write(cmd, cs, PC_CCU_INVALIDATE_COLOR);
   }
}

void
tu_clear_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         const VkRenderPassBeginInfo *info)
{
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (!attachment->clear_mask)
      return;

   tu_cs_emit_regs(cs, A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   tu_emit_clear_gmem_attachment(cmd, cs, a, attachment->clear_mask,
                                 &info->pClearValues[a]);
}

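/* Emit a GMEM load (resolve=false) or store/resolve (resolve=true) using the
 * CP_EVENT_WRITE::BLIT event, targeting either the color/depth plane or the
 * separate stencil plane of a D32_S8 attachment.
 */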
static void
tu_emit_blit(struct tu_cmd_buffer *cmd,
             struct tu_cs *cs,
             const struct tu_image_view *iview,
             const struct tu_render_pass_attachment *attachment,
             bool resolve,
             bool separate_stencil)
{
   tu_cs_emit_regs(cs,
                   A6XX_RB_MSAA_CNTL(tu_msaa_samples(attachment->samples)));

   tu_cs_emit_regs(cs, A6XX_RB_BLIT_INFO(
      .unk0 = !resolve,
      .gmem = !resolve,
      .sample_0 = vk_format_is_int(attachment->format) |
                  vk_format_is_depth_or_stencil(attachment->format)));

   tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_DST_INFO, 4);
   if (separate_stencil) {
      tu_cs_emit(cs, tu_image_view_stencil(iview, RB_BLIT_DST_INFO) & ~A6XX_RB_BLIT_DST_INFO_FLAGS);
      tu_cs_emit_qw(cs, iview->stencil_base_addr);
      tu_cs_emit(cs, iview->stencil_PITCH);

      tu_cs_emit_regs(cs,
                      A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset_stencil));
   } else {
      tu_cs_emit(cs, iview->RB_BLIT_DST_INFO);
      tu_cs_image_ref_2d(cs, iview, 0, false);

      tu_cs_emit_pkt4(cs, REG_A6XX_RB_BLIT_FLAG_DST, 3);
      tu_cs_image_flag_ref(cs, iview, 0);

      tu_cs_emit_regs(cs,
                      A6XX_RB_BLIT_BASE_GMEM(attachment->gmem_offset));
   }

   tu6_emit_event_write(cmd, cs, BLIT);
}

static bool
blit_can_resolve(VkFormat format)
{
   const struct util_format_description *desc = vk_format_description(format);

   /* blit event can only do resolve for simple cases:
    * averaging samples as unsigned integers or choosing only one sample
    */
   if (vk_format_is_snorm(format) || vk_format_is_srgb(format))
      return false;

   /* can't do formats with larger channel sizes
    * note: this includes all float formats
    * note2: single channel integer formats seem OK
    */
   if (desc->channel[0].size > 10)
      return false;

   switch (format) {
   /* for unknown reasons blit event can't msaa resolve these formats when tiled
    * likely related to these formats having different layout from other cpp=2 formats
    */
   case VK_FORMAT_R8G8_UNORM:
   case VK_FORMAT_R8G8_UINT:
   case VK_FORMAT_R8G8_SINT:
   /* TODO: this one should be able to work? */
   case VK_FORMAT_D24_UNORM_S8_UINT:
      return false;
   default:
      break;
   }

   return true;
}

void
tu_load_gmem_attachment(struct tu_cmd_buffer *cmd,
                        struct tu_cs *cs,
                        uint32_t a,
                        bool force_load)
{
   const struct tu_image_view *iview =
      cmd->state.framebuffer->attachments[a].attachment;
   const struct tu_render_pass_attachment *attachment =
      &cmd->state.pass->attachments[a];

   if (attachment->load || force_load)
      tu_emit_blit(cmd, cs, iview, attachment, false, false);

   if (attachment->load_stencil || (attachment->format == VK_FORMAT_D32_SFLOAT_S8_UINT && force_load))
      tu_emit_blit(cmd, cs, iview, attachment, false, true);
}

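/* Slow-path GMEM store: instead of the event blit, read the tile directly out
 * of GMEM through the 2D engine by pointing SP_PS_2D_SRC at
 * gmem_base + gmem_offset with a pitch of one tile row, then CP_BLIT it to
 * the attachment in sysmem. Used by tu_store_gmem_attachment() when the
 * render area is unaligned or the resolve case isn't supported by the event
 * blit.
 */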
static void
store_cp_blit(struct tu_cmd_buffer *cmd,
              struct tu_cs *cs,
              struct tu_image_view *iview,
              uint32_t samples,
              bool separate_stencil,
              VkFormat format,
              uint32_t gmem_offset,
              uint32_t cpp)
{
   r2d_setup_common(cmd, cs, format, VK_IMAGE_ASPECT_COLOR_BIT, 0, false,
                    iview->ubwc_enabled, true);
   if (separate_stencil)
      r2d_dst_stencil(cs, iview, 0);
   else
      r2d_dst(cs, iview, 0);

   tu_cs_emit_regs(cs,
                   A6XX_SP_PS_2D_SRC_INFO(
                      .color_format = tu6_format_texture(format, TILE6_2).fmt,
                      .tile_mode = TILE6_2,
                      .srgb = vk_format_is_srgb(format),
                      .samples = tu_msaa_samples(samples),
                      .samples_average = !vk_format_is_int(format) &&
                                         !vk_format_is_depth_or_stencil(format),
                      .unk20 = 1,
                      .unk22 = 1),
                   /* note: src size does not matter when not scaling */
                   A6XX_SP_PS_2D_SRC_SIZE(.width = 0x3fff, .height = 0x3fff),
                   A6XX_SP_PS_2D_SRC(.qword = cmd->device->physical_device->gmem_base + gmem_offset),
                   A6XX_SP_PS_2D_SRC_PITCH(.pitch = cmd->state.framebuffer->tile0.width * cpp));

   /* sync GMEM writes with CACHE. */
   tu6_emit_event_write(cmd, cs, CACHE_INVALIDATE);

   /* Wait for CACHE_INVALIDATE to land */
   tu_cs_emit_wfi(cs);

   tu_cs_emit_pkt7(cs, CP_BLIT, 1);
   tu_cs_emit(cs, CP_BLIT_0_OP(BLIT_OP_SCALE));

   /* CP_BLIT writes to the CCU, unlike CP_EVENT_WRITE::BLIT which writes to
    * sysmem, and we generally assume that GMEM renderpasses leave their
    * results in sysmem, so we need to flush manually here.
    */
   tu6_emit_event_write(cmd, cs, PC_CCU_FLUSH_COLOR_TS);
}

void
tu_store_gmem_attachment(struct tu_cmd_buffer *cmd,
                         struct tu_cs *cs,
                         uint32_t a,
                         uint32_t gmem_a)
{
   struct tu_physical_device *phys_dev = cmd->device->physical_device;
   const VkRect2D *render_area = &cmd->state.render_area;
   struct tu_render_pass_attachment *dst = &cmd->state.pass->attachments[a];
   struct tu_image_view *iview = cmd->state.framebuffer->attachments[a].attachment;
   struct tu_render_pass_attachment *src = &cmd->state.pass->attachments[gmem_a];

   if (!dst->store && !dst->store_stencil)
      return;

   uint32_t x1 = render_area->offset.x;
   uint32_t y1 = render_area->offset.y;
   uint32_t x2 = x1 + render_area->extent.width;
   uint32_t y2 = y1 + render_area->extent.height;
   /* x2/y2 can be unaligned if equal to the size of the image,
    * since it will write into padding space
    * the one exception is linear levels which don't have the
    * required y padding in the layout (except for the last level)
    */
   bool need_y2_align =
      y2 != iview->extent.height || iview->need_y2_align;

   bool unaligned =
      x1 % phys_dev->info->gmem_align_w ||
      (x2 % phys_dev->info->gmem_align_w && x2 != iview->extent.width) ||
      y1 % phys_dev->info->gmem_align_h || (y2 % phys_dev->info->gmem_align_h && need_y2_align);

   /* D32_SFLOAT_S8_UINT is quite special format: it has two planes,
    * one for depth and other for stencil. When resolving a MSAA
    * D32_SFLOAT_S8_UINT to S8_UINT, we need to take that into account.
    */
   bool resolve_d32s8_s8 =
      src->format == VK_FORMAT_D32_SFLOAT_S8_UINT &&
      dst->format == VK_FORMAT_S8_UINT;

   /* use fast path when render area is aligned, except for unsupported resolve cases */
   if (!unaligned && (a == gmem_a || blit_can_resolve(dst->format))) {
      if (dst->store)
         tu_emit_blit(cmd, cs, iview, src, true, resolve_d32s8_s8);
      if (dst->store_stencil)
         tu_emit_blit(cmd, cs, iview, src, true, true);
      return;
   }

   if (dst->samples > 1) {
      /* I guess we need to use shader path in this case?
       * need a testcase which fails because of this
       */
      tu_finishme("unaligned store of msaa attachment\n");
      return;
   }

   r2d_coords(cs, &render_area->offset, &render_area->offset, &render_area->extent);

   VkFormat format = src->format;
   if (format == VK_FORMAT_D32_SFLOAT_S8_UINT)
      format = VK_FORMAT_D32_SFLOAT;

   if (dst->store) {
      store_cp_blit(cmd, cs, iview, src->samples, resolve_d32s8_s8, format,
                    src->gmem_offset, src->cpp);
   }
   if (dst->store_stencil) {
      store_cp_blit(cmd, cs, iview, src->samples, true, VK_FORMAT_S8_UINT,
                    src->gmem_offset_stencil, src->samples);
   }
}