Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_fast_clear.c
7237 views
1
/*
2
* Copyright © 2016 Intel Corporation
3
*
4
* Permission is hereby granted, free of charge, to any person obtaining a
5
* copy of this software and associated documentation files (the "Software"),
6
* to deal in the Software without restriction, including without limitation
7
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
8
* and/or sell copies of the Software, and to permit persons to whom the
9
* Software is furnished to do so, subject to the following conditions:
10
*
11
* The above copyright notice and this permission notice (including the next
12
* paragraph) shall be included in all copies or substantial portions of the
13
* Software.
14
*
15
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
18
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
20
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
21
* IN THE SOFTWARE.
22
*/
23
24
#include <assert.h>
25
#include <stdbool.h>
26
27
#include "radv_meta.h"
28
#include "radv_private.h"
29
#include "sid.h"
30
31
static nir_shader *
32
build_dcc_decompress_compute_shader(struct radv_device *dev)
33
{
34
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
35
36
nir_builder b =
37
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "dcc_decompress_compute");
38
39
/* We need at least 16/16/1 to cover an entire DCC block in a single workgroup. */
40
b.shader->info.workgroup_size[0] = 16;
41
b.shader->info.workgroup_size[1] = 16;
42
b.shader->info.workgroup_size[2] = 1;
43
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "in_img");
44
input_img->data.descriptor_set = 0;
45
input_img->data.binding = 0;
46
47
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
48
output_img->data.descriptor_set = 0;
49
output_img->data.binding = 1;
50
51
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
52
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
53
nir_ssa_def *block_size =
54
nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
55
b.shader->info.workgroup_size[2], 0);
56
57
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
58
59
nir_ssa_def *data =
60
nir_image_deref_load(&b, 4, 32, &nir_build_deref_var(&b, input_img)->dest.ssa, global_id,
61
nir_ssa_undef(&b, 1, 32), nir_imm_int(&b, 0));
62
63
/* We need a NIR_SCOPE_DEVICE memory_scope because ACO will avoid
64
* creating a vmcnt(0) because it expects the L1 cache to keep memory
65
* operations in-order for the same workgroup. The vmcnt(0) seems
66
* necessary however. */
67
nir_scoped_barrier(&b, .execution_scope = NIR_SCOPE_WORKGROUP, .memory_scope = NIR_SCOPE_DEVICE,
68
.memory_semantics = NIR_MEMORY_ACQ_REL, .memory_modes = nir_var_mem_ssbo);
69
70
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
71
nir_ssa_undef(&b, 1, 32), data, nir_imm_int(&b, 0));
72
return b.shader;
73
}
74
75
static VkResult
create_dcc_compress_compute(struct radv_device *device)
{
   /* Create the compute path for DCC decompression: a push-descriptor set
    * layout with two storage images, its pipeline layout, and the compute
    * pipeline built from build_dcc_decompress_compute_shader().  All three
    * objects are stored in device->meta_state.fast_clear_flush.
    *
    * Returns VK_SUCCESS or the first failing Vulkan result.  On failure,
    * already-created objects are left in meta_state and are destroyed later
    * by radv_device_finish_meta_fast_clear_flush_state(). */
   VkResult result = VK_SUCCESS;
   nir_shader *cs = build_dcc_decompress_compute_shader(device);

   /* Binding 0 = source (compressed) view, binding 1 = destination
    * (decompressed) view; both written with push descriptors at record
    * time. */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(
      radv_device_to_handle(device), &ds_create_info, &device->meta_state.alloc,
      &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.fast_clear_flush.dcc_decompress_compute_ds_layout,
      .pushConstantRangeCount = 0,
      .pPushConstantRanges = NULL,
   };

   result = radv_CreatePipelineLayout(
      radv_device_to_handle(device), &pl_create_info, &device->meta_state.alloc,
      &device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout);
   if (result != VK_SUCCESS)
      goto cleanup;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &vk_pipeline_info, NULL,
      &device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);
   if (result != VK_SUCCESS)
      goto cleanup;

cleanup:
   /* The NIR shader is owned by this function; the pipeline keeps its own
    * compiled copy, so the source IR is freed on both success and failure. */
   ralloc_free(cs);
   return result;
}
146
147
static VkResult
148
create_pass(struct radv_device *device)
149
{
150
VkResult result;
151
VkDevice device_h = radv_device_to_handle(device);
152
const VkAllocationCallbacks *alloc = &device->meta_state.alloc;
153
VkAttachmentDescription2 attachment;
154
155
attachment.sType = VK_STRUCTURE_TYPE_ATTACHMENT_DESCRIPTION_2;
156
attachment.pNext = NULL;
157
attachment.format = VK_FORMAT_UNDEFINED;
158
attachment.samples = 1;
159
attachment.loadOp = VK_ATTACHMENT_LOAD_OP_LOAD;
160
attachment.storeOp = VK_ATTACHMENT_STORE_OP_STORE;
161
attachment.initialLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
162
attachment.finalLayout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL;
163
164
result = radv_CreateRenderPass2(
165
device_h,
166
&(VkRenderPassCreateInfo2){
167
.sType = VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO_2,
168
.attachmentCount = 1,
169
.pAttachments = &attachment,
170
.subpassCount = 1,
171
.pSubpasses =
172
&(VkSubpassDescription2){
173
.sType = VK_STRUCTURE_TYPE_SUBPASS_DESCRIPTION_2,
174
.pipelineBindPoint = VK_PIPELINE_BIND_POINT_GRAPHICS,
175
.inputAttachmentCount = 0,
176
.colorAttachmentCount = 1,
177
.pColorAttachments =
178
(VkAttachmentReference2[]){
179
{
180
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
181
.attachment = 0,
182
.layout = VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
183
},
184
},
185
.pResolveAttachments = NULL,
186
.pDepthStencilAttachment =
187
&(VkAttachmentReference2){
188
.sType = VK_STRUCTURE_TYPE_ATTACHMENT_REFERENCE_2,
189
.attachment = VK_ATTACHMENT_UNUSED,
190
},
191
.preserveAttachmentCount = 0,
192
.pPreserveAttachments = NULL,
193
},
194
.dependencyCount = 2,
195
.pDependencies =
196
(VkSubpassDependency2[]){{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
197
.srcSubpass = VK_SUBPASS_EXTERNAL,
198
.dstSubpass = 0,
199
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
200
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
201
.srcAccessMask = 0,
202
.dstAccessMask = 0,
203
.dependencyFlags = 0},
204
{.sType = VK_STRUCTURE_TYPE_SUBPASS_DEPENDENCY_2,
205
.srcSubpass = 0,
206
.dstSubpass = VK_SUBPASS_EXTERNAL,
207
.srcStageMask = VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT,
208
.dstStageMask = VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
209
.srcAccessMask = 0,
210
.dstAccessMask = 0,
211
.dependencyFlags = 0}},
212
},
213
alloc, &device->meta_state.fast_clear_flush.pass);
214
215
return result;
216
}
217
218
static VkResult
219
create_pipeline_layout(struct radv_device *device, VkPipelineLayout *layout)
220
{
221
VkPipelineLayoutCreateInfo pl_create_info = {
222
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
223
.setLayoutCount = 0,
224
.pSetLayouts = NULL,
225
.pushConstantRangeCount = 0,
226
.pPushConstantRanges = NULL,
227
};
228
229
return radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
230
&device->meta_state.alloc, layout);
231
}
232
233
static VkResult
234
create_pipeline(struct radv_device *device, VkShaderModule vs_module_h, VkPipelineLayout layout)
235
{
236
VkResult result;
237
VkDevice device_h = radv_device_to_handle(device);
238
239
nir_shader *fs_module = radv_meta_build_nir_fs_noop();
240
241
if (!fs_module) {
242
/* XXX: Need more accurate error */
243
result = VK_ERROR_OUT_OF_HOST_MEMORY;
244
goto cleanup;
245
}
246
247
const VkPipelineShaderStageCreateInfo stages[2] = {
248
{
249
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
250
.stage = VK_SHADER_STAGE_VERTEX_BIT,
251
.module = vs_module_h,
252
.pName = "main",
253
},
254
{
255
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
256
.stage = VK_SHADER_STAGE_FRAGMENT_BIT,
257
.module = vk_shader_module_handle_from_nir(fs_module),
258
.pName = "main",
259
},
260
};
261
262
const VkPipelineVertexInputStateCreateInfo vi_state = {
263
.sType = VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,
264
.vertexBindingDescriptionCount = 0,
265
.vertexAttributeDescriptionCount = 0,
266
};
267
268
const VkPipelineInputAssemblyStateCreateInfo ia_state = {
269
.sType = VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO,
270
.topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
271
.primitiveRestartEnable = false,
272
};
273
274
const VkPipelineColorBlendStateCreateInfo blend_state = {
275
.sType = VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO,
276
.logicOpEnable = false,
277
.attachmentCount = 1,
278
.pAttachments = (VkPipelineColorBlendAttachmentState[]){
279
{
280
.colorWriteMask = VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT |
281
VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT,
282
},
283
}};
284
const VkPipelineRasterizationStateCreateInfo rs_state = {
285
.sType = VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO,
286
.depthClampEnable = false,
287
.rasterizerDiscardEnable = false,
288
.polygonMode = VK_POLYGON_MODE_FILL,
289
.cullMode = VK_CULL_MODE_NONE,
290
.frontFace = VK_FRONT_FACE_COUNTER_CLOCKWISE,
291
};
292
293
result = radv_graphics_pipeline_create(
294
device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
295
&(VkGraphicsPipelineCreateInfo){
296
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
297
.stageCount = 2,
298
.pStages = stages,
299
300
.pVertexInputState = &vi_state,
301
.pInputAssemblyState = &ia_state,
302
303
.pViewportState =
304
&(VkPipelineViewportStateCreateInfo){
305
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
306
.viewportCount = 1,
307
.scissorCount = 1,
308
},
309
.pRasterizationState = &rs_state,
310
.pMultisampleState =
311
&(VkPipelineMultisampleStateCreateInfo){
312
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
313
.rasterizationSamples = 1,
314
.sampleShadingEnable = false,
315
.pSampleMask = NULL,
316
.alphaToCoverageEnable = false,
317
.alphaToOneEnable = false,
318
},
319
.pColorBlendState = &blend_state,
320
.pDynamicState =
321
&(VkPipelineDynamicStateCreateInfo){
322
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
323
.dynamicStateCount = 2,
324
.pDynamicStates =
325
(VkDynamicState[]){
326
VK_DYNAMIC_STATE_VIEWPORT,
327
VK_DYNAMIC_STATE_SCISSOR,
328
},
329
},
330
.layout = layout,
331
.renderPass = device->meta_state.fast_clear_flush.pass,
332
.subpass = 0,
333
},
334
&(struct radv_graphics_pipeline_create_info){
335
.use_rectlist = true,
336
.custom_blend_mode = V_028808_CB_ELIMINATE_FAST_CLEAR,
337
},
338
&device->meta_state.alloc, &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline);
339
if (result != VK_SUCCESS)
340
goto cleanup;
341
342
result = radv_graphics_pipeline_create(
343
device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
344
&(VkGraphicsPipelineCreateInfo){
345
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
346
.stageCount = 2,
347
.pStages = stages,
348
349
.pVertexInputState = &vi_state,
350
.pInputAssemblyState = &ia_state,
351
352
.pViewportState =
353
&(VkPipelineViewportStateCreateInfo){
354
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
355
.viewportCount = 1,
356
.scissorCount = 1,
357
},
358
.pRasterizationState = &rs_state,
359
.pMultisampleState =
360
&(VkPipelineMultisampleStateCreateInfo){
361
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
362
.rasterizationSamples = 1,
363
.sampleShadingEnable = false,
364
.pSampleMask = NULL,
365
.alphaToCoverageEnable = false,
366
.alphaToOneEnable = false,
367
},
368
.pColorBlendState = &blend_state,
369
.pDynamicState =
370
&(VkPipelineDynamicStateCreateInfo){
371
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
372
.dynamicStateCount = 2,
373
.pDynamicStates =
374
(VkDynamicState[]){
375
VK_DYNAMIC_STATE_VIEWPORT,
376
VK_DYNAMIC_STATE_SCISSOR,
377
},
378
},
379
.layout = layout,
380
.renderPass = device->meta_state.fast_clear_flush.pass,
381
.subpass = 0,
382
},
383
&(struct radv_graphics_pipeline_create_info){
384
.use_rectlist = true,
385
.custom_blend_mode = V_028808_CB_FMASK_DECOMPRESS,
386
},
387
&device->meta_state.alloc, &device->meta_state.fast_clear_flush.fmask_decompress_pipeline);
388
if (result != VK_SUCCESS)
389
goto cleanup;
390
391
result = radv_graphics_pipeline_create(
392
device_h, radv_pipeline_cache_to_handle(&device->meta_state.cache),
393
&(VkGraphicsPipelineCreateInfo){
394
.sType = VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO,
395
.stageCount = 2,
396
.pStages = stages,
397
398
.pVertexInputState = &vi_state,
399
.pInputAssemblyState = &ia_state,
400
401
.pViewportState =
402
&(VkPipelineViewportStateCreateInfo){
403
.sType = VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO,
404
.viewportCount = 1,
405
.scissorCount = 1,
406
},
407
.pRasterizationState = &rs_state,
408
.pMultisampleState =
409
&(VkPipelineMultisampleStateCreateInfo){
410
.sType = VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO,
411
.rasterizationSamples = 1,
412
.sampleShadingEnable = false,
413
.pSampleMask = NULL,
414
.alphaToCoverageEnable = false,
415
.alphaToOneEnable = false,
416
},
417
.pColorBlendState = &blend_state,
418
.pDynamicState =
419
&(VkPipelineDynamicStateCreateInfo){
420
.sType = VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO,
421
.dynamicStateCount = 2,
422
.pDynamicStates =
423
(VkDynamicState[]){
424
VK_DYNAMIC_STATE_VIEWPORT,
425
VK_DYNAMIC_STATE_SCISSOR,
426
},
427
},
428
.layout = layout,
429
.renderPass = device->meta_state.fast_clear_flush.pass,
430
.subpass = 0,
431
},
432
&(struct radv_graphics_pipeline_create_info){
433
.use_rectlist = true,
434
.custom_blend_mode = V_028808_CB_DCC_DECOMPRESS,
435
},
436
&device->meta_state.alloc, &device->meta_state.fast_clear_flush.dcc_decompress_pipeline);
437
if (result != VK_SUCCESS)
438
goto cleanup;
439
440
goto cleanup;
441
442
cleanup:
443
ralloc_free(fs_module);
444
return result;
445
}
446
447
void
448
radv_device_finish_meta_fast_clear_flush_state(struct radv_device *device)
449
{
450
struct radv_meta_state *state = &device->meta_state;
451
452
radv_DestroyPipeline(radv_device_to_handle(device),
453
state->fast_clear_flush.dcc_decompress_pipeline, &state->alloc);
454
radv_DestroyPipeline(radv_device_to_handle(device),
455
state->fast_clear_flush.fmask_decompress_pipeline, &state->alloc);
456
radv_DestroyPipeline(radv_device_to_handle(device),
457
state->fast_clear_flush.cmask_eliminate_pipeline, &state->alloc);
458
radv_DestroyRenderPass(radv_device_to_handle(device), state->fast_clear_flush.pass,
459
&state->alloc);
460
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->fast_clear_flush.p_layout,
461
&state->alloc);
462
463
radv_DestroyPipeline(radv_device_to_handle(device),
464
state->fast_clear_flush.dcc_decompress_compute_pipeline, &state->alloc);
465
radv_DestroyPipelineLayout(radv_device_to_handle(device),
466
state->fast_clear_flush.dcc_decompress_compute_p_layout,
467
&state->alloc);
468
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
469
state->fast_clear_flush.dcc_decompress_compute_ds_layout,
470
&state->alloc);
471
}
472
473
static VkResult
radv_device_init_meta_fast_clear_flush_state_internal(struct radv_device *device)
{
   /* Create all fast-clear-flush meta objects: render pass, pipeline
    * layout, the three graphics pipelines and the DCC-decompress compute
    * path.  Callers may race here from on-demand paths, so creation is
    * serialized with meta_state.mtx; the cmask_eliminate_pipeline handle
    * doubles as the "already initialized" flag. */
   VkResult res = VK_SUCCESS;

   mtx_lock(&device->meta_state.mtx);
   /* Another thread may have finished initialization while we waited. */
   if (device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *vs_module = radv_meta_build_nir_vs_generate_vertices();
   if (!vs_module) {
      /* XXX: Need more accurate error */
      res = VK_ERROR_OUT_OF_HOST_MEMORY;
      goto fail;
   }

   res = create_pass(device);
   if (res != VK_SUCCESS)
      goto fail;

   res = create_pipeline_layout(device, &device->meta_state.fast_clear_flush.p_layout);
   if (res != VK_SUCCESS)
      goto fail;

   VkShaderModule vs_module_h = vk_shader_module_handle_from_nir(vs_module);
   res = create_pipeline(device, vs_module_h, device->meta_state.fast_clear_flush.p_layout);
   if (res != VK_SUCCESS)
      goto fail;

   res = create_dcc_compress_compute(device);
   if (res != VK_SUCCESS)
      goto fail;

   goto cleanup;

fail:
   /* Tear down whatever was created before the failure. */
   radv_device_finish_meta_fast_clear_flush_state(device);

cleanup:
   /* ralloc_free(NULL) is a no-op, so this is safe on the !vs_module path. */
   ralloc_free(vs_module);
   mtx_unlock(&device->meta_state.mtx);

   return res;
}
519
520
VkResult
521
radv_device_init_meta_fast_clear_flush_state(struct radv_device *device, bool on_demand)
522
{
523
if (on_demand)
524
return VK_SUCCESS;
525
526
return radv_device_init_meta_fast_clear_flush_state_internal(device);
527
}
528
529
static void
530
radv_emit_set_predication_state_from_image(struct radv_cmd_buffer *cmd_buffer,
531
struct radv_image *image, uint64_t pred_offset,
532
bool value)
533
{
534
uint64_t va = 0;
535
536
if (value) {
537
va = radv_buffer_get_va(image->bo) + image->offset;
538
va += pred_offset;
539
}
540
541
si_emit_set_predication_state(cmd_buffer, true, PREDICATION_OP_BOOL64, va);
542
}
543
544
static void
radv_process_color_image_layer(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                               const VkImageSubresourceRange *range, int level, int layer,
                               bool flush_cb)
{
   /* Run the currently-bound decompress pipeline over one mip level / array
    * layer of the image: create a single-subresource view and framebuffer,
    * begin the fast-clear-flush render pass, draw one full-screen rect (3
    * vertices, rectlist pipelines), then end the pass.
    *
    * flush_cb: when true, CB-write flushes are folded into flush_bits
    * before and after the draw (required around DCC/FMASK decompress, see
    * radv_process_color_image()). */
   struct radv_device *device = cmd_buffer->device;
   struct radv_image_view iview;
   uint32_t width, height;

   /* Dimensions of the specific mip being processed. */
   width = radv_minify(image->info.width, range->baseMipLevel + level);
   height = radv_minify(image->info.height, range->baseMipLevel + level);

   radv_image_view_init(&iview, device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(image),
                           .viewType = radv_meta_get_view_type(image),
                           .format = image->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                 .baseMipLevel = range->baseMipLevel + level,
                                 .levelCount = 1,
                                 .baseArrayLayer = range->baseArrayLayer + layer,
                                 .layerCount = 1,
                              },
                        },
                        NULL);

   VkFramebuffer fb_h;
   radv_CreateFramebuffer(
      radv_device_to_handle(device),
      &(VkFramebufferCreateInfo){.sType = VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO,
                                 .attachmentCount = 1,
                                 .pAttachments = (VkImageView[]){radv_image_view_to_handle(&iview)},
                                 .width = width,
                                 .height = height,
                                 .layers = 1},
      &cmd_buffer->pool->alloc, &fb_h);

   radv_cmd_buffer_begin_render_pass(cmd_buffer,
                                     &(VkRenderPassBeginInfo){
                                        .sType = VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO,
                                        .renderPass = device->meta_state.fast_clear_flush.pass,
                                        .framebuffer = fb_h,
                                        .renderArea = {.offset =
                                                          {
                                                             0,
                                                             0,
                                                          },
                                                       .extent =
                                                          {
                                                             width,
                                                             height,
                                                          }},
                                        .clearValueCount = 0,
                                        .pClearValues = NULL,
                                     },
                                     NULL);

   radv_cmd_buffer_set_subpass(cmd_buffer, &cmd_buffer->state.pass->subpasses[0]);

   /* Make prior CB writes visible before the decompress draw. */
   if (flush_cb)
      cmd_buffer->state.flush_bits |=
         radv_dst_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);

   radv_CmdDraw(radv_cmd_buffer_to_handle(cmd_buffer), 3, 1, 0, 0);

   /* And flush the decompress draw's own CB writes afterwards. */
   if (flush_cb)
      cmd_buffer->state.flush_bits |=
         radv_src_access_flush(cmd_buffer, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, image);

   radv_cmd_buffer_end_render_pass(cmd_buffer);

   /* The framebuffer is per-layer scratch; destroy it immediately. */
   radv_DestroyFramebuffer(radv_device_to_handle(device), fb_h, &cmd_buffer->pool->alloc);
}
620
621
static void
radv_process_color_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                         const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)
{
   /* Graphics-queue decompress of a color image over a subresource range.
    * Picks the pipeline variant (DCC decompress, FMASK decompress, or CMASK
    * eliminate), then iterates every mip level and layer in the range,
    * drawing one full-extent rect per layer via
    * radv_process_color_image_layer(). */
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_saved_state saved_state;
   bool flush_cb = false;
   VkPipeline *pipeline;

   if (decompress_dcc) {
      pipeline = &device->meta_state.fast_clear_flush.dcc_decompress_pipeline;
   } else if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
      pipeline = &device->meta_state.fast_clear_flush.fmask_decompress_pipeline;
   } else {
      pipeline = &device->meta_state.fast_clear_flush.cmask_eliminate_pipeline;
   }

   /* On-demand creation of the meta pipelines. */
   if (!*pipeline) {
      VkResult ret;

      ret = radv_device_init_meta_fast_clear_flush_state_internal(device);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   if (pipeline == &device->meta_state.fast_clear_flush.dcc_decompress_pipeline ||
       pipeline == &device->meta_state.fast_clear_flush.fmask_decompress_pipeline) {
      /* Flushing CB is required before and after DCC_DECOMPRESS or
       * FMASK_DECOMPRESS.
       */
      flush_cb = true;
   }

   radv_meta_save(&saved_state, cmd_buffer, RADV_META_SAVE_GRAPHICS_PIPELINE | RADV_META_SAVE_PASS);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_GRAPHICS,
                        *pipeline);

   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); ++l) {
      uint32_t width, height;

      /* Do not decompress levels without DCC. */
      if (decompress_dcc && !radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
         continue;

      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);

      /* Viewport/scissor are dynamic state; set them to the mip's extent. */
      radv_CmdSetViewport(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                          &(VkViewport){.x = 0,
                                        .y = 0,
                                        .width = width,
                                        .height = height,
                                        .minDepth = 0.0f,
                                        .maxDepth = 1.0f});

      radv_CmdSetScissor(radv_cmd_buffer_to_handle(cmd_buffer), 0, 1,
                         &(VkRect2D){
                            .offset = {0, 0},
                            .extent = {width, height},
                         });

      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
         radv_process_color_image_layer(cmd_buffer, image, subresourceRange, l, s, flush_cb);
      }
   }

   /* Flush and invalidate CB (and CB metadata) after the decompress draws. */
   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_FLUSH_AND_INV_CB | RADV_CMD_FLAG_FLUSH_AND_INV_CB_META;

   radv_meta_restore(&saved_state, cmd_buffer);
}
695
696
static void
radv_emit_color_decompress(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                           const VkImageSubresourceRange *subresourceRange, bool decompress_dcc)
{
   /* Emit a color decompress (DCC decompress when decompress_dcc, otherwise
    * FMASK decompress / fast-clear eliminate), skipping the work on the GPU
    * via conditional execution when the image's predicate metadata says it
    * is not needed.  Afterwards, update the FCE/DCC metadata to record that
    * the image is decompressed. */
   bool use_predication = false;
   bool old_predicating = false;

   /* Graphics-queue only; the compute path is radv_decompress_dcc_compute(). */
   assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);

   /* DCC decompress always wants predication; FCE/CMASK-eliminate only when
    * the image has an FCE predicate and no non-TC-compatible FMASK. */
   if (decompress_dcc ||
       (!(radv_image_has_fmask(image) && !image->tc_compatible_cmask) && image->fce_pred_offset)) {
      use_predication = true;
   }

   /* If we are asked for DCC decompression without DCC predicates we cannot
    * use the FCE predicate. */
   if (decompress_dcc && image->dcc_pred_offset == 0)
      use_predication = false;

   if (radv_dcc_enabled(image, subresourceRange->baseMipLevel) &&
       (image->info.array_size != radv_get_layerCount(image, subresourceRange) ||
        subresourceRange->baseArrayLayer != 0)) {
      /* Only use predication if the image has DCC with mipmaps or
       * if the range of layers covers the whole image because the
       * predication is based on mip level.
       */
      use_predication = false;
   }

   if (use_predication) {
      /* The predicate metadata is an array of 64-bit words, one per mip. */
      uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;
      pred_offset += 8 * subresourceRange->baseMipLevel;

      old_predicating = cmd_buffer->state.predicating;

      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, true);
      cmd_buffer->state.predicating = true;
   }

   radv_process_color_image(cmd_buffer, image, subresourceRange, decompress_dcc);

   if (use_predication) {
      uint64_t pred_offset = decompress_dcc ? image->dcc_pred_offset : image->fce_pred_offset;
      pred_offset += 8 * subresourceRange->baseMipLevel;

      /* Disable our predication and restore the caller's state. */
      cmd_buffer->state.predicating = old_predicating;

      radv_emit_set_predication_state_from_image(cmd_buffer, image, pred_offset, false);

      if (cmd_buffer->state.predication_type != -1) {
         /* Restore previous conditional rendering user state. */
         si_emit_set_predication_state(cmd_buffer, cmd_buffer->state.predication_type,
                                       cmd_buffer->state.predication_op,
                                       cmd_buffer->state.predication_va);
      }
   }

   if (image->fce_pred_offset != 0) {
      /* Clear the image's fast-clear eliminate predicate because
       * FMASK and DCC also imply a fast-clear eliminate.
       */
      radv_update_fce_metadata(cmd_buffer, image, subresourceRange, false);
   }

   /* Mark the image as being decompressed. */
   if (decompress_dcc)
      radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);
}
764
765
void
766
radv_fast_clear_flush_image_inplace(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
767
const VkImageSubresourceRange *subresourceRange)
768
{
769
struct radv_barrier_data barrier = {0};
770
771
if (radv_image_has_fmask(image) && !image->tc_compatible_cmask) {
772
barrier.layout_transitions.fmask_decompress = 1;
773
} else {
774
barrier.layout_transitions.fast_clear_eliminate = 1;
775
}
776
radv_describe_layout_transition(cmd_buffer, &barrier);
777
778
assert(cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL);
779
radv_emit_color_decompress(cmd_buffer, image, subresourceRange, false);
780
}
781
782
static void
radv_decompress_dcc_gfx(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                        const VkImageSubresourceRange *subresourceRange)
{
   /* Graphics-queue DCC decompress: thin wrapper that asserts DCC is
    * actually enabled on the base mip before emitting the decompress. */
   assert(radv_dcc_enabled(image, subresourceRange->baseMipLevel));
   radv_emit_color_decompress(cmd_buffer, image, subresourceRange, true);
}
789
790
static void
radv_decompress_dcc_compute(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
                            const VkImageSubresourceRange *subresourceRange)
{
   /* Compute-queue DCC decompress: for each mip/layer in the range, bind
    * two views of the same subresource — a compression-enabled "load" view
    * and a compression-disabled "store" view — and dispatch the copy shader
    * built by build_dcc_decompress_compute_shader(), which rewrites every
    * texel so the data no longer depends on DCC metadata.  Finally the DCC
    * metadata itself is reinitialized to the fully-expanded value. */
   struct radv_meta_saved_state saved_state;
   struct radv_image_view load_iview = {0};
   struct radv_image_view store_iview = {0};
   struct radv_device *device = cmd_buffer->device;

   /* Make prior writes visible to the shader reads/writes below. */
   cmd_buffer->state.flush_bits |=
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

   /* On-demand creation of the meta state (cmask_eliminate_pipeline is the
    * "initialized" flag for the whole fast-clear-flush state). */
   if (!cmd_buffer->device->meta_state.fast_clear_flush.cmask_eliminate_pipeline) {
      VkResult ret = radv_device_init_meta_fast_clear_flush_state_internal(cmd_buffer->device);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_DESCRIPTORS | RADV_META_SAVE_COMPUTE_PIPELINE);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        device->meta_state.fast_clear_flush.dcc_decompress_compute_pipeline);

   for (uint32_t l = 0; l < radv_get_levelCount(image, subresourceRange); l++) {
      uint32_t width, height;

      /* Do not decompress levels without DCC. */
      if (!radv_dcc_enabled(image, subresourceRange->baseMipLevel + l))
         continue;

      width = radv_minify(image->info.width, subresourceRange->baseMipLevel + l);
      height = radv_minify(image->info.height, subresourceRange->baseMipLevel + l);

      for (uint32_t s = 0; s < radv_get_layerCount(image, subresourceRange); s++) {
         /* Source view: reads go through DCC so compressed data is decoded. */
         radv_image_view_init(
            &load_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo){
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = radv_image_to_handle(image),
               .viewType = VK_IMAGE_VIEW_TYPE_2D,
               .format = image->vk_format,
               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
                                    .levelCount = 1,
                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                    .layerCount = 1},
            },
            &(struct radv_image_view_extra_create_info){.enable_compression = true});
         /* Destination view: writes bypass compression, producing
          * fully-expanded texel data. */
         radv_image_view_init(
            &store_iview, cmd_buffer->device,
            &(VkImageViewCreateInfo){
               .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
               .image = radv_image_to_handle(image),
               .viewType = VK_IMAGE_VIEW_TYPE_2D,
               .format = image->vk_format,
               .subresourceRange = {.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
                                    .baseMipLevel = subresourceRange->baseMipLevel + l,
                                    .levelCount = 1,
                                    .baseArrayLayer = subresourceRange->baseArrayLayer + s,
                                    .layerCount = 1},
            },
            &(struct radv_image_view_extra_create_info){.disable_compression = true});

         /* Bindings match the shader: 0 = load view, 1 = store view. */
         radv_meta_push_descriptor_set(
            cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
            device->meta_state.fast_clear_flush.dcc_decompress_compute_p_layout, 0, /* set */
            2, /* descriptorWriteCount */
            (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 0,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo =
                                         (VkDescriptorImageInfo[]){
                                            {
                                               .sampler = VK_NULL_HANDLE,
                                               .imageView = radv_image_view_to_handle(&load_iview),
                                               .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                            },
                                         }},
                                     {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                      .dstBinding = 1,
                                      .dstArrayElement = 0,
                                      .descriptorCount = 1,
                                      .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                      .pImageInfo = (VkDescriptorImageInfo[]){
                                         {
                                            .sampler = VK_NULL_HANDLE,
                                            .imageView = radv_image_view_to_handle(&store_iview),
                                            .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                         },
                                      }}});

         radv_unaligned_dispatch(cmd_buffer, width, height, 1);
      }
   }

   /* Mark this image as actually being decompressed. */
   radv_update_dcc_metadata(cmd_buffer, image, subresourceRange, false);

   radv_meta_restore(&saved_state, cmd_buffer);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, image);

   /* Initialize the DCC metadata as "fully expanded". */
   cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, image, subresourceRange, 0xffffffff);
}
902
903
void
904
radv_decompress_dcc(struct radv_cmd_buffer *cmd_buffer, struct radv_image *image,
905
const VkImageSubresourceRange *subresourceRange)
906
{
907
struct radv_barrier_data barrier = {0};
908
909
barrier.layout_transitions.dcc_decompress = 1;
910
radv_describe_layout_transition(cmd_buffer, &barrier);
911
912
if (cmd_buffer->queue_family_index == RADV_QUEUE_GENERAL)
913
radv_decompress_dcc_gfx(cmd_buffer, image, subresourceRange);
914
else
915
radv_decompress_dcc_compute(cmd_buffer, image, subresourceRange);
916
}
917
918