GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_resolve_cs.c
/*
 * Copyright © 2016 Dave Airlie
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <assert.h>
#include <stdbool.h>

#include "nir/nir_builder.h"
#include "radv_meta.h"
#include "radv_private.h"
#include "sid.h"
#include "vk_format.h"

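/* Linear -> sRGB encode for the resolved color. 0x3b4d2e1c is the IEEE-754
 * bit pattern of ~0.0031308, the standard sRGB cutoff: channels below it use
 * the linear 12.92 * x segment, channels above it use the
 * 1.055 * pow(x, 1.0 / 2.4) - 0.055 gamma segment. Alpha is passed through
 * unchanged.
 */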
static nir_ssa_def *
radv_meta_build_resolve_srgb_conversion(nir_builder *b, nir_ssa_def *input)
{
   unsigned i;

   nir_ssa_def *cmp[3];
   for (i = 0; i < 3; i++)
      cmp[i] = nir_flt(b, nir_channel(b, input, i), nir_imm_int(b, 0x3b4d2e1c));

   nir_ssa_def *ltvals[3];
   for (i = 0; i < 3; i++)
      ltvals[i] = nir_fmul(b, nir_channel(b, input, i), nir_imm_float(b, 12.92));

   nir_ssa_def *gtvals[3];

   for (i = 0; i < 3; i++) {
      gtvals[i] = nir_fpow(b, nir_channel(b, input, i), nir_imm_float(b, 1.0 / 2.4));
      gtvals[i] = nir_fmul(b, gtvals[i], nir_imm_float(b, 1.055));
      gtvals[i] = nir_fsub(b, gtvals[i], nir_imm_float(b, 0.055));
   }

   nir_ssa_def *comp[4];
   for (i = 0; i < 3; i++)
      comp[i] = nir_bcsel(b, cmp[i], ltvals[i], gtvals[i]);
   comp[3] = nir_channels(b, input, 1 << 3);
   return nir_vec(b, comp, 4);
}

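/* Build an 8x8-workgroup compute shader that resolves a multisampled color
 * image (set 0, binding 0) into a single-sampled storage image (binding 1).
 * The source and destination offsets arrive as four 32-bit push constants,
 * and the per-sample fetch/average logic itself is emitted by
 * radv_meta_build_resolve_shader_core().
 */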
static nir_shader *
build_resolve_compute_shader(struct radv_device *dev, bool is_integer, bool is_srgb, int samples)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs-%d-%s", samples,
                                     is_integer ? "int" : (is_srgb ? "srgb" : "float"));
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;
   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   nir_ssa_def *src_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *dst_offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 8), .range = 16);

   nir_ssa_def *img_coord = nir_channels(&b, nir_iadd(&b, global_id, src_offset), 0x3);
   nir_variable *color = nir_local_variable_create(b.impl, glsl_vec4_type(), "color");

   radv_meta_build_resolve_shader_core(&b, is_integer, samples, input_img, color, img_coord);

   nir_ssa_def *outval = nir_load_var(&b, color);
   if (is_srgb)
      outval = radv_meta_build_resolve_srgb_conversion(&b, outval);

   nir_ssa_def *coord = nir_iadd(&b, global_id, dst_offset);
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
   return b.shader;
}

enum {
   DEPTH_RESOLVE,
   STENCIL_RESOLVE,
};

static const char *
get_resolve_mode_str(VkResolveModeFlagBits resolve_mode)
{
   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
      return "zero";
   case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
      return "average";
   case VK_RESOLVE_MODE_MIN_BIT_KHR:
      return "min";
   case VK_RESOLVE_MODE_MAX_BIT_KHR:
      return "max";
   default:
      unreachable("invalid resolve mode");
   }
}

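/* Build a compute shader that resolves one layer slice of a multisampled
 * depth or stencil array image. Sample 0 is always fetched; for the
 * AVERAGE/MIN/MAX modes the remaining samples are fetched in a loop and
 * combined with fadd/fmin/fmax (depth) or umin/umax (stencil). AVERAGE is
 * only valid for depth. For SAMPLE_ZERO the loop is skipped entirely, which
 * is why those pipelines can be built with samples == 0.
 */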
static nir_shader *
build_depth_stencil_resolve_compute_shader(struct radv_device *dev, int samples, int index,
                                           VkResolveModeFlagBits resolve_mode)
{
   const struct glsl_type *sampler_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_MS, false, true, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_2D, true, GLSL_TYPE_FLOAT);

   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, "meta_resolve_cs_%s-%s-%d",
      index == DEPTH_RESOLVE ? "depth" : "stencil", get_resolve_mode_str(resolve_mode), samples);
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;

   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;
   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
   nir_ssa_def *layer_id = nir_channel(&b, wg_id, 2);

   nir_ssa_def *img_coord =
      nir_vec3(&b, nir_channel(&b, global_id, 0), nir_channel(&b, global_id, 1), layer_id);

   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

   nir_alu_type type = index == DEPTH_RESOLVE ? nir_type_float32 : nir_type_uint32;

   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->sampler_dim = GLSL_SAMPLER_DIM_MS;
   tex->op = nir_texop_txf_ms;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(img_coord);
   tex->src[1].src_type = nir_tex_src_ms_index;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->src[2].src_type = nir_tex_src_texture_deref;
   tex->src[2].src = nir_src_for_ssa(input_img_deref);
   tex->dest_type = type;
   tex->is_array = true;
   tex->coord_components = 3;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *outval = &tex->dest.ssa;

   if (resolve_mode != VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR) {
      for (int i = 1; i < samples; i++) {
         nir_tex_instr *tex_add = nir_tex_instr_create(b.shader, 3);
         tex_add->sampler_dim = GLSL_SAMPLER_DIM_MS;
         tex_add->op = nir_texop_txf_ms;
         tex_add->src[0].src_type = nir_tex_src_coord;
         tex_add->src[0].src = nir_src_for_ssa(img_coord);
         tex_add->src[1].src_type = nir_tex_src_ms_index;
         tex_add->src[1].src = nir_src_for_ssa(nir_imm_int(&b, i));
         tex_add->src[2].src_type = nir_tex_src_texture_deref;
         tex_add->src[2].src = nir_src_for_ssa(input_img_deref);
         tex_add->dest_type = type;
         tex_add->is_array = true;
         tex_add->coord_components = 3;

         nir_ssa_dest_init(&tex_add->instr, &tex_add->dest, 4, 32, "tex");
         nir_builder_instr_insert(&b, &tex_add->instr);

         switch (resolve_mode) {
         case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
            assert(index == DEPTH_RESOLVE);
            outval = nir_fadd(&b, outval, &tex_add->dest.ssa);
            break;
         case VK_RESOLVE_MODE_MIN_BIT_KHR:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmin(&b, outval, &tex_add->dest.ssa);
            else
               outval = nir_umin(&b, outval, &tex_add->dest.ssa);
            break;
         case VK_RESOLVE_MODE_MAX_BIT_KHR:
            if (index == DEPTH_RESOLVE)
               outval = nir_fmax(&b, outval, &tex_add->dest.ssa);
            else
               outval = nir_umax(&b, outval, &tex_add->dest.ssa);
            break;
         default:
            unreachable("invalid resolve mode");
         }
      }

      if (resolve_mode == VK_RESOLVE_MODE_AVERAGE_BIT_KHR)
         outval = nir_fdiv(&b, outval, nir_imm_float(&b, samples));
   }

   nir_ssa_def *coord = nir_vec4(&b, nir_channel(&b, img_coord, 0), nir_channel(&b, img_coord, 1),
                                 nir_channel(&b, img_coord, 2), nir_imm_int(&b, 0));
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));
   return b.shader;
}

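/* Create the descriptor set layout (sampled image + storage image, updated
 * through VK_KHR_push_descriptor) and the pipeline layout with 16 bytes of
 * push constants that every resolve pipeline in this file shares.
 */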
static VkResult
create_layout(struct radv_device *device)
{
   VkResult result;
   /*
    * Two descriptors: one for the image being sampled,
    * one for the storage image being written.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.resolve_compute.ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.resolve_compute.ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.resolve_compute.p_layout);
   if (result != VK_SUCCESS)
      goto fail;
   return VK_SUCCESS;
fail:
   return result;
}

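/* Compile the color resolve pipeline for one sample count / format class
 * combination. meta_state.mtx guards against two threads building the same
 * on-demand pipeline concurrently.
 */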
static VkResult
create_resolve_pipeline(struct radv_device *device, int samples, bool is_integer, bool is_srgb,
                        VkPipeline *pipeline)
{
   VkResult result;

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs = build_resolve_compute_shader(device, is_integer, is_srgb, samples);

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.resolve_compute.p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

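/* As create_resolve_pipeline(), but for the depth/stencil shader variants
 * selected by index (DEPTH_RESOLVE/STENCIL_RESOLVE) and resolve_mode.
 */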
static VkResult
create_depth_stencil_resolve_pipeline(struct radv_device *device, int samples, int index,
                                      VkResolveModeFlagBits resolve_mode, VkPipeline *pipeline)
{
   VkResult result;

   mtx_lock(&device->meta_state.mtx);
   if (*pipeline) {
      mtx_unlock(&device->meta_state.mtx);
      return VK_SUCCESS;
   }

   nir_shader *cs =
      build_depth_stencil_resolve_compute_shader(device, samples, index, resolve_mode);

   /* compute shader */
   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.resolve_compute.p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return VK_SUCCESS;
fail:
   ralloc_free(cs);
   mtx_unlock(&device->meta_state.mtx);
   return result;
}

VkResult
378
radv_device_init_meta_resolve_compute_state(struct radv_device *device, bool on_demand)
379
{
380
struct radv_meta_state *state = &device->meta_state;
381
VkResult res;
382
383
res = create_layout(device);
384
if (res != VK_SUCCESS)
385
goto fail;
386
387
if (on_demand)
388
return VK_SUCCESS;
389
390
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
391
uint32_t samples = 1 << i;
392
393
res = create_resolve_pipeline(device, samples, false, false,
394
&state->resolve_compute.rc[i].pipeline);
395
if (res != VK_SUCCESS)
396
goto fail;
397
398
res = create_resolve_pipeline(device, samples, true, false,
399
&state->resolve_compute.rc[i].i_pipeline);
400
if (res != VK_SUCCESS)
401
goto fail;
402
403
res = create_resolve_pipeline(device, samples, false, true,
404
&state->resolve_compute.rc[i].srgb_pipeline);
405
if (res != VK_SUCCESS)
406
goto fail;
407
408
res = create_depth_stencil_resolve_pipeline(
409
device, samples, DEPTH_RESOLVE, VK_RESOLVE_MODE_AVERAGE_BIT_KHR,
410
&state->resolve_compute.depth[i].average_pipeline);
411
if (res != VK_SUCCESS)
412
goto fail;
413
414
res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
415
VK_RESOLVE_MODE_MAX_BIT_KHR,
416
&state->resolve_compute.depth[i].max_pipeline);
417
if (res != VK_SUCCESS)
418
goto fail;
419
420
res = create_depth_stencil_resolve_pipeline(device, samples, DEPTH_RESOLVE,
421
VK_RESOLVE_MODE_MIN_BIT_KHR,
422
&state->resolve_compute.depth[i].min_pipeline);
423
if (res != VK_SUCCESS)
424
goto fail;
425
426
res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
427
VK_RESOLVE_MODE_MAX_BIT_KHR,
428
&state->resolve_compute.stencil[i].max_pipeline);
429
if (res != VK_SUCCESS)
430
goto fail;
431
432
res = create_depth_stencil_resolve_pipeline(device, samples, STENCIL_RESOLVE,
433
VK_RESOLVE_MODE_MIN_BIT_KHR,
434
&state->resolve_compute.stencil[i].min_pipeline);
435
if (res != VK_SUCCESS)
436
goto fail;
437
}
438
439
res = create_depth_stencil_resolve_pipeline(device, 0, DEPTH_RESOLVE,
440
VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
441
&state->resolve_compute.depth_zero_pipeline);
442
if (res != VK_SUCCESS)
443
goto fail;
444
445
res = create_depth_stencil_resolve_pipeline(device, 0, STENCIL_RESOLVE,
446
VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR,
447
&state->resolve_compute.stencil_zero_pipeline);
448
if (res != VK_SUCCESS)
449
goto fail;
450
451
return VK_SUCCESS;
452
fail:
453
radv_device_finish_meta_resolve_compute_state(device);
454
return res;
455
}
456
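/* Destroy everything the init function may have created. Destroying a
 * VK_NULL_HANDLE pipeline is a no-op, so partially initialized state is
 * handled naturally.
 */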
void
radv_device_finish_meta_resolve_compute_state(struct radv_device *device)
{
   struct radv_meta_state *state = &device->meta_state;
   for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.rc[i].i_pipeline,
                           &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.rc[i].srgb_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].average_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].max_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.depth[i].min_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.stencil[i].max_pipeline, &state->alloc);

      radv_DestroyPipeline(radv_device_to_handle(device),
                           state->resolve_compute.stencil[i].min_pipeline, &state->alloc);
   }

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.depth_zero_pipeline,
                        &state->alloc);

   radv_DestroyPipeline(radv_device_to_handle(device), state->resolve_compute.stencil_zero_pipeline,
                        &state->alloc);

   radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->resolve_compute.ds_layout,
                                   &state->alloc);
   radv_DestroyPipelineLayout(radv_device_to_handle(device), state->resolve_compute.p_layout,
                              &state->alloc);
}

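/* Select the float/integer/sRGB color resolve pipeline matching the source
 * view, building it on demand if necessary. On failure the error is stored
 * in cmd_buffer->record_result and NULL is returned.
 */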
static VkPipeline *
radv_get_resolve_pipeline(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview)
{
   struct radv_device *device = cmd_buffer->device;
   struct radv_meta_state *state = &device->meta_state;
   uint32_t samples = src_iview->image->info.samples;
   uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   if (vk_format_is_int(src_iview->vk_format))
      pipeline = &state->resolve_compute.rc[samples_log2].i_pipeline;
   else if (vk_format_is_srgb(src_iview->vk_format))
      pipeline = &state->resolve_compute.rc[samples_log2].srgb_pipeline;
   else
      pipeline = &state->resolve_compute.rc[samples_log2].pipeline;

   if (!*pipeline) {
      VkResult ret;

      ret = create_resolve_pipeline(device, samples, vk_format_is_int(src_iview->vk_format),
                                    vk_format_is_srgb(src_iview->vk_format), pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return NULL;
      }
   }

   return pipeline;
}

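/* Bind the source and destination views with a push descriptor set and
 * dispatch one invocation per destination pixel. The four push constants
 * mirror the .range = 16 loads in the shader: words 0-1 hold the source
 * offset, words 2-3 the destination offset.
 */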
static void
emit_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
             struct radv_image_view *dest_iview, const VkOffset2D *src_offset,
             const VkOffset2D *dest_offset, const VkExtent2D *resolve_extent)
{
   struct radv_device *device = cmd_buffer->device;
   VkPipeline *pipeline;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   pipeline = radv_get_resolve_pipeline(cmd_buffer, src_iview);

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   unsigned push_constants[4] = {
      src_offset->x,
      src_offset->y,
      dest_offset->x,
      dest_offset->y,
   };
   radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
                         device->meta_state.resolve_compute.p_layout, VK_SHADER_STAGE_COMPUTE_BIT,
                         0, 16, push_constants);
   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height, 1);
}

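/* Depth/stencil counterpart of emit_resolve(). The pipeline is chosen from
 * the resolve mode and aspect (and built on demand if missing), then the
 * dispatch covers width x height x depth, with the Z workgroup index
 * selecting the array layer in the shader.
 */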
static void
emit_depth_stencil_resolve(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src_iview,
                           struct radv_image_view *dest_iview, const VkExtent3D *resolve_extent,
                           VkImageAspectFlags aspects, VkResolveModeFlagBits resolve_mode)
{
   struct radv_device *device = cmd_buffer->device;
   const uint32_t samples = src_iview->image->info.samples;
   const uint32_t samples_log2 = ffs(samples) - 1;
   VkPipeline *pipeline;

   radv_meta_push_descriptor_set(
      cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.resolve_compute.p_layout,
      0, /* set */
      2, /* descriptorWriteCount */
      (VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 0,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
                                .pImageInfo =
                                   (VkDescriptorImageInfo[]){
                                      {.sampler = VK_NULL_HANDLE,
                                       .imageView = radv_image_view_to_handle(src_iview),
                                       .imageLayout = VK_IMAGE_LAYOUT_GENERAL},
                                   }},
                               {.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
                                .dstBinding = 1,
                                .dstArrayElement = 0,
                                .descriptorCount = 1,
                                .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
                                .pImageInfo = (VkDescriptorImageInfo[]){
                                   {
                                      .sampler = VK_NULL_HANDLE,
                                      .imageView = radv_image_view_to_handle(dest_iview),
                                      .imageLayout = VK_IMAGE_LAYOUT_GENERAL,
                                   },
                                }}});

   switch (resolve_mode) {
   case VK_RESOLVE_MODE_SAMPLE_ZERO_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth_zero_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil_zero_pipeline;
      break;
   case VK_RESOLVE_MODE_AVERAGE_BIT_KHR:
      assert(aspects == VK_IMAGE_ASPECT_DEPTH_BIT);
      pipeline = &device->meta_state.resolve_compute.depth[samples_log2].average_pipeline;
      break;
   case VK_RESOLVE_MODE_MIN_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].min_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].min_pipeline;
      break;
   case VK_RESOLVE_MODE_MAX_BIT_KHR:
      if (aspects == VK_IMAGE_ASPECT_DEPTH_BIT)
         pipeline = &device->meta_state.resolve_compute.depth[samples_log2].max_pipeline;
      else
         pipeline = &device->meta_state.resolve_compute.stencil[samples_log2].max_pipeline;
      break;
   default:
      unreachable("invalid resolve mode");
   }

   if (!*pipeline) {
      int index = aspects == VK_IMAGE_ASPECT_DEPTH_BIT ? DEPTH_RESOLVE : STENCIL_RESOLVE;
      VkResult ret;

      ret = create_depth_stencil_resolve_pipeline(device, samples, index, resolve_mode, pipeline);
      if (ret != VK_SUCCESS) {
         cmd_buffer->record_result = ret;
         return;
      }
   }

   radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
                        *pipeline);

   radv_unaligned_dispatch(cmd_buffer, resolve_extent->width, resolve_extent->height,
                           resolve_extent->depth);
}

void
666
radv_meta_resolve_compute_image(struct radv_cmd_buffer *cmd_buffer, struct radv_image *src_image,
667
VkFormat src_format, VkImageLayout src_image_layout,
668
struct radv_image *dest_image, VkFormat dest_format,
669
VkImageLayout dest_image_layout, const VkImageResolve2KHR *region)
670
{
671
struct radv_meta_saved_state saved_state;
672
673
radv_decompress_resolve_src(cmd_buffer, src_image, src_image_layout, region);
674
675
/* For partial resolves, DCC should be decompressed before resolving
676
* because the metadata is re-initialized to the uncompressed after.
677
*/
678
uint32_t queue_mask = radv_image_queue_family_mask(dest_image, cmd_buffer->queue_family_index,
679
cmd_buffer->queue_family_index);
680
681
if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
682
radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
683
dest_image_layout, false, queue_mask) &&
684
(region->dstOffset.x || region->dstOffset.y || region->dstOffset.z ||
685
region->extent.width != dest_image->info.width ||
686
region->extent.height != dest_image->info.height ||
687
region->extent.depth != dest_image->info.depth)) {
688
radv_decompress_dcc(cmd_buffer, dest_image,
689
&(VkImageSubresourceRange){
690
.aspectMask = region->dstSubresource.aspectMask,
691
.baseMipLevel = region->dstSubresource.mipLevel,
692
.levelCount = 1,
693
.baseArrayLayer = region->dstSubresource.baseArrayLayer,
694
.layerCount = region->dstSubresource.layerCount,
695
});
696
}
697
698
radv_meta_save(
699
&saved_state, cmd_buffer,
700
RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_CONSTANTS | RADV_META_SAVE_DESCRIPTORS);
701
702
assert(region->srcSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
703
assert(region->dstSubresource.aspectMask == VK_IMAGE_ASPECT_COLOR_BIT);
704
assert(region->srcSubresource.layerCount == region->dstSubresource.layerCount);
705
706
const uint32_t src_base_layer =
707
radv_meta_get_iview_layer(src_image, &region->srcSubresource, &region->srcOffset);
708
709
const uint32_t dest_base_layer =
710
radv_meta_get_iview_layer(dest_image, &region->dstSubresource, &region->dstOffset);
711
712
const struct VkExtent3D extent = radv_sanitize_image_extent(src_image->type, region->extent);
713
const struct VkOffset3D srcOffset =
714
radv_sanitize_image_offset(src_image->type, region->srcOffset);
715
const struct VkOffset3D dstOffset =
716
radv_sanitize_image_offset(dest_image->type, region->dstOffset);
717
718
for (uint32_t layer = 0; layer < region->srcSubresource.layerCount; ++layer) {
719
720
struct radv_image_view src_iview;
721
radv_image_view_init(&src_iview, cmd_buffer->device,
722
&(VkImageViewCreateInfo){
723
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
724
.image = radv_image_to_handle(src_image),
725
.viewType = radv_meta_get_view_type(src_image),
726
.format = src_format,
727
.subresourceRange =
728
{
729
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
730
.baseMipLevel = region->srcSubresource.mipLevel,
731
.levelCount = 1,
732
.baseArrayLayer = src_base_layer + layer,
733
.layerCount = 1,
734
},
735
},
736
NULL);
737
738
struct radv_image_view dest_iview;
739
radv_image_view_init(&dest_iview, cmd_buffer->device,
740
&(VkImageViewCreateInfo){
741
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
742
.image = radv_image_to_handle(dest_image),
743
.viewType = radv_meta_get_view_type(dest_image),
744
.format = vk_to_non_srgb_format(dest_format),
745
.subresourceRange =
746
{
747
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
748
.baseMipLevel = region->dstSubresource.mipLevel,
749
.levelCount = 1,
750
.baseArrayLayer = dest_base_layer + layer,
751
.layerCount = 1,
752
},
753
},
754
NULL);
755
756
emit_resolve(cmd_buffer, &src_iview, &dest_iview, &(VkOffset2D){srcOffset.x, srcOffset.y},
757
&(VkOffset2D){dstOffset.x, dstOffset.y},
758
&(VkExtent2D){extent.width, extent.height});
759
}
760
761
radv_meta_restore(&saved_state, cmd_buffer);
762
763
if (!radv_image_use_dcc_image_stores(cmd_buffer->device, dest_image) &&
764
radv_layout_dcc_compressed(cmd_buffer->device, dest_image, region->dstSubresource.mipLevel,
765
dest_image_layout, false, queue_mask)) {
766
767
cmd_buffer->state.flush_bits |= RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE;
768
769
VkImageSubresourceRange range = {
770
.aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
771
.baseMipLevel = region->dstSubresource.mipLevel,
772
.levelCount = 1,
773
.baseArrayLayer = dest_base_layer,
774
.layerCount = region->dstSubresource.layerCount,
775
};
776
777
cmd_buffer->state.flush_bits |= radv_init_dcc(cmd_buffer, dest_image, &range, 0xffffffff);
778
}
779
}
780
/**
 * Emit any needed resolves for the current subpass.
 */
void
radv_cmd_buffer_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer)
{
   struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_subpass_barrier barrier;
   uint32_t layer_count = fb->layers;

   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   barrier.src_stage_mask = VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT;
   barrier.src_access_mask = VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT;
   barrier.dst_access_mask = VK_ACCESS_SHADER_READ_BIT | VK_ACCESS_SHADER_WRITE_BIT;
   radv_subpass_barrier(cmd_buffer, &barrier);

   for (uint32_t i = 0; i < subpass->color_count; ++i) {
      struct radv_subpass_attachment src_att = subpass->color_attachments[i];
      struct radv_subpass_attachment dst_att = subpass->resolve_attachments[i];

      if (dst_att.attachment == VK_ATTACHMENT_UNUSED)
         continue;

      struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
      struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dst_att.attachment].iview;

      VkImageResolve2KHR region = {
         .sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR,
         .extent = (VkExtent3D){fb->width, fb->height, 1},
         .srcSubresource =
            (VkImageSubresourceLayers){
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .mipLevel = src_iview->base_mip,
               .baseArrayLayer = src_iview->base_layer,
               .layerCount = layer_count,
            },
         .dstSubresource =
            (VkImageSubresourceLayers){
               .aspectMask = VK_IMAGE_ASPECT_COLOR_BIT,
               .mipLevel = dst_iview->base_mip,
               .baseArrayLayer = dst_iview->base_layer,
               .layerCount = layer_count,
            },
         .srcOffset = (VkOffset3D){0, 0, 0},
         .dstOffset = (VkOffset3D){0, 0, 0},
      };

      radv_meta_resolve_compute_image(cmd_buffer, src_iview->image, src_iview->vk_format,
                                      src_att.layout, dst_iview->image, dst_iview->vk_format,
                                      dst_att.layout, &region);
   }

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);
}

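/* Depth/stencil counterpart of radv_cmd_buffer_resolve_subpass_cs(): resolve
 * the current subpass depth/stencil attachment into ds_resolve_attachment
 * with the requested mode, then re-initialize HTILE when the destination
 * layout keeps it compressed.
 */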
void
radv_depth_stencil_resolve_subpass_cs(struct radv_cmd_buffer *cmd_buffer,
                                      VkImageAspectFlags aspects,
                                      VkResolveModeFlagBits resolve_mode)
{
   struct radv_framebuffer *fb = cmd_buffer->state.framebuffer;
   const struct radv_subpass *subpass = cmd_buffer->state.subpass;
   struct radv_meta_saved_state saved_state;
   uint32_t layer_count = fb->layers;

   if (subpass->view_mask)
      layer_count = util_last_bit(subpass->view_mask);

   /* Resolves happen before the end-of-subpass barriers get executed, so
    * we have to make the attachment shader-readable.
    */
   cmd_buffer->state.flush_bits |=
      radv_src_access_flush(cmd_buffer, VK_ACCESS_DEPTH_STENCIL_ATTACHMENT_WRITE_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_READ_BIT, NULL) |
      radv_dst_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);

   struct radv_subpass_attachment src_att = *subpass->depth_stencil_attachment;
   struct radv_image_view *src_iview = cmd_buffer->state.attachments[src_att.attachment].iview;
   struct radv_image *src_image = src_iview->image;

   VkImageResolve2KHR region = {0};
   region.sType = VK_STRUCTURE_TYPE_IMAGE_RESOLVE_2_KHR;
   region.srcSubresource.aspectMask = aspects;
   region.srcSubresource.mipLevel = 0;
   region.srcSubresource.baseArrayLayer = src_iview->base_layer;
   region.srcSubresource.layerCount = layer_count;

   radv_decompress_resolve_src(cmd_buffer, src_image, src_att.layout, &region);

   radv_meta_save(&saved_state, cmd_buffer,
                  RADV_META_SAVE_COMPUTE_PIPELINE | RADV_META_SAVE_DESCRIPTORS);

   struct radv_subpass_attachment dest_att = *subpass->ds_resolve_attachment;
   struct radv_image_view *dst_iview = cmd_buffer->state.attachments[dest_att.attachment].iview;
   struct radv_image *dst_image = dst_iview->image;

   struct radv_image_view tsrc_iview;
   radv_image_view_init(&tsrc_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(src_image),
                           .viewType = radv_meta_get_view_type(src_image),
                           .format = src_iview->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = src_iview->base_mip,
                                 .levelCount = 1,
                                 .baseArrayLayer = src_iview->base_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        NULL);

   struct radv_image_view tdst_iview;
   radv_image_view_init(&tdst_iview, cmd_buffer->device,
                        &(VkImageViewCreateInfo){
                           .sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
                           .image = radv_image_to_handle(dst_image),
                           .viewType = radv_meta_get_view_type(dst_image),
                           .format = dst_iview->vk_format,
                           .subresourceRange =
                              {
                                 .aspectMask = aspects,
                                 .baseMipLevel = dst_iview->base_mip,
                                 .levelCount = 1,
                                 .baseArrayLayer = dst_iview->base_layer,
                                 .layerCount = layer_count,
                              },
                        },
                        NULL);

   emit_depth_stencil_resolve(cmd_buffer, &tsrc_iview, &tdst_iview,
                              &(VkExtent3D){fb->width, fb->height, layer_count}, aspects,
                              resolve_mode);

   cmd_buffer->state.flush_bits |=
      RADV_CMD_FLAG_CS_PARTIAL_FLUSH | RADV_CMD_FLAG_INV_VCACHE |
      radv_src_access_flush(cmd_buffer, VK_ACCESS_SHADER_WRITE_BIT, NULL);

   VkImageLayout layout = cmd_buffer->state.attachments[dest_att.attachment].current_layout;
   uint32_t queue_mask = radv_image_queue_family_mask(dst_image, cmd_buffer->queue_family_index,
                                                      cmd_buffer->queue_family_index);

   if (radv_layout_is_htile_compressed(cmd_buffer->device, dst_image, layout, false, queue_mask)) {
      VkImageSubresourceRange range = {0};
      range.aspectMask = aspects;
      range.baseMipLevel = dst_iview->base_mip;
      range.levelCount = 1;
      range.baseArrayLayer = dst_iview->base_layer;
      range.layerCount = layer_count;

      uint32_t htile_value = radv_get_htile_initial_value(cmd_buffer->device, dst_image);

      cmd_buffer->state.flush_bits |= radv_clear_htile(cmd_buffer, dst_image, &range, htile_value);
   }

   radv_meta_restore(&saved_state, cmd_buffer);
}