Book a Demo!
CoCalc Logo Icon
StoreFeaturesDocsShareSupportNewsAboutPoliciesSign UpSign In
PojavLauncherTeam
GitHub Repository: PojavLauncherTeam/mesa
Path: blob/21.2-virgl/src/amd/vulkan/radv_meta_bufimage.c
7202 views
1
/*
2
* Copyright © 2016 Red Hat.
3
* Copyright © 2016 Bas Nieuwenhuizen
4
*
5
* Permission is hereby granted, free of charge, to any person obtaining a
6
* copy of this software and associated documentation files (the "Software"),
7
* to deal in the Software without restriction, including without limitation
8
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
9
* and/or sell copies of the Software, and to permit persons to whom the
10
* Software is furnished to do so, subject to the following conditions:
11
*
12
* The above copyright notice and this permission notice (including the next
13
* paragraph) shall be included in all copies or substantial portions of the
14
* Software.
15
*
16
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
19
* THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
22
* IN THE SOFTWARE.
23
*/
24
#include "nir/nir_builder.h"
25
#include "radv_meta.h"
26
27
/*
28
* GFX queue: Compute shader implementation of image->buffer copy
29
* Compute queue: implementation also of buffer->image, image->image, and image clear.
30
*/
31
32
/* GFX9 needs to use a 3D sampler to access 3D resources, so the shader has the options
33
* for that.
34
*/
35
/* Build the image->buffer (itob) copy compute shader.
 *
 * Each invocation fetches one texel from a 2D (or, when is_3d, 3D) sampled
 * image at global_id + offset and writes it to a storage texel buffer at
 * linear index pos_y * stride + pos_x.
 * Push constants: image offset (vec2/vec3) at byte 0, buffer stride at
 * byte 12.
 */
static nir_shader *
build_nir_itob_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *sampler_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
                                                  is_3d ? "meta_itob_cs_3d" : "meta_itob_cs");
   /* One texel per invocation, 8x8x1 workgroups. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;
   /* Set 0: binding 0 = source sampled image, binding 1 = dest texel buffer
    * (must match the layout in radv_device_init_meta_itob_state). */
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, sampler_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   /* global_id = workgroup_id * workgroup_size + local_invocation_id */
   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   nir_ssa_def *offset =
      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

   /* Integer texel fetch (txf) from the source image at LOD 0. */
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->sampler_dim = dim;
   tex->op = nir_texop_txf;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(nir_channels(&b, img_coord, is_3d ? 0x7 : 0x3));
   tex->src[1].src_type = nir_tex_src_lod;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->src[2].src_type = nir_tex_src_texture_deref;
   tex->src[2].src = nir_src_for_ssa(input_img_deref);
   tex->dest_type = nir_type_float32;
   tex->is_array = false;
   tex->coord_components = is_3d ? 3 : 2;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   /* Linear destination texel index: pos_y * stride + pos_x. */
   nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
   nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

   nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
   tmp = nir_iadd(&b, tmp, pos_x);

   nir_ssa_def *coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

   nir_ssa_def *outval = &tex->dest.ssa;
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));

   return b.shader;
}
99
100
/* Image to buffer - don't write use image accessors */
101
/* Create the descriptor-set layout, pipeline layout and compute pipeline(s)
 * for the image->buffer copy path.  On GFX9+ an extra pipeline built from
 * the 3D shader variant is also created.  On error, both NIR shaders are
 * freed and the failing VkResult is returned; partially created Vulkan
 * objects are destroyed later by radv_device_finish_meta_itob_state().
 */
static VkResult
radv_device_init_meta_itob_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_itob_compute_shader(device, false);
   nir_shader *cs_3d = NULL;

   /* GFX9 needs a 3D sampler to access 3D resources. */
   if (device->physical_device->rad_info.chip_class >= GFX9)
      cs_3d = build_nir_itob_compute_shader(device, true);

   /*
    * two descriptors one for the image being sampled
    * one for the buffer being written.
    */
   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.itob.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* 16 bytes of push constants: offset (up to vec3) + buffer stride,
    * matching the loads in build_nir_itob_compute_shader(). */
   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.itob.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result =
      radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                &device->meta_state.alloc, &device->meta_state.itob.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.itob.img_p_layout,
   };

   result = radv_CreateComputePipelines(radv_device_to_handle(device),
                                        radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
                                        &vk_pipeline_info, NULL, &device->meta_state.itob.pipeline);
   if (result != VK_SUCCESS)
      goto fail;

   /* Second pipeline with the 3D shader variant (GFX9+ only). */
   if (device->physical_device->rad_info.chip_class >= GFX9) {
      VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
         .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
         .stage = VK_SHADER_STAGE_COMPUTE_BIT,
         .module = vk_shader_module_handle_from_nir(cs_3d),
         .pName = "main",
         .pSpecializationInfo = NULL,
      };

      VkComputePipelineCreateInfo vk_pipeline_info_3d = {
         .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
         .stage = pipeline_shader_stage_3d,
         .flags = 0,
         .layout = device->meta_state.itob.img_p_layout,
      };

      result = radv_CreateComputePipelines(
         radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
         &vk_pipeline_info_3d, NULL, &device->meta_state.itob.pipeline_3d);
      if (result != VK_SUCCESS)
         goto fail;
      ralloc_free(cs_3d);
   }
   ralloc_free(cs);

   return VK_SUCCESS;
fail:
   /* ralloc_free(NULL) is safe, so cs_3d may still be NULL here. */
   ralloc_free(cs);
   ralloc_free(cs_3d);
   return result;
}
206
207
static void
208
radv_device_finish_meta_itob_state(struct radv_device *device)
209
{
210
struct radv_meta_state *state = &device->meta_state;
211
212
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itob.img_p_layout,
213
&state->alloc);
214
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itob.img_ds_layout,
215
&state->alloc);
216
radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline, &state->alloc);
217
if (device->physical_device->rad_info.chip_class >= GFX9)
218
radv_DestroyPipeline(radv_device_to_handle(device), state->itob.pipeline_3d, &state->alloc);
219
}
220
221
/* Build the buffer->image (btoi) copy compute shader.
 *
 * Each invocation fetches one texel from the source texel buffer at linear
 * index pos_y * stride + pos_x and stores it to a 2D (or, when is_3d, 3D)
 * storage image at global_id + offset.
 * Push constants: image offset (vec2/vec3) at byte 0, buffer stride at
 * byte 12.
 */
static nir_shader *
build_nir_btoi_compute_shader(struct radv_device *dev, bool is_3d)
{
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b = nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL,
                                                  is_3d ? "meta_btoi_cs_3d" : "meta_btoi_cs");
   /* One texel per invocation, 8x8x1 workgroups. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;
   /* Set 0: binding 0 = source texel buffer, binding 1 = dest storage image
    * (must match the layout in radv_device_init_meta_btoi_state). */
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   /* global_id = workgroup_id * workgroup_size + local_invocation_id */
   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   nir_ssa_def *offset =
      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   /* Linear source texel index: pos_y * stride + pos_x. */
   nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
   nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

   nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
   tmp = nir_iadd(&b, tmp, pos_x);

   nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

   nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);
   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

   /* txf from the buffer at LOD 0; only channel 0 of the coord is used. */
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
   tex->op = nir_texop_txf;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
   tex->src[1].src_type = nir_tex_src_lod;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->src[2].src_type = nir_tex_src_texture_deref;
   tex->src[2].src = nir_src_for_ssa(input_img_deref);
   tex->dest_type = nir_type_float32;
   tex->is_array = false;
   tex->coord_components = 1;

   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *outval = &tex->dest.ssa;
   nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, img_coord,
                         nir_ssa_undef(&b, 1, 32), outval, nir_imm_int(&b, 0));

   return b.shader;
}
286
287
/* Buffer to image - don't write use image accessors */
288
static VkResult
289
radv_device_init_meta_btoi_state(struct radv_device *device)
290
{
291
VkResult result;
292
nir_shader *cs = build_nir_btoi_compute_shader(device, false);
293
nir_shader *cs_3d = NULL;
294
if (device->physical_device->rad_info.chip_class >= GFX9)
295
cs_3d = build_nir_btoi_compute_shader(device, true);
296
/*
297
* two descriptors one for the image being sampled
298
* one for the buffer being written.
299
*/
300
VkDescriptorSetLayoutCreateInfo ds_create_info = {
301
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
302
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
303
.bindingCount = 2,
304
.pBindings = (VkDescriptorSetLayoutBinding[]){
305
{.binding = 0,
306
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
307
.descriptorCount = 1,
308
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
309
.pImmutableSamplers = NULL},
310
{.binding = 1,
311
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
312
.descriptorCount = 1,
313
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
314
.pImmutableSamplers = NULL},
315
}};
316
317
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
318
&device->meta_state.alloc,
319
&device->meta_state.btoi.img_ds_layout);
320
if (result != VK_SUCCESS)
321
goto fail;
322
323
VkPipelineLayoutCreateInfo pl_create_info = {
324
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
325
.setLayoutCount = 1,
326
.pSetLayouts = &device->meta_state.btoi.img_ds_layout,
327
.pushConstantRangeCount = 1,
328
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
329
};
330
331
result =
332
radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
333
&device->meta_state.alloc, &device->meta_state.btoi.img_p_layout);
334
if (result != VK_SUCCESS)
335
goto fail;
336
337
/* compute shader */
338
339
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
340
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
341
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
342
.module = vk_shader_module_handle_from_nir(cs),
343
.pName = "main",
344
.pSpecializationInfo = NULL,
345
};
346
347
VkComputePipelineCreateInfo vk_pipeline_info = {
348
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
349
.stage = pipeline_shader_stage,
350
.flags = 0,
351
.layout = device->meta_state.btoi.img_p_layout,
352
};
353
354
result = radv_CreateComputePipelines(radv_device_to_handle(device),
355
radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
356
&vk_pipeline_info, NULL, &device->meta_state.btoi.pipeline);
357
if (result != VK_SUCCESS)
358
goto fail;
359
360
if (device->physical_device->rad_info.chip_class >= GFX9) {
361
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
362
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
363
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
364
.module = vk_shader_module_handle_from_nir(cs_3d),
365
.pName = "main",
366
.pSpecializationInfo = NULL,
367
};
368
369
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
370
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
371
.stage = pipeline_shader_stage_3d,
372
.flags = 0,
373
.layout = device->meta_state.btoi.img_p_layout,
374
};
375
376
result = radv_CreateComputePipelines(
377
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
378
&vk_pipeline_info_3d, NULL, &device->meta_state.btoi.pipeline_3d);
379
ralloc_free(cs_3d);
380
}
381
ralloc_free(cs);
382
383
return VK_SUCCESS;
384
fail:
385
ralloc_free(cs_3d);
386
ralloc_free(cs);
387
return result;
388
}
389
390
static void
391
radv_device_finish_meta_btoi_state(struct radv_device *device)
392
{
393
struct radv_meta_state *state = &device->meta_state;
394
395
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi.img_p_layout,
396
&state->alloc);
397
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->btoi.img_ds_layout,
398
&state->alloc);
399
radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline, &state->alloc);
400
radv_DestroyPipeline(radv_device_to_handle(device), state->btoi.pipeline_3d, &state->alloc);
401
}
402
403
/* Buffer to image - special path for R32G32B32 */
404
/* Build the buffer->image copy compute shader for R32G32B32 formats.
 *
 * The destination is addressed as a texel buffer of single 32-bit elements:
 * each invocation fetches one RGB texel from the source buffer and stores
 * its three components individually at
 * img_coord.y * pitch + img_coord.x * 3 + chan.
 * Push constants: image offset at byte 0, dest pitch (in elements) at
 * byte 8, source buffer stride at byte 12.
 */
static nir_shader *
build_nir_btoi_r32g32b32_compute_shader(struct radv_device *dev)
{
   const struct glsl_type *buf_type =
      glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
   nir_builder b =
      nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_btoi_r32g32b32_cs");
   /* One texel per invocation, 8x8x1 workgroups. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;
   /* Set 0: binding 0 = source texel buffer, binding 1 = dest texel buffer. */
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   /* global_id = workgroup_id * workgroup_size + local_invocation_id */
   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   nir_ssa_def *offset = nir_load_push_constant(&b, 2, 32, nir_imm_int(&b, 0), .range = 16);
   nir_ssa_def *pitch = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 8), .range = 16);
   nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);

   /* Linear source texel index: pos_y * stride + pos_x. */
   nir_ssa_def *pos_x = nir_channel(&b, global_id, 0);
   nir_ssa_def *pos_y = nir_channel(&b, global_id, 1);

   nir_ssa_def *tmp = nir_imul(&b, pos_y, stride);
   tmp = nir_iadd(&b, tmp, pos_x);

   nir_ssa_def *buf_coord = nir_vec4(&b, tmp, tmp, tmp, tmp);

   nir_ssa_def *img_coord = nir_iadd(&b, global_id, offset);

   /* Base element index of the destination texel (3 elements per texel). */
   nir_ssa_def *global_pos =
      nir_iadd(&b, nir_imul(&b, nir_channel(&b, img_coord, 1), pitch),
               nir_imul(&b, nir_channel(&b, img_coord, 0), nir_imm_int(&b, 3)));

   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

   /* txf from the source buffer at LOD 0. */
   nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
   tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
   tex->op = nir_texop_txf;
   tex->src[0].src_type = nir_tex_src_coord;
   tex->src[0].src = nir_src_for_ssa(nir_channels(&b, buf_coord, 1));
   tex->src[1].src_type = nir_tex_src_lod;
   tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
   tex->src[2].src_type = nir_tex_src_texture_deref;
   tex->src[2].src = nir_src_for_ssa(input_img_deref);
   tex->dest_type = nir_type_float32;
   tex->is_array = false;
   tex->coord_components = 1;
   nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
   nir_builder_instr_insert(&b, &tex->instr);

   nir_ssa_def *outval = &tex->dest.ssa;

   /* Store R, G, B as three separate single-element writes. */
   for (int chan = 0; chan < 3; chan++) {
      nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));

      nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);

      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
                            nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, chan),
                            nir_imm_int(&b, 0));
   }

   return b.shader;
}
480
481
/* Create the meta objects for the R32G32B32 buffer->image path.  Both the
 * source (uniform texel buffer) and the destination (storage texel buffer)
 * are texel buffers here; see build_nir_btoi_r32g32b32_compute_shader().
 */
static VkResult
radv_device_init_meta_btoi_r32g32b32_state(struct radv_device *device)
{
   VkResult result;
   nir_shader *cs = build_nir_btoi_r32g32b32_compute_shader(device);

   VkDescriptorSetLayoutCreateInfo ds_create_info = {
      .sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
      .flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
      .bindingCount = 2,
      .pBindings = (VkDescriptorSetLayoutBinding[]){
         {.binding = 0,
          .descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
         {.binding = 1,
          .descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
          .descriptorCount = 1,
          .stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
          .pImmutableSamplers = NULL},
      }};

   result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
                                           &device->meta_state.alloc,
                                           &device->meta_state.btoi_r32g32b32.img_ds_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* 16 bytes of push constants: offset, dest pitch and source stride. */
   VkPipelineLayoutCreateInfo pl_create_info = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
      .setLayoutCount = 1,
      .pSetLayouts = &device->meta_state.btoi_r32g32b32.img_ds_layout,
      .pushConstantRangeCount = 1,
      .pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
   };

   result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
                                      &device->meta_state.alloc,
                                      &device->meta_state.btoi_r32g32b32.img_p_layout);
   if (result != VK_SUCCESS)
      goto fail;

   /* compute shader */

   VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
      .sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
      .stage = VK_SHADER_STAGE_COMPUTE_BIT,
      .module = vk_shader_module_handle_from_nir(cs),
      .pName = "main",
      .pSpecializationInfo = NULL,
   };

   VkComputePipelineCreateInfo vk_pipeline_info = {
      .sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
      .stage = pipeline_shader_stage,
      .flags = 0,
      .layout = device->meta_state.btoi_r32g32b32.img_p_layout,
   };

   result = radv_CreateComputePipelines(
      radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
      &vk_pipeline_info, NULL, &device->meta_state.btoi_r32g32b32.pipeline);

   /* Intentional fall-through on success: freeing the NIR shader and
    * returning `result` are shared with the error path. */
fail:
   ralloc_free(cs);
   return result;
}
549
550
static void
551
radv_device_finish_meta_btoi_r32g32b32_state(struct radv_device *device)
552
{
553
struct radv_meta_state *state = &device->meta_state;
554
555
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->btoi_r32g32b32.img_p_layout,
556
&state->alloc);
557
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
558
state->btoi_r32g32b32.img_ds_layout, &state->alloc);
559
radv_DestroyPipeline(radv_device_to_handle(device), state->btoi_r32g32b32.pipeline,
560
&state->alloc);
561
}
562
563
/* Build the image->image (itoi) copy compute shader.
 *
 * Each invocation fetches `samples` texels (one per sample index when
 * multisampled) from the source image at global_id + src_offset and stores
 * each at global_id + dst_offset with the matching sample index.  is_3d
 * selects 3D coordinates (GFX9); otherwise 2D or multisampled-2D.
 * Push constants: src_offset at byte 0, dst_offset at byte 12.
 */
static nir_shader *
build_nir_itoi_compute_shader(struct radv_device *dev, bool is_3d, int samples)
{
   bool is_multisampled = samples > 1;
   enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
                               : is_multisampled ? GLSL_SAMPLER_DIM_MS
                                                 : GLSL_SAMPLER_DIM_2D;
   const struct glsl_type *buf_type = glsl_sampler_type(dim, false, false, GLSL_TYPE_FLOAT);
   const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
   nir_builder b = nir_builder_init_simple_shader(
      MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_itoi_cs_3d-%d" : "meta_itoi_cs-%d", samples);
   /* One texel per invocation, 8x8x1 workgroups. */
   b.shader->info.workgroup_size[0] = 8;
   b.shader->info.workgroup_size[1] = 8;
   b.shader->info.workgroup_size[2] = 1;
   /* Set 0: binding 0 = source sampled image, binding 1 = dest storage
    * image (must match radv_device_init_meta_itoi_state). */
   nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, buf_type, "s_tex");
   input_img->data.descriptor_set = 0;
   input_img->data.binding = 0;

   nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
   output_img->data.descriptor_set = 0;
   output_img->data.binding = 1;

   nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
   nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
   nir_ssa_def *block_size =
      nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
                    b.shader->info.workgroup_size[2], 0);

   /* global_id = workgroup_id * workgroup_size + local_invocation_id */
   nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);

   nir_ssa_def *src_offset =
      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 0), .range = 24);
   nir_ssa_def *dst_offset =
      nir_load_push_constant(&b, is_3d ? 3 : 2, 32, nir_imm_int(&b, 12), .range = 24);

   nir_ssa_def *src_coord = nir_iadd(&b, global_id, src_offset);
   nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;

   nir_ssa_def *dst_coord = nir_iadd(&b, global_id, dst_offset);

   /* One txf per sample.  NOTE(review): the fixed array assumes
    * samples <= 8 — callers pass at most 1 << (MAX_SAMPLES_LOG2 - 1). */
   nir_tex_instr *tex_instr[8];
   for (uint32_t i = 0; i < samples; i++) {
      tex_instr[i] = nir_tex_instr_create(b.shader, is_multisampled ? 4 : 3);

      nir_tex_instr *tex = tex_instr[i];
      tex->sampler_dim = dim;
      tex->op = is_multisampled ? nir_texop_txf_ms : nir_texop_txf;
      tex->src[0].src_type = nir_tex_src_coord;
      tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, is_3d ? 0x7 : 0x3));
      tex->src[1].src_type = nir_tex_src_lod;
      tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
      tex->src[2].src_type = nir_tex_src_texture_deref;
      tex->src[2].src = nir_src_for_ssa(input_img_deref);
      if (is_multisampled) {
         tex->src[3].src_type = nir_tex_src_ms_index;
         tex->src[3].src = nir_src_for_ssa(nir_imm_int(&b, i));
      }
      tex->dest_type = nir_type_float32;
      tex->is_array = false;
      tex->coord_components = is_3d ? 3 : 2;

      nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
      nir_builder_instr_insert(&b, &tex->instr);
   }

   /* Write each fetched sample to the destination with its sample index. */
   for (uint32_t i = 0; i < samples; i++) {
      nir_ssa_def *outval = &tex_instr[i]->dest.ssa;
      nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
                            nir_imm_int(&b, i), outval, nir_imm_int(&b, 0));
   }

   return b.shader;
}
636
637
static VkResult
638
create_itoi_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
639
{
640
struct radv_meta_state *state = &device->meta_state;
641
nir_shader *cs = build_nir_itoi_compute_shader(device, false, samples);
642
VkResult result;
643
644
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
645
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
646
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
647
.module = vk_shader_module_handle_from_nir(cs),
648
.pName = "main",
649
.pSpecializationInfo = NULL,
650
};
651
652
VkComputePipelineCreateInfo vk_pipeline_info = {
653
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
654
.stage = pipeline_shader_stage,
655
.flags = 0,
656
.layout = state->itoi.img_p_layout,
657
};
658
659
result = radv_CreateComputePipelines(radv_device_to_handle(device),
660
radv_pipeline_cache_to_handle(&state->cache), 1,
661
&vk_pipeline_info, NULL, pipeline);
662
ralloc_free(cs);
663
return result;
664
}
665
666
/* image to image - don't write use image accessors */
667
static VkResult
668
radv_device_init_meta_itoi_state(struct radv_device *device)
669
{
670
VkResult result;
671
672
/*
673
* two descriptors one for the image being sampled
674
* one for the buffer being written.
675
*/
676
VkDescriptorSetLayoutCreateInfo ds_create_info = {
677
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
678
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
679
.bindingCount = 2,
680
.pBindings = (VkDescriptorSetLayoutBinding[]){
681
{.binding = 0,
682
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
683
.descriptorCount = 1,
684
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
685
.pImmutableSamplers = NULL},
686
{.binding = 1,
687
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
688
.descriptorCount = 1,
689
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
690
.pImmutableSamplers = NULL},
691
}};
692
693
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
694
&device->meta_state.alloc,
695
&device->meta_state.itoi.img_ds_layout);
696
if (result != VK_SUCCESS)
697
goto fail;
698
699
VkPipelineLayoutCreateInfo pl_create_info = {
700
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
701
.setLayoutCount = 1,
702
.pSetLayouts = &device->meta_state.itoi.img_ds_layout,
703
.pushConstantRangeCount = 1,
704
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
705
};
706
707
result =
708
radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
709
&device->meta_state.alloc, &device->meta_state.itoi.img_p_layout);
710
if (result != VK_SUCCESS)
711
goto fail;
712
713
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
714
uint32_t samples = 1 << i;
715
result = create_itoi_pipeline(device, samples, &device->meta_state.itoi.pipeline[i]);
716
if (result != VK_SUCCESS)
717
goto fail;
718
}
719
720
if (device->physical_device->rad_info.chip_class >= GFX9) {
721
nir_shader *cs_3d = build_nir_itoi_compute_shader(device, true, 1);
722
723
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
724
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
725
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
726
.module = vk_shader_module_handle_from_nir(cs_3d),
727
.pName = "main",
728
.pSpecializationInfo = NULL,
729
};
730
731
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
732
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
733
.stage = pipeline_shader_stage_3d,
734
.flags = 0,
735
.layout = device->meta_state.itoi.img_p_layout,
736
};
737
738
result = radv_CreateComputePipelines(
739
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
740
&vk_pipeline_info_3d, NULL, &device->meta_state.itoi.pipeline_3d);
741
ralloc_free(cs_3d);
742
}
743
744
return VK_SUCCESS;
745
fail:
746
return result;
747
}
748
749
static void
750
radv_device_finish_meta_itoi_state(struct radv_device *device)
751
{
752
struct radv_meta_state *state = &device->meta_state;
753
754
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi.img_p_layout,
755
&state->alloc);
756
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->itoi.img_ds_layout,
757
&state->alloc);
758
759
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
760
radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline[i], &state->alloc);
761
}
762
763
if (device->physical_device->rad_info.chip_class >= GFX9)
764
radv_DestroyPipeline(radv_device_to_handle(device), state->itoi.pipeline_3d, &state->alloc);
765
}
766
767
static nir_shader *
768
build_nir_itoi_r32g32b32_compute_shader(struct radv_device *dev)
769
{
770
const struct glsl_type *type =
771
glsl_sampler_type(GLSL_SAMPLER_DIM_BUF, false, false, GLSL_TYPE_FLOAT);
772
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
773
nir_builder b =
774
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_itoi_r32g32b32_cs");
775
b.shader->info.workgroup_size[0] = 8;
776
b.shader->info.workgroup_size[1] = 8;
777
b.shader->info.workgroup_size[2] = 1;
778
nir_variable *input_img = nir_variable_create(b.shader, nir_var_uniform, type, "input_img");
779
input_img->data.descriptor_set = 0;
780
input_img->data.binding = 0;
781
782
nir_variable *output_img =
783
nir_variable_create(b.shader, nir_var_uniform, img_type, "output_img");
784
output_img->data.descriptor_set = 0;
785
output_img->data.binding = 1;
786
787
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
788
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
789
nir_ssa_def *block_size =
790
nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
791
b.shader->info.workgroup_size[2], 0);
792
793
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
794
795
nir_ssa_def *src_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 24);
796
nir_ssa_def *dst_offset = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 12), .range = 24);
797
798
nir_ssa_def *src_stride = nir_channel(&b, src_offset, 2);
799
nir_ssa_def *dst_stride = nir_channel(&b, dst_offset, 2);
800
801
nir_ssa_def *src_img_coord = nir_iadd(&b, global_id, src_offset);
802
nir_ssa_def *dst_img_coord = nir_iadd(&b, global_id, dst_offset);
803
804
nir_ssa_def *src_global_pos =
805
nir_iadd(&b, nir_imul(&b, nir_channel(&b, src_img_coord, 1), src_stride),
806
nir_imul(&b, nir_channel(&b, src_img_coord, 0), nir_imm_int(&b, 3)));
807
808
nir_ssa_def *dst_global_pos =
809
nir_iadd(&b, nir_imul(&b, nir_channel(&b, dst_img_coord, 1), dst_stride),
810
nir_imul(&b, nir_channel(&b, dst_img_coord, 0), nir_imm_int(&b, 3)));
811
812
for (int chan = 0; chan < 3; chan++) {
813
/* src */
814
nir_ssa_def *src_local_pos = nir_iadd(&b, src_global_pos, nir_imm_int(&b, chan));
815
816
nir_ssa_def *src_coord =
817
nir_vec4(&b, src_local_pos, src_local_pos, src_local_pos, src_local_pos);
818
819
nir_ssa_def *input_img_deref = &nir_build_deref_var(&b, input_img)->dest.ssa;
820
821
nir_tex_instr *tex = nir_tex_instr_create(b.shader, 3);
822
tex->sampler_dim = GLSL_SAMPLER_DIM_BUF;
823
tex->op = nir_texop_txf;
824
tex->src[0].src_type = nir_tex_src_coord;
825
tex->src[0].src = nir_src_for_ssa(nir_channels(&b, src_coord, 1));
826
tex->src[1].src_type = nir_tex_src_lod;
827
tex->src[1].src = nir_src_for_ssa(nir_imm_int(&b, 0));
828
tex->src[2].src_type = nir_tex_src_texture_deref;
829
tex->src[2].src = nir_src_for_ssa(input_img_deref);
830
tex->dest_type = nir_type_float32;
831
tex->is_array = false;
832
tex->coord_components = 1;
833
nir_ssa_dest_init(&tex->instr, &tex->dest, 4, 32, "tex");
834
nir_builder_instr_insert(&b, &tex->instr);
835
836
nir_ssa_def *outval = &tex->dest.ssa;
837
838
/* dst */
839
nir_ssa_def *dst_local_pos = nir_iadd(&b, dst_global_pos, nir_imm_int(&b, chan));
840
841
nir_ssa_def *dst_coord =
842
nir_vec4(&b, dst_local_pos, dst_local_pos, dst_local_pos, dst_local_pos);
843
844
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, dst_coord,
845
nir_ssa_undef(&b, 1, 32), nir_channel(&b, outval, 0),
846
nir_imm_int(&b, 0));
847
}
848
849
return b.shader;
850
}
851
852
/* Image to image - special path for R32G32B32 */
853
static VkResult
854
radv_device_init_meta_itoi_r32g32b32_state(struct radv_device *device)
855
{
856
VkResult result;
857
nir_shader *cs = build_nir_itoi_r32g32b32_compute_shader(device);
858
859
VkDescriptorSetLayoutCreateInfo ds_create_info = {
860
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
861
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
862
.bindingCount = 2,
863
.pBindings = (VkDescriptorSetLayoutBinding[]){
864
{.binding = 0,
865
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
866
.descriptorCount = 1,
867
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
868
.pImmutableSamplers = NULL},
869
{.binding = 1,
870
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
871
.descriptorCount = 1,
872
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
873
.pImmutableSamplers = NULL},
874
}};
875
876
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
877
&device->meta_state.alloc,
878
&device->meta_state.itoi_r32g32b32.img_ds_layout);
879
if (result != VK_SUCCESS)
880
goto fail;
881
882
VkPipelineLayoutCreateInfo pl_create_info = {
883
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
884
.setLayoutCount = 1,
885
.pSetLayouts = &device->meta_state.itoi_r32g32b32.img_ds_layout,
886
.pushConstantRangeCount = 1,
887
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 24},
888
};
889
890
result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
891
&device->meta_state.alloc,
892
&device->meta_state.itoi_r32g32b32.img_p_layout);
893
if (result != VK_SUCCESS)
894
goto fail;
895
896
/* compute shader */
897
898
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
899
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
900
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
901
.module = vk_shader_module_handle_from_nir(cs),
902
.pName = "main",
903
.pSpecializationInfo = NULL,
904
};
905
906
VkComputePipelineCreateInfo vk_pipeline_info = {
907
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
908
.stage = pipeline_shader_stage,
909
.flags = 0,
910
.layout = device->meta_state.itoi_r32g32b32.img_p_layout,
911
};
912
913
result = radv_CreateComputePipelines(
914
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
915
&vk_pipeline_info, NULL, &device->meta_state.itoi_r32g32b32.pipeline);
916
917
fail:
918
ralloc_free(cs);
919
return result;
920
}
921
922
static void
923
radv_device_finish_meta_itoi_r32g32b32_state(struct radv_device *device)
924
{
925
struct radv_meta_state *state = &device->meta_state;
926
927
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->itoi_r32g32b32.img_p_layout,
928
&state->alloc);
929
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
930
state->itoi_r32g32b32.img_ds_layout, &state->alloc);
931
radv_DestroyPipeline(radv_device_to_handle(device), state->itoi_r32g32b32.pipeline,
932
&state->alloc);
933
}
934
935
static nir_shader *
936
build_nir_cleari_compute_shader(struct radv_device *dev, bool is_3d, int samples)
937
{
938
bool is_multisampled = samples > 1;
939
enum glsl_sampler_dim dim = is_3d ? GLSL_SAMPLER_DIM_3D
940
: is_multisampled ? GLSL_SAMPLER_DIM_MS
941
: GLSL_SAMPLER_DIM_2D;
942
const struct glsl_type *img_type = glsl_image_type(dim, false, GLSL_TYPE_FLOAT);
943
nir_builder b = nir_builder_init_simple_shader(
944
MESA_SHADER_COMPUTE, NULL, is_3d ? "meta_cleari_cs_3d-%d" : "meta_cleari_cs-%d", samples);
945
b.shader->info.workgroup_size[0] = 8;
946
b.shader->info.workgroup_size[1] = 8;
947
b.shader->info.workgroup_size[2] = 1;
948
949
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
950
output_img->data.descriptor_set = 0;
951
output_img->data.binding = 0;
952
953
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
954
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
955
nir_ssa_def *block_size =
956
nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
957
b.shader->info.workgroup_size[2], 0);
958
959
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
960
961
nir_ssa_def *clear_val = nir_load_push_constant(&b, 4, 32, nir_imm_int(&b, 0), .range = 20);
962
nir_ssa_def *layer = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 16), .range = 20);
963
964
nir_ssa_def *global_z = nir_iadd(&b, nir_channel(&b, global_id, 2), layer);
965
966
nir_ssa_def *comps[4];
967
comps[0] = nir_channel(&b, global_id, 0);
968
comps[1] = nir_channel(&b, global_id, 1);
969
comps[2] = global_z;
970
comps[3] = nir_imm_int(&b, 0);
971
global_id = nir_vec(&b, comps, 4);
972
973
for (uint32_t i = 0; i < samples; i++) {
974
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, global_id,
975
nir_imm_int(&b, i), clear_val, nir_imm_int(&b, 0));
976
}
977
978
return b.shader;
979
}
980
981
static VkResult
982
create_cleari_pipeline(struct radv_device *device, int samples, VkPipeline *pipeline)
983
{
984
nir_shader *cs = build_nir_cleari_compute_shader(device, false, samples);
985
VkResult result;
986
987
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
988
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
989
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
990
.module = vk_shader_module_handle_from_nir(cs),
991
.pName = "main",
992
.pSpecializationInfo = NULL,
993
};
994
995
VkComputePipelineCreateInfo vk_pipeline_info = {
996
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
997
.stage = pipeline_shader_stage,
998
.flags = 0,
999
.layout = device->meta_state.cleari.img_p_layout,
1000
};
1001
1002
result = radv_CreateComputePipelines(radv_device_to_handle(device),
1003
radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
1004
&vk_pipeline_info, NULL, pipeline);
1005
ralloc_free(cs);
1006
return result;
1007
}
1008
1009
static VkResult
1010
radv_device_init_meta_cleari_state(struct radv_device *device)
1011
{
1012
VkResult result;
1013
1014
/*
1015
* two descriptors one for the image being sampled
1016
* one for the buffer being written.
1017
*/
1018
VkDescriptorSetLayoutCreateInfo ds_create_info = {
1019
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1020
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1021
.bindingCount = 1,
1022
.pBindings = (VkDescriptorSetLayoutBinding[]){
1023
{.binding = 0,
1024
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1025
.descriptorCount = 1,
1026
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1027
.pImmutableSamplers = NULL},
1028
}};
1029
1030
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
1031
&device->meta_state.alloc,
1032
&device->meta_state.cleari.img_ds_layout);
1033
if (result != VK_SUCCESS)
1034
goto fail;
1035
1036
VkPipelineLayoutCreateInfo pl_create_info = {
1037
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1038
.setLayoutCount = 1,
1039
.pSetLayouts = &device->meta_state.cleari.img_ds_layout,
1040
.pushConstantRangeCount = 1,
1041
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 20},
1042
};
1043
1044
result =
1045
radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
1046
&device->meta_state.alloc, &device->meta_state.cleari.img_p_layout);
1047
if (result != VK_SUCCESS)
1048
goto fail;
1049
1050
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; i++) {
1051
uint32_t samples = 1 << i;
1052
result = create_cleari_pipeline(device, samples, &device->meta_state.cleari.pipeline[i]);
1053
if (result != VK_SUCCESS)
1054
goto fail;
1055
}
1056
1057
if (device->physical_device->rad_info.chip_class >= GFX9) {
1058
nir_shader *cs_3d = build_nir_cleari_compute_shader(device, true, 1);
1059
1060
/* compute shader */
1061
VkPipelineShaderStageCreateInfo pipeline_shader_stage_3d = {
1062
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1063
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1064
.module = vk_shader_module_handle_from_nir(cs_3d),
1065
.pName = "main",
1066
.pSpecializationInfo = NULL,
1067
};
1068
1069
VkComputePipelineCreateInfo vk_pipeline_info_3d = {
1070
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1071
.stage = pipeline_shader_stage_3d,
1072
.flags = 0,
1073
.layout = device->meta_state.cleari.img_p_layout,
1074
};
1075
1076
result = radv_CreateComputePipelines(
1077
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
1078
&vk_pipeline_info_3d, NULL, &device->meta_state.cleari.pipeline_3d);
1079
ralloc_free(cs_3d);
1080
}
1081
1082
return VK_SUCCESS;
1083
fail:
1084
return result;
1085
}
1086
1087
static void
1088
radv_device_finish_meta_cleari_state(struct radv_device *device)
1089
{
1090
struct radv_meta_state *state = &device->meta_state;
1091
1092
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari.img_p_layout,
1093
&state->alloc);
1094
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device), state->cleari.img_ds_layout,
1095
&state->alloc);
1096
1097
for (uint32_t i = 0; i < MAX_SAMPLES_LOG2; ++i) {
1098
radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline[i], &state->alloc);
1099
}
1100
1101
radv_DestroyPipeline(radv_device_to_handle(device), state->cleari.pipeline_3d, &state->alloc);
1102
}
1103
1104
/* Special path for clearing R32G32B32 images using a compute shader. */
1105
static nir_shader *
1106
build_nir_cleari_r32g32b32_compute_shader(struct radv_device *dev)
1107
{
1108
const struct glsl_type *img_type = glsl_image_type(GLSL_SAMPLER_DIM_BUF, false, GLSL_TYPE_FLOAT);
1109
nir_builder b =
1110
nir_builder_init_simple_shader(MESA_SHADER_COMPUTE, NULL, "meta_cleari_r32g32b32_cs");
1111
b.shader->info.workgroup_size[0] = 8;
1112
b.shader->info.workgroup_size[1] = 8;
1113
b.shader->info.workgroup_size[2] = 1;
1114
1115
nir_variable *output_img = nir_variable_create(b.shader, nir_var_uniform, img_type, "out_img");
1116
output_img->data.descriptor_set = 0;
1117
output_img->data.binding = 0;
1118
1119
nir_ssa_def *invoc_id = nir_load_local_invocation_id(&b);
1120
nir_ssa_def *wg_id = nir_load_workgroup_id(&b, 32);
1121
nir_ssa_def *block_size =
1122
nir_imm_ivec4(&b, b.shader->info.workgroup_size[0], b.shader->info.workgroup_size[1],
1123
b.shader->info.workgroup_size[2], 0);
1124
1125
nir_ssa_def *global_id = nir_iadd(&b, nir_imul(&b, wg_id, block_size), invoc_id);
1126
1127
nir_ssa_def *clear_val = nir_load_push_constant(&b, 3, 32, nir_imm_int(&b, 0), .range = 16);
1128
nir_ssa_def *stride = nir_load_push_constant(&b, 1, 32, nir_imm_int(&b, 12), .range = 16);
1129
1130
nir_ssa_def *global_x = nir_channel(&b, global_id, 0);
1131
nir_ssa_def *global_y = nir_channel(&b, global_id, 1);
1132
1133
nir_ssa_def *global_pos =
1134
nir_iadd(&b, nir_imul(&b, global_y, stride), nir_imul(&b, global_x, nir_imm_int(&b, 3)));
1135
1136
for (unsigned chan = 0; chan < 3; chan++) {
1137
nir_ssa_def *local_pos = nir_iadd(&b, global_pos, nir_imm_int(&b, chan));
1138
1139
nir_ssa_def *coord = nir_vec4(&b, local_pos, local_pos, local_pos, local_pos);
1140
1141
nir_image_deref_store(&b, &nir_build_deref_var(&b, output_img)->dest.ssa, coord,
1142
nir_ssa_undef(&b, 1, 32), nir_channel(&b, clear_val, chan),
1143
nir_imm_int(&b, 0));
1144
}
1145
1146
return b.shader;
1147
}
1148
1149
static VkResult
1150
radv_device_init_meta_cleari_r32g32b32_state(struct radv_device *device)
1151
{
1152
VkResult result;
1153
nir_shader *cs = build_nir_cleari_r32g32b32_compute_shader(device);
1154
1155
VkDescriptorSetLayoutCreateInfo ds_create_info = {
1156
.sType = VK_STRUCTURE_TYPE_DESCRIPTOR_SET_LAYOUT_CREATE_INFO,
1157
.flags = VK_DESCRIPTOR_SET_LAYOUT_CREATE_PUSH_DESCRIPTOR_BIT_KHR,
1158
.bindingCount = 1,
1159
.pBindings = (VkDescriptorSetLayoutBinding[]){
1160
{.binding = 0,
1161
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1162
.descriptorCount = 1,
1163
.stageFlags = VK_SHADER_STAGE_COMPUTE_BIT,
1164
.pImmutableSamplers = NULL},
1165
}};
1166
1167
result = radv_CreateDescriptorSetLayout(radv_device_to_handle(device), &ds_create_info,
1168
&device->meta_state.alloc,
1169
&device->meta_state.cleari_r32g32b32.img_ds_layout);
1170
if (result != VK_SUCCESS)
1171
goto fail;
1172
1173
VkPipelineLayoutCreateInfo pl_create_info = {
1174
.sType = VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO,
1175
.setLayoutCount = 1,
1176
.pSetLayouts = &device->meta_state.cleari_r32g32b32.img_ds_layout,
1177
.pushConstantRangeCount = 1,
1178
.pPushConstantRanges = &(VkPushConstantRange){VK_SHADER_STAGE_COMPUTE_BIT, 0, 16},
1179
};
1180
1181
result = radv_CreatePipelineLayout(radv_device_to_handle(device), &pl_create_info,
1182
&device->meta_state.alloc,
1183
&device->meta_state.cleari_r32g32b32.img_p_layout);
1184
if (result != VK_SUCCESS)
1185
goto fail;
1186
1187
/* compute shader */
1188
VkPipelineShaderStageCreateInfo pipeline_shader_stage = {
1189
.sType = VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,
1190
.stage = VK_SHADER_STAGE_COMPUTE_BIT,
1191
.module = vk_shader_module_handle_from_nir(cs),
1192
.pName = "main",
1193
.pSpecializationInfo = NULL,
1194
};
1195
1196
VkComputePipelineCreateInfo vk_pipeline_info = {
1197
.sType = VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,
1198
.stage = pipeline_shader_stage,
1199
.flags = 0,
1200
.layout = device->meta_state.cleari_r32g32b32.img_p_layout,
1201
};
1202
1203
result = radv_CreateComputePipelines(
1204
radv_device_to_handle(device), radv_pipeline_cache_to_handle(&device->meta_state.cache), 1,
1205
&vk_pipeline_info, NULL, &device->meta_state.cleari_r32g32b32.pipeline);
1206
1207
fail:
1208
ralloc_free(cs);
1209
return result;
1210
}
1211
1212
static void
1213
radv_device_finish_meta_cleari_r32g32b32_state(struct radv_device *device)
1214
{
1215
struct radv_meta_state *state = &device->meta_state;
1216
1217
radv_DestroyPipelineLayout(radv_device_to_handle(device), state->cleari_r32g32b32.img_p_layout,
1218
&state->alloc);
1219
radv_DestroyDescriptorSetLayout(radv_device_to_handle(device),
1220
state->cleari_r32g32b32.img_ds_layout, &state->alloc);
1221
radv_DestroyPipeline(radv_device_to_handle(device), state->cleari_r32g32b32.pipeline,
1222
&state->alloc);
1223
}
1224
1225
void
1226
radv_device_finish_meta_bufimage_state(struct radv_device *device)
1227
{
1228
radv_device_finish_meta_itob_state(device);
1229
radv_device_finish_meta_btoi_state(device);
1230
radv_device_finish_meta_btoi_r32g32b32_state(device);
1231
radv_device_finish_meta_itoi_state(device);
1232
radv_device_finish_meta_itoi_r32g32b32_state(device);
1233
radv_device_finish_meta_cleari_state(device);
1234
radv_device_finish_meta_cleari_r32g32b32_state(device);
1235
}
1236
1237
VkResult
1238
radv_device_init_meta_bufimage_state(struct radv_device *device)
1239
{
1240
VkResult result;
1241
1242
result = radv_device_init_meta_itob_state(device);
1243
if (result != VK_SUCCESS)
1244
goto fail_itob;
1245
1246
result = radv_device_init_meta_btoi_state(device);
1247
if (result != VK_SUCCESS)
1248
goto fail_btoi;
1249
1250
result = radv_device_init_meta_btoi_r32g32b32_state(device);
1251
if (result != VK_SUCCESS)
1252
goto fail_btoi_r32g32b32;
1253
1254
result = radv_device_init_meta_itoi_state(device);
1255
if (result != VK_SUCCESS)
1256
goto fail_itoi;
1257
1258
result = radv_device_init_meta_itoi_r32g32b32_state(device);
1259
if (result != VK_SUCCESS)
1260
goto fail_itoi_r32g32b32;
1261
1262
result = radv_device_init_meta_cleari_state(device);
1263
if (result != VK_SUCCESS)
1264
goto fail_cleari;
1265
1266
result = radv_device_init_meta_cleari_r32g32b32_state(device);
1267
if (result != VK_SUCCESS)
1268
goto fail_cleari_r32g32b32;
1269
1270
return VK_SUCCESS;
1271
fail_cleari_r32g32b32:
1272
radv_device_finish_meta_cleari_r32g32b32_state(device);
1273
fail_cleari:
1274
radv_device_finish_meta_cleari_state(device);
1275
fail_itoi_r32g32b32:
1276
radv_device_finish_meta_itoi_r32g32b32_state(device);
1277
fail_itoi:
1278
radv_device_finish_meta_itoi_state(device);
1279
fail_btoi_r32g32b32:
1280
radv_device_finish_meta_btoi_r32g32b32_state(device);
1281
fail_btoi:
1282
radv_device_finish_meta_btoi_state(device);
1283
fail_itob:
1284
radv_device_finish_meta_itob_state(device);
1285
return result;
1286
}
1287
1288
static void
1289
create_iview(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
1290
struct radv_image_view *iview)
1291
{
1292
VkImageViewType view_type = cmd_buffer->device->physical_device->rad_info.chip_class < GFX9
1293
? VK_IMAGE_VIEW_TYPE_2D
1294
: radv_meta_get_view_type(surf->image);
1295
radv_image_view_init(iview, cmd_buffer->device,
1296
&(VkImageViewCreateInfo){
1297
.sType = VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO,
1298
.image = radv_image_to_handle(surf->image),
1299
.viewType = view_type,
1300
.format = surf->format,
1301
.subresourceRange = {.aspectMask = surf->aspect_mask,
1302
.baseMipLevel = surf->level,
1303
.levelCount = 1,
1304
.baseArrayLayer = surf->layer,
1305
.layerCount = 1},
1306
},
1307
&(struct radv_image_view_extra_create_info){
1308
.disable_compression = surf->disable_compression,
1309
});
1310
}
1311
1312
static void
1313
create_bview(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer, unsigned offset,
1314
VkFormat format, struct radv_buffer_view *bview)
1315
{
1316
radv_buffer_view_init(bview, cmd_buffer->device,
1317
&(VkBufferViewCreateInfo){
1318
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1319
.flags = 0,
1320
.buffer = radv_buffer_to_handle(buffer),
1321
.format = format,
1322
.offset = offset,
1323
.range = VK_WHOLE_SIZE,
1324
});
1325
}
1326
1327
static void
1328
create_buffer_from_image(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *surf,
1329
VkBufferUsageFlagBits usage, VkBuffer *buffer)
1330
{
1331
struct radv_device *device = cmd_buffer->device;
1332
struct radv_device_memory mem = {.bo = surf->image->bo};
1333
1334
radv_CreateBuffer(radv_device_to_handle(device),
1335
&(VkBufferCreateInfo){
1336
.sType = VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
1337
.flags = 0,
1338
.size = surf->image->size,
1339
.usage = usage,
1340
.sharingMode = VK_SHARING_MODE_EXCLUSIVE,
1341
},
1342
NULL, buffer);
1343
1344
radv_BindBufferMemory2(radv_device_to_handle(device), 1,
1345
(VkBindBufferMemoryInfo[]){{
1346
.sType = VK_STRUCTURE_TYPE_BIND_BUFFER_MEMORY_INFO,
1347
.buffer = *buffer,
1348
.memory = radv_device_memory_to_handle(&mem),
1349
.memoryOffset = surf->image->offset,
1350
}});
1351
}
1352
1353
static void
1354
create_bview_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer *buffer,
1355
unsigned offset, VkFormat src_format, struct radv_buffer_view *bview)
1356
{
1357
VkFormat format;
1358
1359
switch (src_format) {
1360
case VK_FORMAT_R32G32B32_UINT:
1361
format = VK_FORMAT_R32_UINT;
1362
break;
1363
case VK_FORMAT_R32G32B32_SINT:
1364
format = VK_FORMAT_R32_SINT;
1365
break;
1366
case VK_FORMAT_R32G32B32_SFLOAT:
1367
format = VK_FORMAT_R32_SFLOAT;
1368
break;
1369
default:
1370
unreachable("invalid R32G32B32 format");
1371
}
1372
1373
radv_buffer_view_init(bview, cmd_buffer->device,
1374
&(VkBufferViewCreateInfo){
1375
.sType = VK_STRUCTURE_TYPE_BUFFER_VIEW_CREATE_INFO,
1376
.flags = 0,
1377
.buffer = radv_buffer_to_handle(buffer),
1378
.format = format,
1379
.offset = offset,
1380
.range = VK_WHOLE_SIZE,
1381
});
1382
}
1383
1384
static unsigned
1385
get_image_stride_for_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1386
struct radv_meta_blit2d_surf *surf)
1387
{
1388
unsigned stride;
1389
1390
if (cmd_buffer->device->physical_device->rad_info.chip_class >= GFX9) {
1391
stride = surf->image->planes[0].surface.u.gfx9.surf_pitch;
1392
} else {
1393
stride = surf->image->planes[0].surface.u.legacy.level[0].nblk_x * 3;
1394
}
1395
1396
return stride;
1397
}
1398
1399
static void
1400
itob_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
1401
struct radv_buffer_view *dst)
1402
{
1403
struct radv_device *device = cmd_buffer->device;
1404
1405
radv_meta_push_descriptor_set(
1406
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itob.img_p_layout, 0, /* set */
1407
2, /* descriptorWriteCount */
1408
(VkWriteDescriptorSet[]){
1409
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1410
.dstBinding = 0,
1411
.dstArrayElement = 0,
1412
.descriptorCount = 1,
1413
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1414
.pImageInfo =
1415
(VkDescriptorImageInfo[]){
1416
{
1417
.sampler = VK_NULL_HANDLE,
1418
.imageView = radv_image_view_to_handle(src),
1419
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1420
},
1421
}},
1422
{
1423
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1424
.dstBinding = 1,
1425
.dstArrayElement = 0,
1426
.descriptorCount = 1,
1427
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1428
.pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
1429
}});
1430
}
1431
1432
void
1433
radv_meta_image_to_buffer(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
1434
struct radv_meta_blit2d_buffer *dst, unsigned num_rects,
1435
struct radv_meta_blit2d_rect *rects)
1436
{
1437
VkPipeline pipeline = cmd_buffer->device->meta_state.itob.pipeline;
1438
struct radv_device *device = cmd_buffer->device;
1439
struct radv_image_view src_view;
1440
struct radv_buffer_view dst_view;
1441
1442
create_iview(cmd_buffer, src, &src_view);
1443
create_bview(cmd_buffer, dst->buffer, dst->offset, dst->format, &dst_view);
1444
itob_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1445
1446
if (device->physical_device->rad_info.chip_class >= GFX9 && src->image->type == VK_IMAGE_TYPE_3D)
1447
pipeline = cmd_buffer->device->meta_state.itob.pipeline_3d;
1448
1449
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1450
pipeline);
1451
1452
for (unsigned r = 0; r < num_rects; ++r) {
1453
unsigned push_constants[4] = {rects[r].src_x, rects[r].src_y, src->layer, dst->pitch};
1454
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1455
device->meta_state.itob.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1456
16, push_constants);
1457
1458
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1459
}
1460
}
1461
1462
static void
1463
btoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
1464
struct radv_buffer_view *dst)
1465
{
1466
struct radv_device *device = cmd_buffer->device;
1467
1468
radv_meta_push_descriptor_set(
1469
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi_r32g32b32.img_p_layout,
1470
0, /* set */
1471
2, /* descriptorWriteCount */
1472
(VkWriteDescriptorSet[]){
1473
{
1474
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1475
.dstBinding = 0,
1476
.dstArrayElement = 0,
1477
.descriptorCount = 1,
1478
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1479
.pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
1480
},
1481
{
1482
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1483
.dstBinding = 1,
1484
.dstArrayElement = 0,
1485
.descriptorCount = 1,
1486
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1487
.pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
1488
}});
1489
}
1490
1491
static void
1492
radv_meta_buffer_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1493
struct radv_meta_blit2d_buffer *src,
1494
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
1495
struct radv_meta_blit2d_rect *rects)
1496
{
1497
VkPipeline pipeline = cmd_buffer->device->meta_state.btoi_r32g32b32.pipeline;
1498
struct radv_device *device = cmd_buffer->device;
1499
struct radv_buffer_view src_view, dst_view;
1500
unsigned dst_offset = 0;
1501
unsigned stride;
1502
VkBuffer buffer;
1503
1504
/* This special btoi path for R32G32B32 formats will write the linear
1505
* image as a buffer with the same underlying memory. The compute
1506
* shader will copy all components separately using a R32 format.
1507
*/
1508
create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
1509
1510
create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1511
create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), dst_offset, dst->format,
1512
&dst_view);
1513
btoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1514
1515
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1516
pipeline);
1517
1518
stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1519
1520
for (unsigned r = 0; r < num_rects; ++r) {
1521
unsigned push_constants[4] = {
1522
rects[r].dst_x,
1523
rects[r].dst_y,
1524
stride,
1525
src->pitch,
1526
};
1527
1528
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1529
device->meta_state.btoi_r32g32b32.img_p_layout,
1530
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
1531
1532
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1533
}
1534
1535
radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1536
}
1537
1538
static void
1539
btoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
1540
struct radv_image_view *dst)
1541
{
1542
struct radv_device *device = cmd_buffer->device;
1543
1544
radv_meta_push_descriptor_set(
1545
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.btoi.img_p_layout, 0, /* set */
1546
2, /* descriptorWriteCount */
1547
(VkWriteDescriptorSet[]){
1548
{
1549
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1550
.dstBinding = 0,
1551
.dstArrayElement = 0,
1552
.descriptorCount = 1,
1553
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1554
.pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
1555
},
1556
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1557
.dstBinding = 1,
1558
.dstArrayElement = 0,
1559
.descriptorCount = 1,
1560
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1561
.pImageInfo = (VkDescriptorImageInfo[]){
1562
{
1563
.sampler = VK_NULL_HANDLE,
1564
.imageView = radv_image_view_to_handle(dst),
1565
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1566
},
1567
}}});
1568
}
1569
1570
void
1571
radv_meta_buffer_to_image_cs(struct radv_cmd_buffer *cmd_buffer,
1572
struct radv_meta_blit2d_buffer *src, struct radv_meta_blit2d_surf *dst,
1573
unsigned num_rects, struct radv_meta_blit2d_rect *rects)
1574
{
1575
VkPipeline pipeline = cmd_buffer->device->meta_state.btoi.pipeline;
1576
struct radv_device *device = cmd_buffer->device;
1577
struct radv_buffer_view src_view;
1578
struct radv_image_view dst_view;
1579
1580
if (dst->image->vk_format == VK_FORMAT_R32G32B32_UINT ||
1581
dst->image->vk_format == VK_FORMAT_R32G32B32_SINT ||
1582
dst->image->vk_format == VK_FORMAT_R32G32B32_SFLOAT) {
1583
radv_meta_buffer_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
1584
return;
1585
}
1586
1587
create_bview(cmd_buffer, src->buffer, src->offset, src->format, &src_view);
1588
create_iview(cmd_buffer, dst, &dst_view);
1589
btoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1590
1591
if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
1592
pipeline = cmd_buffer->device->meta_state.btoi.pipeline_3d;
1593
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1594
pipeline);
1595
1596
for (unsigned r = 0; r < num_rects; ++r) {
1597
unsigned push_constants[4] = {
1598
rects[r].dst_x,
1599
rects[r].dst_y,
1600
dst->layer,
1601
src->pitch,
1602
};
1603
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1604
device->meta_state.btoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1605
16, push_constants);
1606
1607
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1608
}
1609
}
1610
1611
static void
1612
itoi_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *src,
1613
struct radv_buffer_view *dst)
1614
{
1615
struct radv_device *device = cmd_buffer->device;
1616
1617
radv_meta_push_descriptor_set(
1618
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi_r32g32b32.img_p_layout,
1619
0, /* set */
1620
2, /* descriptorWriteCount */
1621
(VkWriteDescriptorSet[]){
1622
{
1623
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1624
.dstBinding = 0,
1625
.dstArrayElement = 0,
1626
.descriptorCount = 1,
1627
.descriptorType = VK_DESCRIPTOR_TYPE_UNIFORM_TEXEL_BUFFER,
1628
.pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(src)},
1629
},
1630
{
1631
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1632
.dstBinding = 1,
1633
.dstArrayElement = 0,
1634
.descriptorCount = 1,
1635
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1636
.pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(dst)},
1637
}});
1638
}
1639
1640
static void
1641
radv_meta_image_to_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1642
struct radv_meta_blit2d_surf *src,
1643
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
1644
struct radv_meta_blit2d_rect *rects)
1645
{
1646
VkPipeline pipeline = cmd_buffer->device->meta_state.itoi_r32g32b32.pipeline;
1647
struct radv_device *device = cmd_buffer->device;
1648
struct radv_buffer_view src_view, dst_view;
1649
unsigned src_offset = 0, dst_offset = 0;
1650
unsigned src_stride, dst_stride;
1651
VkBuffer src_buffer, dst_buffer;
1652
1653
/* 96-bit formats are only compatible to themselves. */
1654
assert(dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
1655
dst->format == VK_FORMAT_R32G32B32_SFLOAT);
1656
1657
/* This special itoi path for R32G32B32 formats will write the linear
1658
* image as a buffer with the same underlying memory. The compute
1659
* shader will copy all components separately using a R32 format.
1660
*/
1661
create_buffer_from_image(cmd_buffer, src, VK_BUFFER_USAGE_UNIFORM_TEXEL_BUFFER_BIT, &src_buffer);
1662
create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &dst_buffer);
1663
1664
create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(src_buffer), src_offset,
1665
src->format, &src_view);
1666
create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(dst_buffer), dst_offset,
1667
dst->format, &dst_view);
1668
itoi_r32g32b32_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1669
1670
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1671
pipeline);
1672
1673
src_stride = get_image_stride_for_r32g32b32(cmd_buffer, src);
1674
dst_stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1675
1676
for (unsigned r = 0; r < num_rects; ++r) {
1677
unsigned push_constants[6] = {
1678
rects[r].src_x, rects[r].src_y, src_stride, rects[r].dst_x, rects[r].dst_y, dst_stride,
1679
};
1680
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1681
device->meta_state.itoi_r32g32b32.img_p_layout,
1682
VK_SHADER_STAGE_COMPUTE_BIT, 0, 24, push_constants);
1683
1684
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1685
}
1686
1687
radv_DestroyBuffer(radv_device_to_handle(device), src_buffer, NULL);
1688
radv_DestroyBuffer(radv_device_to_handle(device), dst_buffer, NULL);
1689
}
1690
1691
static void
1692
itoi_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *src,
1693
struct radv_image_view *dst)
1694
{
1695
struct radv_device *device = cmd_buffer->device;
1696
1697
radv_meta_push_descriptor_set(
1698
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.itoi.img_p_layout, 0, /* set */
1699
2, /* descriptorWriteCount */
1700
(VkWriteDescriptorSet[]){{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1701
.dstBinding = 0,
1702
.dstArrayElement = 0,
1703
.descriptorCount = 1,
1704
.descriptorType = VK_DESCRIPTOR_TYPE_SAMPLED_IMAGE,
1705
.pImageInfo =
1706
(VkDescriptorImageInfo[]){
1707
{
1708
.sampler = VK_NULL_HANDLE,
1709
.imageView = radv_image_view_to_handle(src),
1710
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1711
},
1712
}},
1713
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1714
.dstBinding = 1,
1715
.dstArrayElement = 0,
1716
.descriptorCount = 1,
1717
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1718
.pImageInfo = (VkDescriptorImageInfo[]){
1719
{
1720
.sampler = VK_NULL_HANDLE,
1721
.imageView = radv_image_view_to_handle(dst),
1722
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1723
},
1724
}}});
1725
}
1726
1727
void
1728
radv_meta_image_to_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *src,
1729
struct radv_meta_blit2d_surf *dst, unsigned num_rects,
1730
struct radv_meta_blit2d_rect *rects)
1731
{
1732
struct radv_device *device = cmd_buffer->device;
1733
struct radv_image_view src_view, dst_view;
1734
uint32_t samples = src->image->info.samples;
1735
uint32_t samples_log2 = ffs(samples) - 1;
1736
1737
if (src->format == VK_FORMAT_R32G32B32_UINT || src->format == VK_FORMAT_R32G32B32_SINT ||
1738
src->format == VK_FORMAT_R32G32B32_SFLOAT) {
1739
radv_meta_image_to_image_cs_r32g32b32(cmd_buffer, src, dst, num_rects, rects);
1740
return;
1741
}
1742
1743
create_iview(cmd_buffer, src, &src_view);
1744
create_iview(cmd_buffer, dst, &dst_view);
1745
1746
itoi_bind_descriptors(cmd_buffer, &src_view, &dst_view);
1747
1748
VkPipeline pipeline = cmd_buffer->device->meta_state.itoi.pipeline[samples_log2];
1749
if (device->physical_device->rad_info.chip_class >= GFX9 &&
1750
(src->image->type == VK_IMAGE_TYPE_3D || dst->image->type == VK_IMAGE_TYPE_3D))
1751
pipeline = cmd_buffer->device->meta_state.itoi.pipeline_3d;
1752
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1753
pipeline);
1754
1755
for (unsigned r = 0; r < num_rects; ++r) {
1756
unsigned push_constants[6] = {
1757
rects[r].src_x, rects[r].src_y, src->layer, rects[r].dst_x, rects[r].dst_y, dst->layer,
1758
};
1759
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1760
device->meta_state.itoi.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0,
1761
24, push_constants);
1762
1763
radv_unaligned_dispatch(cmd_buffer, rects[r].width, rects[r].height, 1);
1764
}
1765
}
1766
1767
static void
1768
cleari_r32g32b32_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_buffer_view *view)
1769
{
1770
struct radv_device *device = cmd_buffer->device;
1771
1772
radv_meta_push_descriptor_set(
1773
cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE, device->meta_state.cleari_r32g32b32.img_p_layout,
1774
0, /* set */
1775
1, /* descriptorWriteCount */
1776
(VkWriteDescriptorSet[]){{
1777
.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1778
.dstBinding = 0,
1779
.dstArrayElement = 0,
1780
.descriptorCount = 1,
1781
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_TEXEL_BUFFER,
1782
.pTexelBufferView = (VkBufferView[]){radv_buffer_view_to_handle(view)},
1783
}});
1784
}
1785
1786
static void
1787
radv_meta_clear_image_cs_r32g32b32(struct radv_cmd_buffer *cmd_buffer,
1788
struct radv_meta_blit2d_surf *dst,
1789
const VkClearColorValue *clear_color)
1790
{
1791
VkPipeline pipeline = cmd_buffer->device->meta_state.cleari_r32g32b32.pipeline;
1792
struct radv_device *device = cmd_buffer->device;
1793
struct radv_buffer_view dst_view;
1794
unsigned stride;
1795
VkBuffer buffer;
1796
1797
/* This special clear path for R32G32B32 formats will write the linear
1798
* image as a buffer with the same underlying memory. The compute
1799
* shader will clear all components separately using a R32 format.
1800
*/
1801
create_buffer_from_image(cmd_buffer, dst, VK_BUFFER_USAGE_STORAGE_TEXEL_BUFFER_BIT, &buffer);
1802
1803
create_bview_for_r32g32b32(cmd_buffer, radv_buffer_from_handle(buffer), 0, dst->format,
1804
&dst_view);
1805
cleari_r32g32b32_bind_descriptors(cmd_buffer, &dst_view);
1806
1807
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1808
pipeline);
1809
1810
stride = get_image_stride_for_r32g32b32(cmd_buffer, dst);
1811
1812
unsigned push_constants[4] = {
1813
clear_color->uint32[0],
1814
clear_color->uint32[1],
1815
clear_color->uint32[2],
1816
stride,
1817
};
1818
1819
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1820
device->meta_state.cleari_r32g32b32.img_p_layout,
1821
VK_SHADER_STAGE_COMPUTE_BIT, 0, 16, push_constants);
1822
1823
radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
1824
1825
radv_DestroyBuffer(radv_device_to_handle(device), buffer, NULL);
1826
}
1827
1828
static void
1829
cleari_bind_descriptors(struct radv_cmd_buffer *cmd_buffer, struct radv_image_view *dst_iview)
1830
{
1831
struct radv_device *device = cmd_buffer->device;
1832
1833
radv_meta_push_descriptor_set(cmd_buffer, VK_PIPELINE_BIND_POINT_COMPUTE,
1834
device->meta_state.cleari.img_p_layout, 0, /* set */
1835
1, /* descriptorWriteCount */
1836
(VkWriteDescriptorSet[]){
1837
{.sType = VK_STRUCTURE_TYPE_WRITE_DESCRIPTOR_SET,
1838
.dstBinding = 0,
1839
.dstArrayElement = 0,
1840
.descriptorCount = 1,
1841
.descriptorType = VK_DESCRIPTOR_TYPE_STORAGE_IMAGE,
1842
.pImageInfo =
1843
(VkDescriptorImageInfo[]){
1844
{
1845
.sampler = VK_NULL_HANDLE,
1846
.imageView = radv_image_view_to_handle(dst_iview),
1847
.imageLayout = VK_IMAGE_LAYOUT_GENERAL,
1848
},
1849
}},
1850
});
1851
}
1852
1853
void
1854
radv_meta_clear_image_cs(struct radv_cmd_buffer *cmd_buffer, struct radv_meta_blit2d_surf *dst,
1855
const VkClearColorValue *clear_color)
1856
{
1857
struct radv_device *device = cmd_buffer->device;
1858
struct radv_image_view dst_iview;
1859
uint32_t samples = dst->image->info.samples;
1860
uint32_t samples_log2 = ffs(samples) - 1;
1861
1862
if (dst->format == VK_FORMAT_R32G32B32_UINT || dst->format == VK_FORMAT_R32G32B32_SINT ||
1863
dst->format == VK_FORMAT_R32G32B32_SFLOAT) {
1864
radv_meta_clear_image_cs_r32g32b32(cmd_buffer, dst, clear_color);
1865
return;
1866
}
1867
1868
create_iview(cmd_buffer, dst, &dst_iview);
1869
cleari_bind_descriptors(cmd_buffer, &dst_iview);
1870
1871
VkPipeline pipeline = cmd_buffer->device->meta_state.cleari.pipeline[samples_log2];
1872
if (device->physical_device->rad_info.chip_class >= GFX9 && dst->image->type == VK_IMAGE_TYPE_3D)
1873
pipeline = cmd_buffer->device->meta_state.cleari.pipeline_3d;
1874
1875
radv_CmdBindPipeline(radv_cmd_buffer_to_handle(cmd_buffer), VK_PIPELINE_BIND_POINT_COMPUTE,
1876
pipeline);
1877
1878
unsigned push_constants[5] = {
1879
clear_color->uint32[0],
1880
clear_color->uint32[1],
1881
clear_color->uint32[2],
1882
clear_color->uint32[3],
1883
dst->layer,
1884
};
1885
1886
radv_CmdPushConstants(radv_cmd_buffer_to_handle(cmd_buffer),
1887
device->meta_state.cleari.img_p_layout, VK_SHADER_STAGE_COMPUTE_BIT, 0, 20,
1888
push_constants);
1889
1890
radv_unaligned_dispatch(cmd_buffer, dst->image->info.width, dst->image->info.height, 1);
1891
}
1892
1893